ab18fbc6124d1c573741a9ebc7ac2dbb77b65086
[notmuch] / lib / query.cc
1 /* query.cc - Support for searching a notmuch database
2  *
3  * Copyright © 2009 Carl Worth
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see http://www.gnu.org/licenses/ .
17  *
18  * Author: Carl Worth <cworth@cworth.org>
19  */
20
#include "notmuch-private.h"
#include "database-private.h"

#include <glib.h> /* GHashTable, GPtrArray */
#include <limits.h> /* CHAR_BIT */
25
/* State of a single query, created by notmuch_query_create and
 * released by notmuch_query_destroy. */
struct _notmuch_query {
    /* The database this query runs against. */
    notmuch_database_t *notmuch;
    /* Copy of the query string given at creation time. */
    const char *query_string;
    /* Ordering applied by notmuch_query_search_messages. */
    notmuch_sort_t sort;
    /* Prefixed tag terms registered by notmuch_query_add_tag_exclude. */
    notmuch_string_list_t *exclude_terms;
    /* If set, messages matching an exclude term are dropped from
     * search results instead of being returned. */
    notmuch_bool_t omit_excluded_messages;
};
33
/* A messages iterator backed by a Xapian MSet.
 *
 * Functions in this file cast a notmuch_messages_t pointer to this
 * type, so 'base' has to stay the first member. */
typedef struct _notmuch_mset_messages {
    notmuch_messages_t base;
    notmuch_database_t *notmuch;
    /* Current position and end of the matched set.  Both are C++
     * objects constructed with placement new and destroyed by the
     * talloc destructor. */
    Xapian::MSetIterator iterator;
    Xapian::MSetIterator iterator_end;
} notmuch_mset_messages_t;
40
/* A set of document ids, stored as a bitmap with one bit per id. */
struct _notmuch_doc_id_set {
    /* Bit (id % bits-per-word) of word (id / bits-per-word) is set when
     * id is a member. */
    unsigned int *bitmap;
    /* One more than the largest id the bitmap can represent; ids at or
     * above this bound are never members. */
    unsigned int bound;
};

/* Number of bits in one bitmap word.  Dividing by the byte size of a
 * word (as was done previously) still yields a consistent mapping, but
 * uses only sizeof (unsigned int) bits of every word. */
#define DOCIDSET_WORD_BITS (sizeof (unsigned int) * CHAR_BIT)

/* Index of the bitmap word holding 'bit', and position within it. */
#define DOCIDSET_WORD(bit) ((bit) / DOCIDSET_WORD_BITS)
#define DOCIDSET_BIT(bit) ((bit) % DOCIDSET_WORD_BITS)
48
/* Iterator over the threads matched by a query, as returned by
 * notmuch_query_search_threads. */
struct visible _notmuch_threads {
    /* The query that produced this iterator; it is also the talloc
     * parent of the iterator. */
    notmuch_query_t *query;

    /* The ordered list of doc ids matched by the query. */
    GArray *doc_ids;
    /* Our iterator's current position in doc_ids. */
    unsigned int doc_id_pos;
    /* The set of matched docid's that have not been assigned to a
     * thread. Initially, this contains every docid in doc_ids. */
    notmuch_doc_id_set_t match_set;
};
60
/* Forward declaration: notmuch_query_search_messages builds a doc id
 * set, and the definition of this function only appears further down
 * in the file. */
static notmuch_bool_t
_notmuch_doc_id_set_init (void *ctx,
                          notmuch_doc_id_set_t *doc_ids,
                          GArray *arr);
66
67 notmuch_query_t *
68 notmuch_query_create (notmuch_database_t *notmuch,
69                       const char *query_string)
70 {
71     notmuch_query_t *query;
72
73 #ifdef DEBUG_QUERY
74     fprintf (stderr, "Query string is:\n%s\n", query_string);
75 #endif
76
77     query = talloc (NULL, notmuch_query_t);
78     if (unlikely (query == NULL))
79         return NULL;
80
81     query->notmuch = notmuch;
82
83     query->query_string = talloc_strdup (query, query_string);
84
85     query->sort = NOTMUCH_SORT_NEWEST_FIRST;
86
87     query->exclude_terms = _notmuch_string_list_create (query);
88
89     query->omit_excluded_messages = FALSE;
90
91     return query;
92 }
93
94 const char *
95 notmuch_query_get_query_string (notmuch_query_t *query)
96 {
97     return query->query_string;
98 }
99
100 void
101 notmuch_query_set_omit_excluded_messages (notmuch_query_t *query, notmuch_bool_t omit)
102 {
103     query->omit_excluded_messages = omit;
104 }
105
106 void
107 notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
108 {
109     query->sort = sort;
110 }
111
112 notmuch_sort_t
113 notmuch_query_get_sort (notmuch_query_t *query)
114 {
115     return query->sort;
116 }
117
118 void
119 notmuch_query_add_tag_exclude (notmuch_query_t *query, const char *tag)
120 {
121     char *term = talloc_asprintf (query, "%s%s", _find_prefix ("tag"), tag);
122     _notmuch_string_list_append (query->exclude_terms, term);
123 }
124
/* talloc destructor for notmuch_mset_messages_t.
 *
 * We end up having to call the destructors explicitly because we had
 * to use "placement new" in order to initialize C++ objects within a
 * block that we allocated with talloc. So C++ is making talloc
 * slightly less simple to use, (we wouldn't need
 * talloc_set_destructor at all otherwise).
 *
 * Always returns 0.
 */
static int
_notmuch_messages_destructor (notmuch_mset_messages_t *messages)
{
    messages->iterator.~MSetIterator ();
    messages->iterator_end.~MSetIterator ();

    return 0;
}
139
140 /* Return a query that matches messages with the excluded tags
141  * registered with query.  Any tags that explicitly appear in xquery
142  * will not be excluded, and will be removed from the list of exclude
143  * tags.  The caller of this function has to combine the returned
144  * query appropriately.*/
145 static Xapian::Query
146 _notmuch_exclude_tags (notmuch_query_t *query, Xapian::Query xquery)
147 {
148     Xapian::Query exclude_query = Xapian::Query::MatchNothing;
149
150     for (notmuch_string_node_t *term = query->exclude_terms->head; term;
151          term = term->next) {
152         Xapian::TermIterator it = xquery.get_terms_begin ();
153         Xapian::TermIterator end = xquery.get_terms_end ();
154         for (; it != end; it++) {
155             if ((*it).compare (term->string) == 0)
156                 break;
157         }
158         if (it == end)
159             exclude_query = Xapian::Query (Xapian::Query::OP_OR,
160                                     exclude_query, Xapian::Query (term->string));
161         else
162             term->string = talloc_strdup (query, "");
163     }
164     return exclude_query;
165 }
166
167 notmuch_messages_t *
168 notmuch_query_search_messages (notmuch_query_t *query)
169 {
170     notmuch_database_t *notmuch = query->notmuch;
171     const char *query_string = query->query_string;
172     notmuch_mset_messages_t *messages;
173
174     messages = talloc (query, notmuch_mset_messages_t);
175     if (unlikely (messages == NULL))
176         return NULL;
177
178     try {
179
180         messages->base.is_of_list_type = FALSE;
181         messages->base.iterator = NULL;
182         messages->notmuch = notmuch;
183         new (&messages->iterator) Xapian::MSetIterator ();
184         new (&messages->iterator_end) Xapian::MSetIterator ();
185
186         talloc_set_destructor (messages, _notmuch_messages_destructor);
187
188         Xapian::Enquire enquire (*notmuch->xapian_db);
189         Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
190                                                    _find_prefix ("type"),
191                                                    "mail"));
192         Xapian::Query string_query, final_query, exclude_query;
193         Xapian::MSet mset;
194         Xapian::MSetIterator iterator;
195         unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
196                               Xapian::QueryParser::FLAG_PHRASE |
197                               Xapian::QueryParser::FLAG_LOVEHATE |
198                               Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
199                               Xapian::QueryParser::FLAG_WILDCARD |
200                               Xapian::QueryParser::FLAG_PURE_NOT);
201
202         if (strcmp (query_string, "") == 0 ||
203             strcmp (query_string, "*") == 0)
204         {
205             final_query = mail_query;
206         } else {
207             string_query = notmuch->query_parser->
208                 parse_query (query_string, flags);
209             final_query = Xapian::Query (Xapian::Query::OP_AND,
210                                          mail_query, string_query);
211         }
212         messages->base.excluded_doc_ids = NULL;
213
214         if (query->exclude_terms) {
215             exclude_query = _notmuch_exclude_tags (query, final_query);
216             exclude_query = Xapian::Query (Xapian::Query::OP_AND,
217                                            exclude_query, final_query);
218
219             if (query->omit_excluded_messages)
220                 final_query = Xapian::Query (Xapian::Query::OP_AND_NOT,
221                                              final_query, exclude_query);
222             else {
223                 enquire.set_weighting_scheme (Xapian::BoolWeight());
224                 enquire.set_query (exclude_query);
225
226                 mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
227
228                 GArray *excluded_doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
229
230                 for (iterator = mset.begin (); iterator != mset.end (); iterator++) {
231                     unsigned int doc_id = *iterator;
232                     g_array_append_val (excluded_doc_ids, doc_id);
233                 }
234                 messages->base.excluded_doc_ids = talloc (messages, _notmuch_doc_id_set);
235                 _notmuch_doc_id_set_init (query, messages->base.excluded_doc_ids,
236                                           excluded_doc_ids);
237                 g_array_unref (excluded_doc_ids);
238             }
239         }
240
241
242         enquire.set_weighting_scheme (Xapian::BoolWeight());
243
244         switch (query->sort) {
245         case NOTMUCH_SORT_OLDEST_FIRST:
246             enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE);
247             break;
248         case NOTMUCH_SORT_NEWEST_FIRST:
249             enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, TRUE);
250             break;
251         case NOTMUCH_SORT_MESSAGE_ID:
252             enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, FALSE);
253             break;
254         case NOTMUCH_SORT_UNSORTED:
255             break;
256         }
257
258 #if DEBUG_QUERY
259         fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
260 #endif
261
262         enquire.set_query (final_query);
263
264         mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
265
266         messages->iterator = mset.begin ();
267         messages->iterator_end = mset.end ();
268
269         return &messages->base;
270
271     } catch (const Xapian::Error &error) {
272         fprintf (stderr, "A Xapian exception occurred performing query: %s\n",
273                  error.get_msg().c_str());
274         fprintf (stderr, "Query string was: %s\n", query->query_string);
275         notmuch->exception_reported = TRUE;
276         talloc_free (messages);
277         return NULL;
278     }
279 }
280
281 notmuch_bool_t
282 _notmuch_mset_messages_valid (notmuch_messages_t *messages)
283 {
284     notmuch_mset_messages_t *mset_messages;
285
286     mset_messages = (notmuch_mset_messages_t *) messages;
287
288     return (mset_messages->iterator != mset_messages->iterator_end);
289 }
290
291 static Xapian::docid
292 _notmuch_mset_messages_get_doc_id (notmuch_messages_t *messages)
293 {
294     notmuch_mset_messages_t *mset_messages;
295
296     mset_messages = (notmuch_mset_messages_t *) messages;
297
298     if (! _notmuch_mset_messages_valid (&mset_messages->base))
299         return 0;
300
301     return *mset_messages->iterator;
302 }
303
304 notmuch_message_t *
305 _notmuch_mset_messages_get (notmuch_messages_t *messages)
306 {
307     notmuch_message_t *message;
308     Xapian::docid doc_id;
309     notmuch_private_status_t status;
310     notmuch_mset_messages_t *mset_messages;
311
312     mset_messages = (notmuch_mset_messages_t *) messages;
313
314     if (! _notmuch_mset_messages_valid (&mset_messages->base))
315         return NULL;
316
317     doc_id = *mset_messages->iterator;
318
319     message = _notmuch_message_create (mset_messages,
320                                        mset_messages->notmuch, doc_id,
321                                        &status);
322
323     if (message == NULL &&
324        status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
325     {
326         INTERNAL_ERROR ("a messages iterator contains a non-existent document ID.\n");
327     }
328
329     if (messages->excluded_doc_ids &&
330         _notmuch_doc_id_set_contains (messages->excluded_doc_ids, doc_id))
331         notmuch_message_set_flag (message, NOTMUCH_MESSAGE_FLAG_EXCLUDED, TRUE);
332
333     return message;
334 }
335
336 void
337 _notmuch_mset_messages_move_to_next (notmuch_messages_t *messages)
338 {
339     notmuch_mset_messages_t *mset_messages;
340
341     mset_messages = (notmuch_mset_messages_t *) messages;
342
343     mset_messages->iterator++;
344 }
345
346 static notmuch_bool_t
347 _notmuch_doc_id_set_init (void *ctx,
348                           notmuch_doc_id_set_t *doc_ids,
349                           GArray *arr)
350 {
351     unsigned int max = 0;
352     unsigned int *bitmap;
353
354     for (unsigned int i = 0; i < arr->len; i++)
355         max = MAX(max, g_array_index (arr, unsigned int, i));
356     bitmap = talloc_zero_array (ctx, unsigned int, 1 + max / sizeof (*bitmap));
357
358     if (bitmap == NULL)
359         return FALSE;
360
361     doc_ids->bitmap = bitmap;
362     doc_ids->bound = max + 1;
363
364     for (unsigned int i = 0; i < arr->len; i++) {
365         unsigned int doc_id = g_array_index (arr, unsigned int, i);
366         bitmap[DOCIDSET_WORD(doc_id)] |= 1 << DOCIDSET_BIT(doc_id);
367     }
368
369     return TRUE;
370 }
371
372 notmuch_bool_t
373 _notmuch_doc_id_set_contains (notmuch_doc_id_set_t *doc_ids,
374                               unsigned int doc_id)
375 {
376     if (doc_id >= doc_ids->bound)
377         return FALSE;
378     return doc_ids->bitmap[DOCIDSET_WORD(doc_id)] & (1 << DOCIDSET_BIT(doc_id));
379 }
380
381 void
382 _notmuch_doc_id_set_remove (notmuch_doc_id_set_t *doc_ids,
383                             unsigned int doc_id)
384 {
385     if (doc_id < doc_ids->bound)
386         doc_ids->bitmap[DOCIDSET_WORD(doc_id)] &= ~(1 << DOCIDSET_BIT(doc_id));
387 }
388
389 /* Glib objects force use to use a talloc destructor as well, (but not
390  * nearly as ugly as the for messages due to C++ objects). At
391  * this point, I'd really like to have some talloc-friendly
392  * equivalents for the few pieces of glib that I'm using. */
393 static int
394 _notmuch_threads_destructor (notmuch_threads_t *threads)
395 {
396     if (threads->doc_ids)
397         g_array_unref (threads->doc_ids);
398
399     return 0;
400 }
401
402 notmuch_threads_t *
403 notmuch_query_search_threads (notmuch_query_t *query)
404 {
405     notmuch_threads_t *threads;
406     notmuch_messages_t *messages;
407
408     threads = talloc (query, notmuch_threads_t);
409     if (threads == NULL)
410         return NULL;
411     threads->doc_ids = NULL;
412     talloc_set_destructor (threads, _notmuch_threads_destructor);
413
414     threads->query = query;
415
416     messages = notmuch_query_search_messages (query);
417     if (messages == NULL) {
418             talloc_free (threads);
419             return NULL;
420     }
421
422     threads->doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
423     while (notmuch_messages_valid (messages)) {
424         unsigned int doc_id = _notmuch_mset_messages_get_doc_id (messages);
425         g_array_append_val (threads->doc_ids, doc_id);
426         notmuch_messages_move_to_next (messages);
427     }
428     threads->doc_id_pos = 0;
429
430     talloc_free (messages);
431
432     if (! _notmuch_doc_id_set_init (threads, &threads->match_set,
433                                     threads->doc_ids)) {
434         talloc_free (threads);
435         return NULL;
436     }
437
438     return threads;
439 }
440
441 void
442 notmuch_query_destroy (notmuch_query_t *query)
443 {
444     talloc_free (query);
445 }
446
447 notmuch_bool_t
448 notmuch_threads_valid (notmuch_threads_t *threads)
449 {
450     unsigned int doc_id;
451
452     while (threads->doc_id_pos < threads->doc_ids->len) {
453         doc_id = g_array_index (threads->doc_ids, unsigned int,
454                                 threads->doc_id_pos);
455         if (_notmuch_doc_id_set_contains (&threads->match_set, doc_id))
456             break;
457
458         threads->doc_id_pos++;
459     }
460
461     return threads->doc_id_pos < threads->doc_ids->len;
462 }
463
464 notmuch_thread_t *
465 notmuch_threads_get (notmuch_threads_t *threads)
466 {
467     unsigned int doc_id;
468
469     if (! notmuch_threads_valid (threads))
470         return NULL;
471
472     doc_id = g_array_index (threads->doc_ids, unsigned int,
473                             threads->doc_id_pos);
474     return _notmuch_thread_create (threads->query,
475                                    threads->query->notmuch,
476                                    doc_id,
477                                    &threads->match_set,
478                                    threads->query->exclude_terms,
479                                    threads->query->sort);
480 }
481
482 void
483 notmuch_threads_move_to_next (notmuch_threads_t *threads)
484 {
485     threads->doc_id_pos++;
486 }
487
488 void
489 notmuch_threads_destroy (notmuch_threads_t *threads)
490 {
491     talloc_free (threads);
492 }
493
494 unsigned
495 notmuch_query_count_messages (notmuch_query_t *query)
496 {
497     notmuch_database_t *notmuch = query->notmuch;
498     const char *query_string = query->query_string;
499     Xapian::doccount count = 0;
500
501     try {
502         Xapian::Enquire enquire (*notmuch->xapian_db);
503         Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
504                                                    _find_prefix ("type"),
505                                                    "mail"));
506         Xapian::Query string_query, final_query, exclude_query;
507         Xapian::MSet mset;
508         unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
509                               Xapian::QueryParser::FLAG_PHRASE |
510                               Xapian::QueryParser::FLAG_LOVEHATE |
511                               Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
512                               Xapian::QueryParser::FLAG_WILDCARD |
513                               Xapian::QueryParser::FLAG_PURE_NOT);
514
515         if (strcmp (query_string, "") == 0 ||
516             strcmp (query_string, "*") == 0)
517         {
518             final_query = mail_query;
519         } else {
520             string_query = notmuch->query_parser->
521                 parse_query (query_string, flags);
522             final_query = Xapian::Query (Xapian::Query::OP_AND,
523                                          mail_query, string_query);
524         }
525
526         exclude_query = _notmuch_exclude_tags (query, final_query);
527
528         final_query = Xapian::Query (Xapian::Query::OP_AND_NOT,
529                                          final_query, exclude_query);
530
531         enquire.set_weighting_scheme(Xapian::BoolWeight());
532         enquire.set_docid_order(Xapian::Enquire::ASCENDING);
533
534 #if DEBUG_QUERY
535         fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
536 #endif
537
538         enquire.set_query (final_query);
539
540         mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
541
542         count = mset.get_matches_estimated();
543
544     } catch (const Xapian::Error &error) {
545         fprintf (stderr, "A Xapian exception occurred: %s\n",
546                  error.get_msg().c_str());
547         fprintf (stderr, "Query string was: %s\n", query->query_string);
548     }
549
550     return count;
551 }
552
553 unsigned
554 notmuch_query_count_threads (notmuch_query_t *query)
555 {
556     notmuch_messages_t *messages;
557     GHashTable *hash;
558     unsigned int count;
559     notmuch_sort_t sort;
560
561     sort = query->sort;
562     query->sort = NOTMUCH_SORT_UNSORTED;
563     messages = notmuch_query_search_messages (query);
564     query->sort = sort;
565     if (messages == NULL)
566         return 0;
567
568     hash = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, NULL);
569     if (hash == NULL) {
570         talloc_free (messages);
571         return 0;
572     }
573
574     while (notmuch_messages_valid (messages)) {
575         notmuch_message_t *message = notmuch_messages_get (messages);
576         const char *thread_id = notmuch_message_get_thread_id (message);
577         char *thread_id_copy = talloc_strdup (messages, thread_id);
578         if (unlikely (thread_id_copy == NULL)) {
579             notmuch_message_destroy (message);
580             count = 0;
581             goto DONE;
582         }
583         g_hash_table_insert (hash, thread_id_copy, NULL);
584         notmuch_message_destroy (message);
585         notmuch_messages_move_to_next (messages);
586     }
587
588     count = g_hash_table_size (hash);
589
590   DONE:
591     g_hash_table_unref (hash);
592     talloc_free (messages);
593
594     return count;
595 }