Remove code repetition in the doc ID bitmap code.
[notmuch] / lib / query.cc
1 /* query.cc - Support for searching a notmuch database
2  *
3  * Copyright © 2009 Carl Worth
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see http://www.gnu.org/licenses/ .
17  *
18  * Author: Carl Worth <cworth@cworth.org>
19  */
20
#include "notmuch-private.h"
#include "database-private.h"

#include <glib.h> /* GHashTable, GPtrArray */
#include <limits.h> /* CHAR_BIT */
25
/* State of a single query: the database it runs against, the query
 * string, and the requested sort order. */
struct _notmuch_query {
    /* Database the query is run against, as given to
     * notmuch_query_create. */
    notmuch_database_t *notmuch;
    /* Copy of the caller's query string, allocated with the query
     * object as its talloc parent. */
    const char *query_string;
    /* Ordering applied to results; notmuch_query_create starts it out
     * as NOTMUCH_SORT_NEWEST_FIRST. */
    notmuch_sort_t sort;
};
31
/* A messages iterator backed directly by a Xapian MSet.
 *
 * 'base' is the first member, and the _notmuch_mset_messages_*
 * functions cast a notmuch_messages_t pointer straight to this type.
 *
 * The two Xapian iterators are C++ objects living inside a talloc
 * block: they are constructed with placement new and torn down by
 * _notmuch_messages_destructor. */
typedef struct _notmuch_mset_messages {
    notmuch_messages_t base;
    /* Database used when creating message objects for the results. */
    notmuch_database_t *notmuch;
    /* Current position in the result set. */
    Xapian::MSetIterator iterator;
    /* One-past-the-end position; iteration stops when 'iterator'
     * compares equal to it. */
    Xapian::MSetIterator iterator_end;
} notmuch_mset_messages_t;
38
/* A set of document IDs stored as a bitmap. */
struct _notmuch_doc_id_set {
    /* Bitmap storage; the word and bit that represent a given doc ID
     * are selected with DOCIDSET_WORD and DOCIDSET_BIT. */
    unsigned int *bitmap;
    /* Exclusive upper bound on the doc IDs the set can hold; lookups
     * and removals of IDs at or above it are ignored. */
    unsigned int bound;
};
43
/* Locate a doc ID inside the bitmap: DOCIDSET_WORD is the index of the
 * word holding its bit and DOCIDSET_BIT is the bit's position within
 * that word. A word holds sizeof (unsigned int) * CHAR_BIT bits, so
 * the divisor must be the bit width rather than the byte size;
 * dividing by the byte size would leave most bits of every word
 * unused. */
#define DOCIDSET_BITS_PER_WORD (sizeof (unsigned int) * CHAR_BIT)
#define DOCIDSET_WORD(bit) ((bit) / DOCIDSET_BITS_PER_WORD)
#define DOCIDSET_BIT(bit) ((bit) % DOCIDSET_BITS_PER_WORD)
46
/* Iterator state for walking the threads that match a query. */
struct _notmuch_threads {
    /* The query whose results are being iterated. */
    notmuch_query_t *query;

    /* The ordered list of doc ids matched by the query. */
    GArray *doc_ids;
    /* Our iterator's current position in doc_ids. */
    unsigned int doc_id_pos;
    /* The set of matched docid's that have not been assigned to a
     * thread. Initially, this contains every docid in doc_ids. */
    notmuch_doc_id_set_t match_set;
};
58
59 notmuch_query_t *
60 notmuch_query_create (notmuch_database_t *notmuch,
61                       const char *query_string)
62 {
63     notmuch_query_t *query;
64
65 #ifdef DEBUG_QUERY
66     fprintf (stderr, "Query string is:\n%s\n", query_string);
67 #endif
68
69     query = talloc (NULL, notmuch_query_t);
70     if (unlikely (query == NULL))
71         return NULL;
72
73     query->notmuch = notmuch;
74
75     query->query_string = talloc_strdup (query, query_string);
76
77     query->sort = NOTMUCH_SORT_NEWEST_FIRST;
78
79     return query;
80 }
81
82 const char *
83 notmuch_query_get_query_string (notmuch_query_t *query)
84 {
85     return query->query_string;
86 }
87
88 void
89 notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
90 {
91     query->sort = sort;
92 }
93
94 notmuch_sort_t
95 notmuch_query_get_sort (notmuch_query_t *query)
96 {
97     return query->sort;
98 }
99
100 /* We end up having to call the destructors explicitly because we had
101  * to use "placement new" in order to initialize C++ objects within a
102  * block that we allocated with talloc. So C++ is making talloc
103  * slightly less simple to use, (we wouldn't need
104  * talloc_set_destructor at all otherwise).
105  */
106 static int
107 _notmuch_messages_destructor (notmuch_mset_messages_t *messages)
108 {
109     messages->iterator.~MSetIterator ();
110     messages->iterator_end.~MSetIterator ();
111
112     return 0;
113 }
114
/* Run 'query' and return an iterator over the matching messages.
 *
 * The iterator object is talloc'ed with 'query' as its parent. Returns
 * NULL if that allocation fails, or if Xapian throws while the query
 * is being built or run; in the latter case the error is printed to
 * stderr, notmuch->exception_reported is set and the partially built
 * iterator is freed.
 */
notmuch_messages_t *
notmuch_query_search_messages (notmuch_query_t *query)
{
    notmuch_database_t *notmuch = query->notmuch;
    const char *query_string = query->query_string;
    notmuch_mset_messages_t *messages;

    messages = talloc (query, notmuch_mset_messages_t);
    if (unlikely (messages == NULL))
        return NULL;

    try {

        messages->base.is_of_list_type = FALSE;
        messages->base.iterator = NULL;
        messages->notmuch = notmuch;
        /* talloc only hands back raw memory, so the C++ iterators are
         * constructed in place. */
        new (&messages->iterator) Xapian::MSetIterator ();
        new (&messages->iterator_end) Xapian::MSetIterator ();

        /* Registered only after both iterators exist, so that any
         * later talloc_free (including the one in the catch block)
         * runs their destructors on constructed objects. */
        talloc_set_destructor (messages, _notmuch_messages_destructor);

        Xapian::Enquire enquire (*notmuch->xapian_db);
        /* Term formed from the "type" prefix followed by "mail"; it is
         * ANDed with the user's query below. The temporary string is
         * allocated with 'query' as its talloc context. */
        Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
                                                   _find_prefix ("type"),
                                                   "mail"));
        Xapian::Query string_query, final_query;
        Xapian::MSet mset;
        unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
                              Xapian::QueryParser::FLAG_PHRASE |
                              Xapian::QueryParser::FLAG_LOVEHATE |
                              Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
                              Xapian::QueryParser::FLAG_WILDCARD |
                              Xapian::QueryParser::FLAG_PURE_NOT);

        /* An empty string or "*" skips the query parser entirely and
         * uses the type term alone. */
        if (strcmp (query_string, "") == 0 ||
            strcmp (query_string, "*") == 0)
        {
            final_query = mail_query;
        } else {
            string_query = notmuch->query_parser->
                parse_query (query_string, flags);
            final_query = Xapian::Query (Xapian::Query::OP_AND,
                                         mail_query, string_query);
        }

        /* Boolean weighting: results are ordered by the sort key
         * chosen below rather than by relevance. */
        enquire.set_weighting_scheme (Xapian::BoolWeight());

        switch (query->sort) {
        case NOTMUCH_SORT_OLDEST_FIRST:
            enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE);
            break;
        case NOTMUCH_SORT_NEWEST_FIRST:
            enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, TRUE);
            break;
        case NOTMUCH_SORT_MESSAGE_ID:
            enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, FALSE);
            break;
        case NOTMUCH_SORT_UNSORTED:
            /* No sort key is set for this case. */
            break;
        }

#if DEBUG_QUERY
        fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
#endif

        enquire.set_query (final_query);

        /* Ask for up to as many results as the database has
         * documents, i.e. do not truncate the match set. */
        mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());

        messages->iterator = mset.begin ();
        messages->iterator_end = mset.end ();

        return &messages->base;

    } catch (const Xapian::Error &error) {
        fprintf (stderr, "A Xapian exception occurred performing query: %s\n",
                 error.get_msg().c_str());
        fprintf (stderr, "Query string was: %s\n", query->query_string);
        notmuch->exception_reported = TRUE;
        talloc_free (messages);
        return NULL;
    }
}
198
199 notmuch_bool_t
200 _notmuch_mset_messages_valid (notmuch_messages_t *messages)
201 {
202     notmuch_mset_messages_t *mset_messages;
203
204     mset_messages = (notmuch_mset_messages_t *) messages;
205
206     return (mset_messages->iterator != mset_messages->iterator_end);
207 }
208
209 static Xapian::docid
210 _notmuch_mset_messages_get_doc_id (notmuch_messages_t *messages)
211 {
212     notmuch_mset_messages_t *mset_messages;
213
214     mset_messages = (notmuch_mset_messages_t *) messages;
215
216     if (! _notmuch_mset_messages_valid (&mset_messages->base))
217         return 0;
218
219     return *mset_messages->iterator;
220 }
221
222 notmuch_message_t *
223 _notmuch_mset_messages_get (notmuch_messages_t *messages)
224 {
225     notmuch_message_t *message;
226     Xapian::docid doc_id;
227     notmuch_private_status_t status;
228     notmuch_mset_messages_t *mset_messages;
229
230     mset_messages = (notmuch_mset_messages_t *) messages;
231
232     if (! _notmuch_mset_messages_valid (&mset_messages->base))
233         return NULL;
234
235     doc_id = *mset_messages->iterator;
236
237     message = _notmuch_message_create (mset_messages,
238                                        mset_messages->notmuch, doc_id,
239                                        &status);
240
241     if (message == NULL &&
242        status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
243     {
244         INTERNAL_ERROR ("a messages iterator contains a non-existent document ID.\n");
245     }
246
247     return message;
248 }
249
250 void
251 _notmuch_mset_messages_move_to_next (notmuch_messages_t *messages)
252 {
253     notmuch_mset_messages_t *mset_messages;
254
255     mset_messages = (notmuch_mset_messages_t *) messages;
256
257     mset_messages->iterator++;
258 }
259
260 static notmuch_bool_t
261 _notmuch_doc_id_set_init (void *ctx,
262                           notmuch_doc_id_set_t *doc_ids,
263                           GArray *arr, unsigned int bound)
264 {
265     size_t count = (bound + sizeof (doc_ids->bitmap[0]) - 1) /
266         sizeof (doc_ids->bitmap[0]);
267     unsigned int *bitmap = talloc_zero_array (ctx, unsigned int, count);
268
269     if (bitmap == NULL)
270         return FALSE;
271
272     doc_ids->bitmap = bitmap;
273     doc_ids->bound = bound;
274
275     for (unsigned int i = 0; i < arr->len; i++) {
276         unsigned int doc_id = g_array_index (arr, unsigned int, i);
277         bitmap[DOCIDSET_WORD(doc_id)] |= 1 << DOCIDSET_BIT(doc_id);
278     }
279
280     return TRUE;
281 }
282
283 notmuch_bool_t
284 _notmuch_doc_id_set_contains (notmuch_doc_id_set_t *doc_ids,
285                               unsigned int doc_id)
286 {
287     if (doc_id >= doc_ids->bound)
288         return FALSE;
289     return doc_ids->bitmap[DOCIDSET_WORD(doc_id)] & (1 << DOCIDSET_BIT(doc_id));
290 }
291
292 void
293 _notmuch_doc_id_set_remove (notmuch_doc_id_set_t *doc_ids,
294                             unsigned int doc_id)
295 {
296     if (doc_id < doc_ids->bound)
297         doc_ids->bitmap[DOCIDSET_WORD(doc_id)] &= ~(1 << DOCIDSET_BIT(doc_id));
298 }
299
300 /* Glib objects force use to use a talloc destructor as well, (but not
301  * nearly as ugly as the for messages due to C++ objects). At
302  * this point, I'd really like to have some talloc-friendly
303  * equivalents for the few pieces of glib that I'm using. */
304 static int
305 _notmuch_threads_destructor (notmuch_threads_t *threads)
306 {
307     if (threads->doc_ids)
308         g_array_unref (threads->doc_ids);
309
310     return 0;
311 }
312
313 notmuch_threads_t *
314 notmuch_query_search_threads (notmuch_query_t *query)
315 {
316     notmuch_threads_t *threads;
317     notmuch_messages_t *messages;
318     Xapian::docid max_doc_id = 0;
319
320     threads = talloc (query, notmuch_threads_t);
321     if (threads == NULL)
322         return NULL;
323     threads->doc_ids = NULL;
324     talloc_set_destructor (threads, _notmuch_threads_destructor);
325
326     threads->query = query;
327
328     messages = notmuch_query_search_messages (query);
329     if (messages == NULL) {
330             talloc_free (threads);
331             return NULL;
332     }
333
334     threads->doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
335     while (notmuch_messages_valid (messages)) {
336         unsigned int doc_id = _notmuch_mset_messages_get_doc_id (messages);
337         g_array_append_val (threads->doc_ids, doc_id);
338         max_doc_id = MAX (max_doc_id, doc_id);
339         notmuch_messages_move_to_next (messages);
340     }
341     threads->doc_id_pos = 0;
342
343     talloc_free (messages);
344
345     if (! _notmuch_doc_id_set_init (threads, &threads->match_set,
346                                     threads->doc_ids, max_doc_id + 1)) {
347         talloc_free (threads);
348         return NULL;
349     }
350
351     return threads;
352 }
353
354 void
355 notmuch_query_destroy (notmuch_query_t *query)
356 {
357     talloc_free (query);
358 }
359
360 notmuch_bool_t
361 notmuch_threads_valid (notmuch_threads_t *threads)
362 {
363     unsigned int doc_id;
364
365     while (threads->doc_id_pos < threads->doc_ids->len) {
366         doc_id = g_array_index (threads->doc_ids, unsigned int,
367                                 threads->doc_id_pos);
368         if (_notmuch_doc_id_set_contains (&threads->match_set, doc_id))
369             break;
370
371         threads->doc_id_pos++;
372     }
373
374     return threads->doc_id_pos < threads->doc_ids->len;
375 }
376
377 notmuch_thread_t *
378 notmuch_threads_get (notmuch_threads_t *threads)
379 {
380     unsigned int doc_id;
381
382     if (! notmuch_threads_valid (threads))
383         return NULL;
384
385     doc_id = g_array_index (threads->doc_ids, unsigned int,
386                             threads->doc_id_pos);
387     return _notmuch_thread_create (threads->query,
388                                    threads->query->notmuch,
389                                    doc_id,
390                                    &threads->match_set,
391                                    threads->query->sort);
392 }
393
394 void
395 notmuch_threads_move_to_next (notmuch_threads_t *threads)
396 {
397     threads->doc_id_pos++;
398 }
399
400 void
401 notmuch_threads_destroy (notmuch_threads_t *threads)
402 {
403     talloc_free (threads);
404 }
405
406 unsigned
407 notmuch_query_count_messages (notmuch_query_t *query)
408 {
409     notmuch_database_t *notmuch = query->notmuch;
410     const char *query_string = query->query_string;
411     Xapian::doccount count = 0;
412
413     try {
414         Xapian::Enquire enquire (*notmuch->xapian_db);
415         Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
416                                                    _find_prefix ("type"),
417                                                    "mail"));
418         Xapian::Query string_query, final_query;
419         Xapian::MSet mset;
420         unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
421                               Xapian::QueryParser::FLAG_PHRASE |
422                               Xapian::QueryParser::FLAG_LOVEHATE |
423                               Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
424                               Xapian::QueryParser::FLAG_WILDCARD |
425                               Xapian::QueryParser::FLAG_PURE_NOT);
426
427         if (strcmp (query_string, "") == 0 ||
428             strcmp (query_string, "*") == 0)
429         {
430             final_query = mail_query;
431         } else {
432             string_query = notmuch->query_parser->
433                 parse_query (query_string, flags);
434             final_query = Xapian::Query (Xapian::Query::OP_AND,
435                                          mail_query, string_query);
436         }
437
438         enquire.set_weighting_scheme(Xapian::BoolWeight());
439         enquire.set_docid_order(Xapian::Enquire::ASCENDING);
440
441 #if DEBUG_QUERY
442         fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
443 #endif
444
445         enquire.set_query (final_query);
446
447         mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
448
449         count = mset.get_matches_estimated();
450
451     } catch (const Xapian::Error &error) {
452         fprintf (stderr, "A Xapian exception occurred: %s\n",
453                  error.get_msg().c_str());
454         fprintf (stderr, "Query string was: %s\n", query->query_string);
455     }
456
457     return count;
458 }