git.notmuchmail.org Git - notmuch/blobdiff - lib/query.cc
Remove code repetition in the doc ID bitmap code.
[notmuch] / lib / query.cc
index ea521dd822faac03719be787d4bd82f708ca2f77..c155470afd82128b37a512af892c08b86a1df3a2 100644 (file)
 
 #include <glib.h> /* GHashTable, GPtrArray */
 
-#include <xapian.h>
-
 struct _notmuch_query {
     notmuch_database_t *notmuch;
     const char *query_string;
     notmuch_sort_t sort;
 };
 
-struct _notmuch_messages {
+typedef struct _notmuch_mset_messages {
+    notmuch_messages_t base;
     notmuch_database_t *notmuch;
     Xapian::MSetIterator iterator;
     Xapian::MSetIterator iterator_end;
+} notmuch_mset_messages_t;
+
+struct _notmuch_doc_id_set {
+    unsigned int *bitmap;
+    unsigned int bound;
 };
 
+#define DOCIDSET_WORD(bit) ((bit) / sizeof (unsigned int))
+#define DOCIDSET_BIT(bit) ((bit) % sizeof (unsigned int))
+
 struct _notmuch_threads {
-    notmuch_database_t *notmuch;
-    GPtrArray *threads;
-    unsigned int index;
+    notmuch_query_t *query;
+
+    /* The ordered list of doc ids matched by the query. */
+    GArray *doc_ids;
+    /* Our iterator's current position in doc_ids. */
+    unsigned int doc_id_pos;
+    /* The set of matched doc ids that have not yet been assigned to a
+     * thread. Initially, this contains every doc id in doc_ids. */
+    notmuch_doc_id_set_t match_set;
 };
 
 notmuch_query_t *
@@ -66,27 +79,60 @@ notmuch_query_create (notmuch_database_t *notmuch,
     return query;
 }
 
+const char *
+notmuch_query_get_query_string (notmuch_query_t *query)
+{
+    return query->query_string;
+}
+
 void
 notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
 {
     query->sort = sort;
 }
 
+notmuch_sort_t
+notmuch_query_get_sort (notmuch_query_t *query)
+{
+    return query->sort;
+}
+
+/* We have to call the destructors explicitly because we used
+ * "placement new" to initialize C++ objects within a block that we
+ * allocated with talloc. So C++ makes talloc slightly less simple
+ * to use (we wouldn't need talloc_set_destructor at all
+ * otherwise).
+ */
+static int
+_notmuch_messages_destructor (notmuch_mset_messages_t *messages)
+{
+    messages->iterator.~MSetIterator ();
+    messages->iterator_end.~MSetIterator ();
+
+    return 0;
+}
+
 notmuch_messages_t *
-notmuch_query_search_messages (notmuch_query_t *query,
-                              int first,
-                              int max_messages)
+notmuch_query_search_messages (notmuch_query_t *query)
 {
     notmuch_database_t *notmuch = query->notmuch;
     const char *query_string = query->query_string;
-    notmuch_message_list_t *message_list;
-    Xapian::MSetIterator i;
+    notmuch_mset_messages_t *messages;
 
-    message_list = _notmuch_message_list_create (query);
-    if (unlikely (message_list == NULL))
+    messages = talloc (query, notmuch_mset_messages_t);
+    if (unlikely (messages == NULL))
        return NULL;
 
     try {
+
+       messages->base.is_of_list_type = FALSE;
+       messages->base.iterator = NULL;
+       messages->notmuch = notmuch;
+       new (&messages->iterator) Xapian::MSetIterator ();
+       new (&messages->iterator_end) Xapian::MSetIterator ();
+
+       talloc_set_destructor (messages, _notmuch_messages_destructor);
+
        Xapian::Enquire enquire (*notmuch->xapian_db);
        Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
                                                   _find_prefix ("type"),
@@ -100,7 +146,9 @@ notmuch_query_search_messages (notmuch_query_t *query,
                              Xapian::QueryParser::FLAG_WILDCARD |
                              Xapian::QueryParser::FLAG_PURE_NOT);
 
-       if (strcmp (query_string, "") == 0) {
+       if (strcmp (query_string, "") == 0 ||
+           strcmp (query_string, "*") == 0)
+       {
            final_query = mail_query;
        } else {
            string_query = notmuch->query_parser->
@@ -109,6 +157,8 @@ notmuch_query_search_messages (notmuch_query_t *query,
                                         mail_query, string_query);
        }
 
+       enquire.set_weighting_scheme (Xapian::BoolWeight());
+
        switch (query->sort) {
        case NOTMUCH_SORT_OLDEST_FIRST:
            enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE);
@@ -119,6 +169,8 @@ notmuch_query_search_messages (notmuch_query_t *query,
        case NOTMUCH_SORT_MESSAGE_ID:
            enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, FALSE);
            break;
+        case NOTMUCH_SORT_UNSORTED:
+           break;
        }
 
 #if DEBUG_QUERY
@@ -127,34 +179,122 @@ notmuch_query_search_messages (notmuch_query_t *query,
 
        enquire.set_query (final_query);
 
-       if (max_messages == -1)
-           max_messages = notmuch->xapian_db->get_doccount ();
-       mset = enquire.get_mset (first, max_messages);
-
-       for (i = mset.begin (); i != mset.end (); i++) {
-           notmuch_message_t *message;
-           notmuch_private_status_t status;
-
-           message = _notmuch_message_create (message_list, notmuch,
-                                              *i, &status);
-           if (message == NULL)
-           {
-               if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
-                   INTERNAL_ERROR ("A message iterator contains a "
-                                   "non-existent document ID.\n");
-               break;
-           }
-
-           _notmuch_message_list_add_message (message_list, message);
-       }
+       mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+       messages->iterator = mset.begin ();
+       messages->iterator_end = mset.end ();
+
+       return &messages->base;
 
     } catch (const Xapian::Error &error) {
-       fprintf (stderr, "A Xapian exception occurred: %s\n",
+       fprintf (stderr, "A Xapian exception occurred performing query: %s\n",
                 error.get_msg().c_str());
        fprintf (stderr, "Query string was: %s\n", query->query_string);
+       notmuch->exception_reported = TRUE;
+       talloc_free (messages);
+       return NULL;
+    }
+}
+
+notmuch_bool_t
+_notmuch_mset_messages_valid (notmuch_messages_t *messages)
+{
+    notmuch_mset_messages_t *mset_messages;
+
+    mset_messages = (notmuch_mset_messages_t *) messages;
+
+    return (mset_messages->iterator != mset_messages->iterator_end);
+}
+
+static Xapian::docid
+_notmuch_mset_messages_get_doc_id (notmuch_messages_t *messages)
+{
+    notmuch_mset_messages_t *mset_messages;
+
+    mset_messages = (notmuch_mset_messages_t *) messages;
+
+    if (! _notmuch_mset_messages_valid (&mset_messages->base))
+       return 0;
+
+    return *mset_messages->iterator;
+}
+
+notmuch_message_t *
+_notmuch_mset_messages_get (notmuch_messages_t *messages)
+{
+    notmuch_message_t *message;
+    Xapian::docid doc_id;
+    notmuch_private_status_t status;
+    notmuch_mset_messages_t *mset_messages;
+
+    mset_messages = (notmuch_mset_messages_t *) messages;
+
+    if (! _notmuch_mset_messages_valid (&mset_messages->base))
+       return NULL;
+
+    doc_id = *mset_messages->iterator;
+
+    message = _notmuch_message_create (mset_messages,
+                                      mset_messages->notmuch, doc_id,
+                                      &status);
+
+    if (message == NULL &&
+       status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
+    {
+       INTERNAL_ERROR ("a messages iterator contains a non-existent document ID.\n");
     }
 
-    return _notmuch_messages_create (message_list);
+    return message;
+}
+
+void
+_notmuch_mset_messages_move_to_next (notmuch_messages_t *messages)
+{
+    notmuch_mset_messages_t *mset_messages;
+
+    mset_messages = (notmuch_mset_messages_t *) messages;
+
+    mset_messages->iterator++;
+}
+
+static notmuch_bool_t
+_notmuch_doc_id_set_init (void *ctx,
+                         notmuch_doc_id_set_t *doc_ids,
+                         GArray *arr, unsigned int bound)
+{
+    size_t count = (bound + sizeof (doc_ids->bitmap[0]) - 1) /
+       sizeof (doc_ids->bitmap[0]);
+    unsigned int *bitmap = talloc_zero_array (ctx, unsigned int, count);
+
+    if (bitmap == NULL)
+       return FALSE;
+
+    doc_ids->bitmap = bitmap;
+    doc_ids->bound = bound;
+
+    for (unsigned int i = 0; i < arr->len; i++) {
+       unsigned int doc_id = g_array_index (arr, unsigned int, i);
+       bitmap[DOCIDSET_WORD(doc_id)] |= 1 << DOCIDSET_BIT(doc_id);
+    }
+
+    return TRUE;
+}
+
+notmuch_bool_t
+_notmuch_doc_id_set_contains (notmuch_doc_id_set_t *doc_ids,
+                             unsigned int doc_id)
+{
+    if (doc_id >= doc_ids->bound)
+       return FALSE;
+    return doc_ids->bitmap[DOCIDSET_WORD(doc_id)] & (1 << DOCIDSET_BIT(doc_id));
+}
+
+void
+_notmuch_doc_id_set_remove (notmuch_doc_id_set_t *doc_ids,
+                            unsigned int doc_id)
+{
+    if (doc_id < doc_ids->bound)
+       doc_ids->bitmap[DOCIDSET_WORD(doc_id)] &= ~(1 << DOCIDSET_BIT(doc_id));
 }
 
 /* GLib objects force us to use a talloc destructor as well, (but not
@@ -164,84 +304,50 @@ notmuch_query_search_messages (notmuch_query_t *query,
 static int
 _notmuch_threads_destructor (notmuch_threads_t *threads)
 {
-    g_ptr_array_free (threads->threads, TRUE);
+    if (threads->doc_ids)
+       g_array_unref (threads->doc_ids);
 
     return 0;
 }
 
 notmuch_threads_t *
-notmuch_query_search_threads (notmuch_query_t *query,
-                             int first,
-                             int max_threads)
+notmuch_query_search_threads (notmuch_query_t *query)
 {
     notmuch_threads_t *threads;
-    notmuch_thread_t *thread;
-    const char *thread_id;
     notmuch_messages_t *messages;
-    notmuch_message_t *message;
-    GHashTable *seen;
-    int messages_seen = 0, threads_seen = 0;
+    Xapian::docid max_doc_id = 0;
 
     threads = talloc (query, notmuch_threads_t);
     if (threads == NULL)
        return NULL;
-
-    threads->notmuch = query->notmuch;
-    threads->threads = g_ptr_array_new ();
-    threads->index = 0;
-
+    threads->doc_ids = NULL;
     talloc_set_destructor (threads, _notmuch_threads_destructor);
 
-    seen = g_hash_table_new_full (g_str_hash, g_str_equal,
-                                 free, NULL);
-
-    while (max_threads < 0 || threads_seen < first + max_threads)
-    {
-       int messages_seen_previously = messages_seen;
-
-       for (messages = notmuch_query_search_messages (query,
-                                                      messages_seen,
-                                                      max_threads);
-            notmuch_messages_has_more (messages);
-            notmuch_messages_advance (messages))
-       {
-           message = notmuch_messages_get (messages);
-
-           thread_id = notmuch_message_get_thread_id (message);
-
-           if (! g_hash_table_lookup_extended (seen,
-                                               thread_id, NULL,
-                                               (void **) &thread))
-           {
-               if (threads_seen >= first) {
-                   thread = _notmuch_thread_create (query, query->notmuch,
-                                                    thread_id,
-                                                    query->query_string);
-                   g_ptr_array_add (threads->threads, thread);
-               } else {
-                   thread = NULL;
-               }
-
-               g_hash_table_insert (seen, xstrdup (thread_id), thread);
+    threads->query = query;
 
-               threads_seen++;
-           }
-
-           notmuch_message_destroy (message);
+    messages = notmuch_query_search_messages (query);
+    if (messages == NULL) {
+           talloc_free (threads);
+           return NULL;
+    }
 
-           messages_seen++;
+    threads->doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
+    while (notmuch_messages_valid (messages)) {
+       unsigned int doc_id = _notmuch_mset_messages_get_doc_id (messages);
+       g_array_append_val (threads->doc_ids, doc_id);
+       max_doc_id = MAX (max_doc_id, doc_id);
+       notmuch_messages_move_to_next (messages);
+    }
+    threads->doc_id_pos = 0;
 
-           if (max_threads >= 0 && threads_seen >= first + max_threads)
-               break;
-       }
+    talloc_free (messages);
 
-       /* Stop if we're not seeing any more messages. */
-       if (messages_seen == messages_seen_previously)
-           break;
+    if (! _notmuch_doc_id_set_init (threads, &threads->match_set,
+                                   threads->doc_ids, max_doc_id + 1)) {
+       talloc_free (threads);
+       return NULL;
     }
 
-    g_hash_table_unref (seen);
-
     return threads;
 }
 
@@ -252,25 +358,43 @@ notmuch_query_destroy (notmuch_query_t *query)
 }
 
 notmuch_bool_t
-notmuch_threads_has_more (notmuch_threads_t *threads)
+notmuch_threads_valid (notmuch_threads_t *threads)
 {
-    return (threads->index < threads->threads->len);
+    unsigned int doc_id;
+
+    while (threads->doc_id_pos < threads->doc_ids->len) {
+       doc_id = g_array_index (threads->doc_ids, unsigned int,
+                               threads->doc_id_pos);
+       if (_notmuch_doc_id_set_contains (&threads->match_set, doc_id))
+           break;
+
+       threads->doc_id_pos++;
+    }
+
+    return threads->doc_id_pos < threads->doc_ids->len;
 }
 
 notmuch_thread_t *
 notmuch_threads_get (notmuch_threads_t *threads)
 {
-    if (! notmuch_threads_has_more (threads))
+    unsigned int doc_id;
+
+    if (! notmuch_threads_valid (threads))
        return NULL;
 
-    return (notmuch_thread_t *) g_ptr_array_index (threads->threads,
-                                                  threads->index);
+    doc_id = g_array_index (threads->doc_ids, unsigned int,
+                           threads->doc_id_pos);
+    return _notmuch_thread_create (threads->query,
+                                  threads->query->notmuch,
+                                  doc_id,
+                                  &threads->match_set,
+                                  threads->query->sort);
 }
 
 void
-notmuch_threads_advance (notmuch_threads_t *threads)
+notmuch_threads_move_to_next (notmuch_threads_t *threads)
 {
-    threads->index++;
+    threads->doc_id_pos++;
 }
 
 void
@@ -278,3 +402,57 @@ notmuch_threads_destroy (notmuch_threads_t *threads)
 {
     talloc_free (threads);
 }
+
+unsigned
+notmuch_query_count_messages (notmuch_query_t *query)
+{
+    notmuch_database_t *notmuch = query->notmuch;
+    const char *query_string = query->query_string;
+    Xapian::doccount count = 0;
+
+    try {
+       Xapian::Enquire enquire (*notmuch->xapian_db);
+       Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
+                                                  _find_prefix ("type"),
+                                                  "mail"));
+       Xapian::Query string_query, final_query;
+       Xapian::MSet mset;
+       unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
+                             Xapian::QueryParser::FLAG_PHRASE |
+                             Xapian::QueryParser::FLAG_LOVEHATE |
+                             Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
+                             Xapian::QueryParser::FLAG_WILDCARD |
+                             Xapian::QueryParser::FLAG_PURE_NOT);
+
+       if (strcmp (query_string, "") == 0 ||
+           strcmp (query_string, "*") == 0)
+       {
+           final_query = mail_query;
+       } else {
+           string_query = notmuch->query_parser->
+               parse_query (query_string, flags);
+           final_query = Xapian::Query (Xapian::Query::OP_AND,
+                                        mail_query, string_query);
+       }
+
+       enquire.set_weighting_scheme(Xapian::BoolWeight());
+       enquire.set_docid_order(Xapian::Enquire::ASCENDING);
+
+#if DEBUG_QUERY
+       fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
+#endif
+
+       enquire.set_query (final_query);
+
+       mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+       count = mset.get_matches_estimated();
+
+    } catch (const Xapian::Error &error) {
+       fprintf (stderr, "A Xapian exception occurred: %s\n",
+                error.get_msg().c_str());
+       fprintf (stderr, "Query string was: %s\n", query->query_string);
+    }
+
+    return count;
+}