X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=database.cc;h=b392914190bb2f6247d89af371e0a86d56301938;hp=d513b8552bf12bc1fa7752e3275002977f73acec;hb=32ecfe72a1da9002b8617a8575ee1290c8fd3c6f;hpb=2a9b4fce7ce9dc1cb89a7581bc1360fd4bfcdf99 diff --git a/database.cc b/database.cc index d513b855..b3929141 100644 --- a/database.cc +++ b/database.cc @@ -30,24 +30,85 @@ using namespace std; #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) -/* These prefix values are specifically chosen to be compatible - * with sup, (http://sup.rubyforge.org), written by - * William Morgan , and released - * under the GNU GPL v2. - */ - typedef struct { const char *name; const char *prefix; } prefix_t; -prefix_t BOOLEAN_PREFIX[] = { - { "type", "K" }, - { "tag", "L" }, - { "id", "Q" }, - { "thread", "H" }, - { "ref", "R" }, - { "timestamp", "KTS" }, +/* Here's the current schema for our database: + * + * We currently have two different types of documents: mail and timestamps. + * + * Mail document + * ------------- + * A mail document is associated with a particular email message file + * on disk. It is indexed with the following prefixed terms: + * + * Single terms of given prefix: + * + * type: mail + * + * id: Unique ID of mail, (from Message-ID header or generated + * as "notmuch-sha1-<sha1_sum_of_entire_file>"). + * + * Multiple terms of given prefix: + * + * ref: The message IDs from all In-Reply-To and References + * headers in the message. + * + * tag: Any tags associated with this message by the user. + * + * thread: The thread ID of all threads to which the mail belongs + * + * A mail document also has two values: + * + * TIMESTAMP: The time_t value corresponding to the message's + * Date header. + * + * MESSAGE_ID: The unique ID of the mail message (see "id" above) + * + * Timestamp document + * ------------------ + * A timestamp document is used by a client of the notmuch library to + * maintain data necessary to allow for efficient polling of mail + * directories. 
The notmuch library does no interpretation of + * timestamps, but merely allows the user to store and retrieve + * timestamps as name/value pairs. + * + * The timestamp document is indexed with a single prefixed term: + * + * timestamp: The user's key value (likely a directory name) + * + * and has a single value: + * + * TIMESTAMP: The time_t value from the user. + */ + +/* With these prefix values we follow the conventions published here: + * + * http://xapian.org/docs/omega/termprefixes.html + * + * as much as makes sense. Note that I took some liberty in matching + * the reserved prefix values to notmuch concepts, (for example, 'G' + * is documented as "newsGroup (or similar entity - e.g. a web forum + * name)", for which I think the thread is the closest analogue in + * notmuch). This in spite of the fact that we will eventually be + * storing mailing-list messages where 'G' for "mailing list name" + * might be an even closer analogue. I'm treating the single-character + * prefixes preferentially for core notmuch concepts (which will be + * nearly universal to all mail messages). 
+ */ + +prefix_t BOOLEAN_PREFIX_INTERNAL[] = { + { "type", "T" }, + { "thread", "G" }, + { "ref", "XREFERENCE" }, + { "timestamp", "XTIMESTAMP" }, +}; + +prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { + { "tag", "K" }, + { "id", "Q" } }; const char * @@ -55,9 +116,13 @@ _find_prefix (const char *name) { unsigned int i; - for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++) - if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0) - return BOOLEAN_PREFIX[i].prefix; + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_INTERNAL[i].prefix; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_EXTERNAL[i].prefix; fprintf (stderr, "Internal error: No prefix exists for '%s'\n", name); exit (1); @@ -176,26 +241,34 @@ find_unique_document (notmuch_database_t *notmuch, return NOTMUCH_PRIVATE_STATUS_SUCCESS; } +/* XXX: Should rewrite this to accept a notmuch_message_t* instead of + * a Xapian::Document and then we could just use + * notmuch_message_get_thread_ids instead of duplicating its logic + * here. 
*/ static void insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) { string value_string; - const char *value, *id, *comma; - - value_string = doc.get_value (NOTMUCH_VALUE_THREAD); - value = value_string.c_str(); - if (strlen (value)) { - id = value; - while (*id) { - comma = strchr (id, ','); - if (comma == NULL) - comma = id + strlen (id); - g_hash_table_insert (thread_ids, - strndup (id, comma - id), NULL); - id = comma; - if (*id) - id++; - } + Xapian::TermIterator i; + const char *prefix_str = _find_prefix ("thread"); + char prefix; + + assert (strlen (prefix_str) == 1); + + prefix = *prefix_str; + + i = doc.termlist_begin (); + i.skip_to (prefix_str); + + while (1) { + if (i == doc.termlist_end ()) + break; + value_string = *i; + if (value_string.empty () || value_string[0] != prefix) + break; + g_hash_table_insert (thread_ids, + strdup (value_string.c_str () + 1), NULL); + i++; } } @@ -469,6 +542,7 @@ notmuch_database_open (const char *path) struct stat st; int err; char *local_path = NULL; + unsigned int i; if (path == NULL) path = local_path = notmuch_database_default_path (); @@ -493,9 +567,12 @@ notmuch_database_open (const char *path) notmuch->query_parser = new Xapian::QueryParser; notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); notmuch->query_parser->set_database (*notmuch->xapian_db); - notmuch->query_parser->add_boolean_prefix ("id", _find_prefix ("id")); - notmuch->query_parser->add_boolean_prefix ("tag", _find_prefix ("tag")); - notmuch->query_parser->add_boolean_prefix ("type", _find_prefix ("type")); + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { + prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i]; + notmuch->query_parser->add_boolean_prefix (prefix->name, + prefix->prefix); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s\n", error.get_msg().c_str()); @@ -568,7 +645,8 @@ notmuch_database_set_timestamp (notmuch_database_t *notmuch, try { status = 
find_timestamp_document (notmuch, db_key, &doc, &doc_id); - doc.add_value (0, Xapian::sortable_serialise (timestamp)); + doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (timestamp)); if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { char *term = talloc_asprintf (NULL, "%s%s", @@ -610,7 +688,7 @@ notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) goto DONE; - ret = Xapian::sortable_unserialise (doc.get_value (0)); + ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); } catch (Xapian::Error &error) { goto DONE; }