X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=database.cc;h=b392914190bb2f6247d89af371e0a86d56301938;hp=15d159ffd13494d6f672491a04b3e3868630238c;hb=32ecfe72a1da9002b8617a8575ee1290c8fd3c6f;hpb=9fc4a365d6cf478563caa012862c58a9b3945f76 diff --git a/database.cc b/database.cc index 15d159ff..b3929141 100644 --- a/database.cc +++ b/database.cc @@ -28,6 +28,108 @@ using namespace std; +#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) + +typedef struct { + const char *name; + const char *prefix; +} prefix_t; + +/* Here's the current schema for our database: + * + * We currently have two different types of documents: mail and timestamps. + * + * Mail document + * ------------- + * A mail document is associated with a particular email message file + * on disk. It is indexed with the following prefixed terms: + * + * Single terms of given prefix: + * + * type: mail + * + * id: Unique ID of mail, (from Message-ID header or generated + * as "notmuch-sha1-<sha1_sum_of_entire_file>"). + * + * Multiple terms of given prefix: + * + * ref: The message IDs from all In-Reply-To and References + * headers in the message. + * + * tag: Any tags associated with this message by the user. + * + * thread: The thread ID of all threads to which the mail belongs + * + * A mail document also has two values: + * + * TIMESTAMP: The time_t value corresponding to the message's + * Date header. + * + * MESSAGE_ID: The unique ID of the mail message (see "id" above) + * + * Timestamp document + * ------------------ + * A timestamp document is used by a client of the notmuch library to + * maintain data necessary to allow for efficient polling of mail + * directories. The notmuch library does no interpretation of + * timestamps, but merely allows the user to store and retrieve + * timestamps as name/value pairs. 
+ * + * The timestamp document is indexed with a single prefixed term: + * + * timestamp: The user's key value (likely a directory name) + * + * and has a single value: + * + * TIMESTAMP: The time_t value from the user. + */ + +/* With these prefix values we follow the conventions published here: + * + * http://xapian.org/docs/omega/termprefixes.html + * + * as much as makes sense. Note that I took some liberty in matching + * the reserved prefix values to notmuch concepts, (for example, 'G' + * is documented as "newsGroup (or similar entity - e.g. a web forum + * name)", for which I think the thread is the closest analogue in + * notmuch). This in spite of the fact that we will eventually be + * storing mailing-list messages where 'G' for "mailing list name" + * might be even a closer analogue. I'm treating the single-character + * prefixes preferentially for core notmuch concepts (which will be + * nearly universal to all mail messages). + */ + +prefix_t BOOLEAN_PREFIX_INTERNAL[] = { + { "type", "T" }, + { "thread", "G" }, + { "ref", "XREFERENCE" }, + { "timestamp", "XTIMESTAMP" }, +}; + +prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { + { "tag", "K" }, + { "id", "Q" } +}; + +const char * +_find_prefix (const char *name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_INTERNAL[i].prefix; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_EXTERNAL[i].prefix; + + fprintf (stderr, "Internal error: No prefix exists for '%s'\n", name); + exit (1); + + return ""; +} + const char * notmuch_status_to_string (notmuch_status_t status) { @@ -40,10 +142,12 @@ notmuch_status_to_string (notmuch_status_t status) return "Something went wrong trying to read or write a file"; case NOTMUCH_STATUS_FILE_NOT_EMAIL: return "File is not an email"; + case 
NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + return "Message ID is identical to a message in database"; case NOTMUCH_STATUS_NULL_POINTER: return "Erroneous NULL pointer"; case NOTMUCH_STATUS_TAG_TOO_LONG: - return "Tag value is too long"; + return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)"; default: case NOTMUCH_STATUS_LAST_STATUS: return "Unknown error status value"; @@ -92,32 +196,79 @@ find_doc_ids (notmuch_database_t *notmuch, free (term); } -Xapian::Document -find_message_by_docid (Xapian::Database *db, Xapian::docid docid) +static notmuch_private_status_t +find_unique_doc_id (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + unsigned int *doc_id) +{ + Xapian::PostingIterator i, end; + + find_doc_ids (notmuch, prefix_name, value, &i, &end); + + if (i == end) { + *doc_id = 0; + return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND; + } else { + *doc_id = *i; + return NOTMUCH_PRIVATE_STATUS_SUCCESS; + } +} + +static Xapian::Document +find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id) { - return db->get_document (docid); + return notmuch->xapian_db->get_document (doc_id); } +static notmuch_private_status_t +find_unique_document (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::Document *document, + unsigned int *doc_id) +{ + notmuch_private_status_t status; + + status = find_unique_doc_id (notmuch, prefix_name, value, doc_id); + + if (status) { + *document = Xapian::Document (); + return status; + } + + *document = find_document_for_doc_id (notmuch, *doc_id); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; +} + +/* XXX: Should rewrite this to accept a notmuch_message_t* instead of + * a Xapian::Document and then we could just use + * notmuch_message_get_thread_ids instead of duplicating its logic + * here. 
*/ static void insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) { string value_string; - const char *value, *id, *comma; - - value_string = doc.get_value (NOTMUCH_VALUE_THREAD); - value = value_string.c_str(); - if (strlen (value)) { - id = value; - while (*id) { - comma = strchr (id, ','); - if (comma == NULL) - comma = id + strlen (id); - g_hash_table_insert (thread_ids, - strndup (id, comma - id), NULL); - id = comma; - if (*id) - id++; - } + Xapian::TermIterator i; + const char *prefix_str = _find_prefix ("thread"); + char prefix; + + assert (strlen (prefix_str) == 1); + + prefix = *prefix_str; + + i = doc.termlist_begin (); + i.skip_to (prefix_str); + + while (1) { + if (i == doc.termlist_end ()) + break; + value_string = *i; + if (value_string.empty () || value_string[0] != prefix) + break; + g_hash_table_insert (thread_ids, + strdup (value_string.c_str () + 1), NULL); + i++; } } @@ -125,14 +276,15 @@ notmuch_message_t * notmuch_database_find_message (notmuch_database_t *notmuch, const char *message_id) { - Xapian::PostingIterator i, end; + notmuch_private_status_t status; + unsigned int doc_id; - find_doc_ids (notmuch, "msgid", message_id, &i, &end); + status = find_unique_doc_id (notmuch, "id", message_id, &doc_id); - if (i == end) + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) return NULL; - return _notmuch_message_create (notmuch, notmuch, *i); + return _notmuch_message_create (notmuch, notmuch, doc_id); } /* Return one or more thread_ids, (as a GPtrArray of strings), for the @@ -147,7 +299,6 @@ find_thread_ids (notmuch_database_t *notmuch, GPtrArray *parents, const char *message_id) { - Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::PostingIterator child, children_end; Xapian::Document doc; GHashTable *thread_ids; @@ -161,7 +312,7 @@ find_thread_ids (notmuch_database_t *notmuch, find_doc_ids (notmuch, "ref", message_id, &child, &children_end); for ( ; child != children_end; child++) { - doc = find_message_by_docid 
(db, *child); + doc = find_document_for_doc_id (notmuch, *child); insert_thread_id (thread_ids, doc); } @@ -391,6 +542,7 @@ notmuch_database_open (const char *path) struct stat st; int err; char *local_path = NULL; + unsigned int i; if (path == NULL) path = local_path = notmuch_database_default_path (); @@ -415,6 +567,12 @@ notmuch_database_open (const char *path) notmuch->query_parser = new Xapian::QueryParser; notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); notmuch->query_parser->set_database (*notmuch->xapian_db); + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { + prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i]; + notmuch->query_parser->add_boolean_prefix (prefix->name, + prefix->prefix); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s\n", error.get_msg().c_str()); @@ -445,6 +603,103 @@ notmuch_database_get_path (notmuch_database_t *notmuch) return notmuch->path; } +notmuch_private_status_t +find_timestamp_document (notmuch_database_t *notmuch, const char *db_key, + Xapian::Document *doc, unsigned int *doc_id) +{ + return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id); +} + +/* We allow the user to use arbitrarily long keys for timestamps, + * (they're for filesystem paths after all, which have no limit we + * know about). But we have a term-length limit. So if we exceed that, + * we'll use the SHA-1 of the user's key as the actual key for + * constructing a database term. + * + * Caution: This function returns a newly allocated string which the + * caller should free() when finished. 
+ */ +static char * +timestamp_db_key (const char *key) +{ + int term_len = strlen (_find_prefix ("timestamp")) + strlen (key); + + if (term_len > NOTMUCH_TERM_MAX) + return notmuch_sha1_of_string (key); + else + return strdup (key); +} + +notmuch_status_t +notmuch_database_set_timestamp (notmuch_database_t *notmuch, + const char *key, time_t timestamp) +{ + Xapian::Document doc; + unsigned int doc_id; + notmuch_private_status_t status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + char *db_key = NULL; + + db_key = timestamp_db_key (key); + + try { + status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + + doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (timestamp)); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + char *term = talloc_asprintf (NULL, "%s%s", + _find_prefix ("timestamp"), db_key); + doc.add_term (term); + talloc_free (term); + + notmuch->xapian_db->add_document (doc); + } else { + notmuch->xapian_db->replace_document (doc_id, doc); + } + + } catch (Xapian::Error &error) { + fprintf (stderr, "A Xapian exception occurred: %s.\n", + error.get_msg().c_str()); + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + if (db_key) + free (db_key); + + return ret; +} + +time_t +notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) +{ + Xapian::Document doc; + unsigned int doc_id; + notmuch_private_status_t status; + char *db_key = NULL; + time_t ret = 0; + + db_key = timestamp_db_key (key); + + try { + status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) + goto DONE; + + ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); + } catch (Xapian::Error &error) { + goto DONE; + } + + DONE: + if (db_key) + free (db_key); + + return ret; +} + notmuch_status_t notmuch_database_add_message (notmuch_database_t *notmuch, const char *filename) @@ -522,23 +777,8 @@ notmuch_database_add_message 
(notmuch_database_t *notmuch, /* Has a message previously been added with the same ID? */ old_filename = notmuch_message_get_filename (message); if (old_filename && strlen (old_filename)) { - /* XXX: This is too noisy to actually print, and what do we - * really expect the user to do? Go manually delete a - * redundant message or merge two similar messages? - * Instead we should handle this transparently. - * - * What we likely want to move to is adding both filenames - * to the database so that subsequent indexing will pick up - * terms from both files. - */ -#if 0 - fprintf (stderr, - "Note: Attempting to add a message with a duplicate message ID:\n" - "Old: %s\n" "New: %s\n", - old_filename, filename); - fprintf (stderr, "The old filename will be used, but any new terms\n" - "from the new message will added to the database.\n"); -#endif + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + goto DONE; } else { _notmuch_message_set_filename (message, filename); _notmuch_message_add_term (message, "type", "mail");