X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=database.cc;h=604062835427f0c539dd16791b24d84aab7d1da0;hp=a6539ffe0fd9869268ea14c43336d4b0b28a4e45;hb=c33eed84f2c1a23d7f3835f7f7d480fde179ff4d;hpb=1ecdef59f5f3b5b1e9f00bbf27349fd5d48d747f diff --git a/database.cc b/database.cc index a6539ffe..60406283 100644 --- a/database.cc +++ b/database.cc @@ -24,223 +24,247 @@ #include -#include /* g_strdup_printf, g_free, GPtrArray, GHashTable */ +#include /* g_free, GPtrArray, GHashTable */ using namespace std; +#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) + +typedef struct { + const char *name; + const char *prefix; +} prefix_t; + +/* Here's the current schema for our database: + * + * We currently have two different types of documents: mail and timestamps. + * + * Mail document + * ------------- + * A mail document is associated with a particular email message file + * on disk. It is indexed with the following prefixed terms: + * + * Single terms of given prefix: + * + * type: mail + * + * id: Unique ID of mail, (from Message-ID header or generated + * as "notmuch-sha1-. + * + * thread: The ID of the thread to which the mail belongs + * + * Multiple terms of given prefix: + * + * ref: All unresolved message IDs from In-Reply-To and + * References headers in the message. (Once a referenced + * message is added to the database and the thread IDs + * are linked the corresponding "ref" term is dropped + * from the message document.) + * + * tag: Any tags associated with this message by the user. + * + * A mail document also has two values: + * + * TIMESTAMP: The time_t value corresponding to the message's + * Date header. + * + * MESSAGE_ID: The unique ID of the mail mess (see "id" above) + * + * Timestamp document + * ------------------ + * A timestamp document is used by a client of the notmuch library to + * maintain data necessary to allow for efficient polling of mail + * directories. 
The notmuch library does no interpretation of + * timestamps, but merely allows the user to store and retrieve + * timestamps as name/value pairs. + * + * The timestamp document is indexed with a single prefixed term: + * + * timestamp: The user's key value (likely a directory name) + * + * and has a single value: + * + * TIMESTAMP: The time_t value from the user. + */ + +/* With these prefix values we follow the conventions published here: + * + * http://xapian.org/docs/omega/termprefixes.html + * + * as much as makes sense. Note that I took some liberty in matching + * the reserved prefix values to notmuch concepts, (for example, 'G' + * is documented as "newsGroup (or similar entity - e.g. a web forum + * name)", for which I think the thread is the closest analogue in + * notmuch. This in spite of the fact that we will eventually be + * storing mailing-list messages where 'G' for "mailing list name" + * might be even a closer analogue. I'm treating the single-character + * prefixes preferentially for core notmuch concepts (which will be + * nearly universal to all mail messages). + */ + +prefix_t BOOLEAN_PREFIX_INTERNAL[] = { + { "type", "T" }, + { "ref", "XREFERENCE" }, + { "timestamp", "XTIMESTAMP" }, + { "contact", "XCONTACT" } +}; + +prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { + { "thread", "G" }, + { "tag", "K" }, + { "id", "Q" } +}; + +prefix_t PROBABILISTIC_PREFIX[]= { + { "from", "XFROM" }, + { "to", "XTO" }, + { "attachment", "XATTACHMENT" }, + { "subject", "XSUBJECT"} +}; + +int +_internal_error (const char *format, ...) 
+{ + va_list va_args; + + va_start (va_args, format); + + fprintf (stderr, "Internal error: "); + vfprintf (stderr, format, va_args); + + exit (1); + + return 1; +} + +const char * +_find_prefix (const char *name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_INTERNAL[i].prefix; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_EXTERNAL[i].prefix; + + for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) + if (strcmp (name, PROBABILISTIC_PREFIX[i].name) == 0) + return PROBABILISTIC_PREFIX[i].prefix; + + INTERNAL_ERROR ("No prefix exists for '%s'\n", name); + + return ""; +} + const char * notmuch_status_to_string (notmuch_status_t status) { switch (status) { case NOTMUCH_STATUS_SUCCESS: return "No error occurred"; + case NOTMUCH_STATUS_OUT_OF_MEMORY: + return "Out of memory"; case NOTMUCH_STATUS_XAPIAN_EXCEPTION: return "A Xapian exception occurred"; case NOTMUCH_STATUS_FILE_ERROR: return "Something went wrong trying to read or write a file"; case NOTMUCH_STATUS_FILE_NOT_EMAIL: return "File is not an email"; + case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + return "Message ID is identical to a message in database"; case NOTMUCH_STATUS_NULL_POINTER: return "Erroneous NULL pointer"; case NOTMUCH_STATUS_TAG_TOO_LONG: - return "Tag value is too long"; + return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)"; + case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: + return "Unblanced number of calls to notmuch_message_freeze/thaw"; default: case NOTMUCH_STATUS_LAST_STATUS: return "Unknown error status value"; } } -/* "128 bits of thread-id ought to be enough for anybody" */ -#define NOTMUCH_THREAD_ID_BITS 128 -#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4) -typedef struct _thread_id { - char str[NOTMUCH_THREAD_ID_DIGITS + 1]; -} thread_id_t; - -static 
void -thread_id_generate (thread_id_t *thread_id) -{ - static int seeded = 0; - FILE *dev_random; - uint32_t value; - char *s; - int i; - - if (! seeded) { - dev_random = fopen ("/dev/random", "r"); - if (dev_random == NULL) { - srand (time (NULL)); - } else { - fread ((void *) &value, sizeof (value), 1, dev_random); - srand (value); - fclose (dev_random); - } - seeded = 1; - } - - s = thread_id->str; - for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) { - value = rand (); - sprintf (s, "%08x", value); - s += 8; - } -} - -/* XXX: We should drop this function and convert all callers to call - * _notmuch_message_add_term instead. */ static void -add_term (Xapian::Document doc, - const char *prefix_name, - const char *value) +find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end) { - const char *prefix; + Xapian::PostingIterator i; char *term; - if (value == NULL) - return; + term = talloc_asprintf (notmuch, "%s%s", + _find_prefix (prefix_name), value); - prefix = _find_prefix (prefix_name); + *begin = notmuch->xapian_db->postlist_begin (term); - term = g_strdup_printf ("%s%s", prefix, value); + *end = notmuch->xapian_db->postlist_end (term); - if (strlen (term) <= NOTMUCH_TERM_MAX) - doc.add_term (term); - - g_free (term); + talloc_free (term); } -static void -find_messages_by_term (Xapian::Database *db, - const char *prefix_name, - const char *value, - Xapian::PostingIterator *begin, - Xapian::PostingIterator *end) +static notmuch_private_status_t +find_unique_doc_id (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + unsigned int *doc_id) { - Xapian::PostingIterator i; - char *term; - - term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value); - - *begin = db->postlist_begin (term); + Xapian::PostingIterator i, end; - if (end) - *end = db->postlist_end (term); + find_doc_ids (notmuch, prefix_name, value, &i, &end); - free 
(term); + if (i == end) { + *doc_id = 0; + return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND; + } else { + *doc_id = *i; + return NOTMUCH_PRIVATE_STATUS_SUCCESS; + } } -Xapian::Document -find_message_by_docid (Xapian::Database *db, Xapian::docid docid) +static Xapian::Document +find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id) { - return db->get_document (docid); + return notmuch->xapian_db->get_document (doc_id); } -static void -insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) +static notmuch_private_status_t +find_unique_document (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::Document *document, + unsigned int *doc_id) { - string value_string; - const char *value, *id, *comma; - - value_string = doc.get_value (NOTMUCH_VALUE_THREAD); - value = value_string.c_str(); - if (strlen (value)) { - id = value; - while (*id) { - comma = strchr (id, ','); - if (comma == NULL) - comma = id + strlen (id); - g_hash_table_insert (thread_ids, - strndup (id, comma - id), NULL); - id = comma; - if (*id) - id++; - } + notmuch_private_status_t status; + + status = find_unique_doc_id (notmuch, prefix_name, value, doc_id); + + if (status) { + *document = Xapian::Document (); + return status; } + + *document = find_document_for_doc_id (notmuch, *doc_id); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; } notmuch_message_t * notmuch_database_find_message (notmuch_database_t *notmuch, const char *message_id) { - Xapian::PostingIterator i, end; + notmuch_private_status_t status; + unsigned int doc_id; - find_messages_by_term (notmuch->xapian_db, - "msgid", message_id, &i, &end); + status = find_unique_doc_id (notmuch, "id", message_id, &doc_id); - if (i == end) + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) return NULL; - return _notmuch_message_create (notmuch, notmuch, *i); -} - -/* Return one or more thread_ids, (as a GPtrArray of strings), for the - * given message based on looking into the database for any 
messages - * referenced in parents, and also for any messages in the database - * referencing message_id. - * - * Caller should free all strings in the array and the array itself, - * (g_ptr_array_free) when done. */ -static GPtrArray * -find_thread_ids (notmuch_database_t *notmuch, - GPtrArray *parents, - const char *message_id) -{ - Xapian::WritableDatabase *db = notmuch->xapian_db; - Xapian::PostingIterator child, children_end; - Xapian::Document doc; - GHashTable *thread_ids; - GList *keys, *l; - unsigned int i; - const char *parent_message_id; - GPtrArray *result; - - thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal, - free, NULL); - - find_messages_by_term (db, "ref", message_id, &child, &children_end); - for ( ; child != children_end; child++) { - doc = find_message_by_docid (db, *child); - insert_thread_id (thread_ids, doc); - } - - for (i = 0; i < parents->len; i++) { - notmuch_message_t *parent; - notmuch_thread_ids_t *ids; - - parent_message_id = (char *) g_ptr_array_index (parents, i); - parent = notmuch_database_find_message (notmuch, parent_message_id); - if (parent == NULL) - continue; - - for (ids = notmuch_message_get_thread_ids (parent); - notmuch_thread_ids_has_more (ids); - notmuch_thread_ids_advance (ids)) - { - const char *id; - - id = notmuch_thread_ids_get (ids); - g_hash_table_insert (thread_ids, strdup (id), NULL); - } - - notmuch_message_destroy (parent); - } - - result = g_ptr_array_new (); - - keys = g_hash_table_get_keys (thread_ids); - for (l = keys; l; l = l->next) { - char *id = (char *) l->data; - g_ptr_array_add (result, id); - } - g_list_free (keys); - - /* We're done with the hash table, but we've taken the pointers to - * the allocated strings and put them into our result array, so - * tell the hash not to free them on its way out. 
*/ - g_hash_table_steal_all (thread_ids); - g_hash_table_unref (thread_ids); - - return result; + return _notmuch_message_create (notmuch, notmuch, doc_id, NULL); } /* Advance 'str' past any whitespace or RFC 822 comments. A comment is @@ -285,12 +309,11 @@ skip_space_and_comments (const char **str) * If not NULL, then *next will be made to point to the first character * not parsed, (possibly pointing to the final '\0' terminator. * - * Returns a newly allocated string which the caller should free() - * when done with it. + * Returns a newly talloc'ed string belonging to 'ctx'. * * Returns NULL if there is any error parsing the message-id. */ static char * -parse_message_id (const char *message_id, const char **next) +parse_message_id (void *ctx, const char *message_id, const char **next) { const char *s, *end; char *result; @@ -331,7 +354,7 @@ parse_message_id (const char *message_id, const char **next) if (end <= s) return NULL; - result = strndup (s, end - s + 1); + result = talloc_strndup (ctx, s, end - s + 1); /* Finally, collapse any whitespace that is within the message-id * itself. */ @@ -347,10 +370,11 @@ parse_message_id (const char *message_id, const char **next) return result; } -/* Parse a References header value, putting a copy of each referenced - * message-id into 'array'. */ +/* Parse a References header value, putting a (talloc'ed under 'ctx') + * copy of each referenced message-id into 'hash'. 
*/ static void -parse_references (GPtrArray *array, +parse_references (void *ctx, + GHashTable *hash, const char *refs) { char *ref; @@ -359,20 +383,27 @@ parse_references (GPtrArray *array, return; while (*refs) { - ref = parse_message_id (refs, &refs); + ref = parse_message_id (ctx, refs, &refs); if (ref) - g_ptr_array_add (array, ref); + g_hash_table_insert (hash, ref, NULL); } } char * notmuch_database_default_path (void) { + char *path; + if (getenv ("NOTMUCH_BASE")) return strdup (getenv ("NOTMUCH_BASE")); - return g_strdup_printf ("%s/mail", getenv ("HOME")); + if (asprintf (&path, "%s/mail", getenv ("HOME")) == -1) { + fprintf (stderr, "Out of memory.\n"); + return xstrdup(""); + } + + return path; } notmuch_database_t * @@ -400,7 +431,7 @@ notmuch_database_create (const char *path) goto DONE; } - notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); + notmuch_path = talloc_asprintf (NULL, "%s/%s", path, ".notmuch"); err = mkdir (notmuch_path, 0755); @@ -414,7 +445,7 @@ notmuch_database_create (const char *path) DONE: if (notmuch_path) - free (notmuch_path); + talloc_free (notmuch_path); if (local_path) free (local_path); @@ -429,11 +460,16 @@ notmuch_database_open (const char *path) struct stat st; int err; char *local_path = NULL; + unsigned int i; if (path == NULL) path = local_path = notmuch_database_default_path (); - notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); + if (asprintf (&notmuch_path, "%s/%s", path, ".notmuch") == -1) { + notmuch_path = NULL; + fprintf (stderr, "Out of memory\n"); + goto DONE; + } err = stat (notmuch_path, &st); if (err) { @@ -442,20 +478,44 @@ notmuch_database_open (const char *path) goto DONE; } - xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian"); + if (asprintf (&xapian_path, "%s/%s", notmuch_path, "xapian") == -1) { + xapian_path = NULL; + fprintf (stderr, "Out of memory\n"); + goto DONE; + } notmuch = talloc (NULL, notmuch_database_t); notmuch->path = talloc_strdup (notmuch, path); + if
(notmuch->path[strlen (notmuch->path) - 1] == '/') + notmuch->path[strlen (notmuch->path) - 1] = '\0'; + try { notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, Xapian::DB_CREATE_OR_OPEN); notmuch->query_parser = new Xapian::QueryParser; + notmuch->term_gen = new Xapian::TermGenerator; + notmuch->term_gen->set_stemmer (Xapian::Stem ("english")); + notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); notmuch->query_parser->set_database (*notmuch->xapian_db); + notmuch->query_parser->set_stemmer (Xapian::Stem ("english")); + notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME); + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { + prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i]; + notmuch->query_parser->add_boolean_prefix (prefix->name, + prefix->prefix); + } + + for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) { + prefix_t *prefix = &PROBABILISTIC_PREFIX[i]; + notmuch->query_parser->add_prefix (prefix->name, prefix->prefix); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s\n", error.get_msg().c_str()); + notmuch = NULL; } DONE: @@ -472,6 +532,9 @@ notmuch_database_open (const char *path) void notmuch_database_close (notmuch_database_t *notmuch) { + notmuch->xapian_db->flush (); + + delete notmuch->term_gen; delete notmuch->query_parser; delete notmuch->xapian_db; talloc_free (notmuch); @@ -483,23 +546,327 @@ notmuch_database_get_path (notmuch_database_t *notmuch) return notmuch->path; } +static notmuch_private_status_t +find_timestamp_document (notmuch_database_t *notmuch, const char *db_key, + Xapian::Document *doc, unsigned int *doc_id) +{ + return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id); +} + +/* We allow the user to use arbitrarily long keys for timestamps, + * (they're for filesystem paths after all, which have no limit we + * know about). But we have a term-length limit. 
So if we exceed that, + * we'll use the SHA-1 of the user's key as the actual key for + * constructing a database term. + * + * Caution: This function returns a newly allocated string which the + * caller should free() when finished. + */ +static char * +timestamp_db_key (const char *key) +{ + int term_len = strlen (_find_prefix ("timestamp")) + strlen (key); + + if (term_len > NOTMUCH_TERM_MAX) + return notmuch_sha1_of_string (key); + else + return strdup (key); +} + notmuch_status_t -notmuch_database_add_message (notmuch_database_t *notmuch, - const char *filename) +notmuch_database_set_timestamp (notmuch_database_t *notmuch, + const char *key, time_t timestamp) { - Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::Document doc; - notmuch_message_file_t *message_file; + unsigned int doc_id; + notmuch_private_status_t status; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + char *db_key = NULL; + + db_key = timestamp_db_key (key); + + try { + status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + + doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (timestamp)); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + char *term = talloc_asprintf (NULL, "%s%s", + _find_prefix ("timestamp"), db_key); + doc.add_term (term); + talloc_free (term); + + notmuch->xapian_db->add_document (doc); + } else { + notmuch->xapian_db->replace_document (doc_id, doc); + } - GPtrArray *parents, *thread_ids; + } catch (Xapian::Error &error) { + fprintf (stderr, "A Xapian exception occurred: %s.\n", + error.get_msg().c_str()); + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + if (db_key) + free (db_key); + + return ret; +} - const char *refs, *in_reply_to, *date, *header; +time_t +notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) +{ + Xapian::Document doc; + unsigned int doc_id; + notmuch_private_status_t status; + char *db_key = NULL; + time_t ret = 0; + + db_key = timestamp_db_key (key); + + try { + status = 
find_timestamp_document (notmuch, db_key, &doc, &doc_id); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) + goto DONE; + + ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); + } catch (Xapian::Error &error) { + goto DONE; + } + + DONE: + if (db_key) + free (db_key); + + return ret; +} + +/* Find the thread ID to which the message with 'message_id' belongs. + * + * Returns NULL if no message with message ID 'message_id' is in the + * database. + * + * Otherwise, returns a newly talloced string belonging to 'ctx'. + */ +static const char * +_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + void *ctx, + const char *message_id) +{ + notmuch_message_t *message; + const char *ret = NULL; + + message = notmuch_database_find_message (notmuch, message_id); + if (message == NULL) + goto DONE; + + ret = talloc_steal (ctx, notmuch_message_get_thread_id (message)); + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static notmuch_status_t +_merge_threads (notmuch_database_t *notmuch, + const char *winner_thread_id, + const char *loser_thread_id) +{ + Xapian::PostingIterator loser, loser_end; + notmuch_message_t *message = NULL; + notmuch_private_status_t private_status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); + + for ( ; loser != loser_end; loser++) { + message = _notmuch_message_create (notmuch, notmuch, + *loser, &private_status); + if (message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + _notmuch_message_remove_term (message, "thread", loser_thread_id); + _notmuch_message_add_term (message, "thread", winner_thread_id); + _notmuch_message_sync (message); + + notmuch_message_destroy (message); + message = NULL; + } + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static void 
+_my_talloc_free_for_g_hash (void *ptr) +{ + talloc_free (ptr); +} + +static notmuch_status_t +_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + const char **thread_id) +{ + GHashTable *parents = NULL; + const char *refs, *in_reply_to; + GList *l, *keys = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + parents = g_hash_table_new_full (g_str_hash, g_str_equal, + _my_talloc_free_for_g_hash, NULL); + + refs = notmuch_message_file_get_header (message_file, "references"); + parse_references (message, parents, refs); + + in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to"); + parse_references (message, parents, in_reply_to); + + keys = g_hash_table_get_keys (parents); + for (l = keys; l; l = l->next) { + char *parent_message_id; + const char *parent_thread_id; + + parent_message_id = (char *) l->data; + parent_thread_id = _resolve_message_id_to_thread_id (notmuch, + message, + parent_message_id); + + if (parent_thread_id == NULL) { + _notmuch_message_add_term (message, "ref", parent_message_id); + } else { + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, parent_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, parent_thread_id)) { + ret = _merge_threads (notmuch, *thread_id, parent_thread_id); + if (ret) + goto DONE; + } + } + } + + DONE: + if (keys) + g_list_free (keys); + if (parents) + g_hash_table_unref (parents); + + return ret; +} + +static notmuch_status_t +_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, + notmuch_message_t *message, + const char **thread_id) +{ + const char *message_id = notmuch_message_get_message_id (message); + Xapian::PostingIterator child, children_end; + notmuch_message_t *child_message = NULL; + const char *child_thread_id; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + 
+ find_doc_ids (notmuch, "ref", message_id, &child, &children_end); + + for ( ; child != children_end; child++) { + + child_message = _notmuch_message_create (message, notmuch, + *child, &private_status); + if (child_message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + child_thread_id = notmuch_message_get_thread_id (child_message); + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, child_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, child_thread_id)) { + _notmuch_message_remove_term (child_message, "ref", + message_id); + _notmuch_message_sync (child_message); + ret = _merge_threads (notmuch, *thread_id, child_thread_id); + if (ret) + goto DONE; + } + + notmuch_message_destroy (child_message); + child_message = NULL; + } + + DONE: + if (child_message) + notmuch_message_destroy (child_message); + + return ret; +} + +/* Given a (mostly empty) 'message' and its corresponding + * 'message_file', link it to existing threads in the database. + * + * We first look at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. We also look in the + * database for existing messages that reference 'message'. + * + * The end result is to call _notmuch_message_add_thread_id with one + * or more thread IDs to which this message belongs, (including + * generating a new thread ID if necessary if the message doesn't + * connect to any existing threads).
+ */ +static notmuch_status_t +_notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file) +{ + notmuch_status_t status; + const char *thread_id = NULL; + + status = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, + &thread_id); + if (status) + return status; + + status = _notmuch_database_link_message_to_children (notmuch, message, + &thread_id); + if (status) + return status; + + if (thread_id == NULL) + _notmuch_message_ensure_thread_id (message); + + return NOTMUCH_STATUS_SUCCESS; +} + +notmuch_status_t +notmuch_database_add_message (notmuch_database_t *notmuch, + const char *filename, + notmuch_message_t **message_ret) +{ + notmuch_message_file_t *message_file; + notmuch_message_t *message; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + + const char *date, *header; const char *from, *to, *subject; char *message_id; - time_t time_value; - unsigned int i; + if (message_ret) + *message_ret = NULL; message_file = notmuch_message_file_open (filename); if (message_file == NULL) { @@ -518,28 +885,33 @@ notmuch_database_add_message (notmuch_database_t *notmuch, (char *) NULL); try { - doc.set_data (filename); - - add_term (doc, "type", "mail"); - - parents = g_ptr_array_new (); - - refs = notmuch_message_file_get_header (message_file, "references"); - parse_references (parents, refs); + /* Before we do any real work, (especially before doing a + * potential SHA-1 computation on the entire file's contents), + * let's make sure that what we're looking at looks like an + * actual email message. 
+ */ + from = notmuch_message_file_get_header (message_file, "from"); + subject = notmuch_message_file_get_header (message_file, "subject"); + to = notmuch_message_file_get_header (message_file, "to"); - in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to"); - parse_references (parents, in_reply_to); + if (from == NULL && + subject == NULL && + to == NULL) + { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; + } - for (i = 0; i < parents->len; i++) - add_term (doc, "ref", (char *) g_ptr_array_index (parents, i)); + /* Now that we're sure it's mail, the first order of business + * is to find a message ID (or else create one ourselves). */ header = notmuch_message_file_get_header (message_file, "message-id"); if (header) { - message_id = parse_message_id (header, NULL); + message_id = parse_message_id (message_file, header, NULL); /* So the header value isn't RFC-compliant, but it's * better than no message-id at all. */ if (message_id == NULL) - message_id = xstrdup (header); + message_id = talloc_strdup (message_file, header); } else { /* No message-id at all, let's generate one by taking a * hash over the file's contents. */ @@ -551,68 +923,45 @@ notmuch_database_add_message (notmuch_database_t *notmuch, goto DONE; } - message_id = g_strdup_printf ("notmuch-sha1-%s", sha1); + message_id = talloc_asprintf (message_file, + "notmuch-sha1-%s", sha1); free (sha1); } - thread_ids = find_thread_ids (notmuch, parents, message_id); - - for (i = 0; i < parents->len; i++) - g_free (g_ptr_array_index (parents, i)); - g_ptr_array_free (parents, TRUE); + /* Now that we have a message ID, we get a message object, + * (which may or may not reference an existing document in the + * database). */ - add_term (doc, "msgid", message_id); - doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + /* Use NULL for owner since we want to free this locally. 
*/ + message = _notmuch_message_create_for_message_id (NULL, + notmuch, + message_id, + &private_status); - free (message_id); + talloc_free (message_id); - if (thread_ids->len) { - unsigned int i; - GString *thread_id; - char *id; - - for (i = 0; i < thread_ids->len; i++) { - id = (char *) thread_ids->pdata[i]; - add_term (doc, "thread", id); - if (i == 0) - thread_id = g_string_new (id); - else - g_string_append_printf (thread_id, ",%s", id); + if (message == NULL) + goto DONE; - free (id); - } - doc.add_value (NOTMUCH_VALUE_THREAD, thread_id->str); - g_string_free (thread_id, TRUE); + /* Is this a newly created message object? */ + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + _notmuch_message_set_filename (message, filename); + _notmuch_message_add_term (message, "type", "mail"); } else { - /* If not part of any existing thread, generate a new thread_id. */ - thread_id_t thread_id; - - thread_id_generate (&thread_id); - add_term (doc, "thread", thread_id.str); - doc.add_value (NOTMUCH_VALUE_THREAD, thread_id.str); + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + goto DONE; } - g_ptr_array_free (thread_ids, TRUE); + ret = _notmuch_database_link_message (notmuch, message, message_file); + if (ret) + goto DONE; date = notmuch_message_file_get_header (message_file, "date"); - time_value = notmuch_parse_date (date, NULL); + _notmuch_message_set_date (message, date); - doc.add_value (NOTMUCH_VALUE_DATE, - Xapian::sortable_serialise (time_value)); + _notmuch_message_index_file (message, filename); - from = notmuch_message_file_get_header (message_file, "from"); - subject = notmuch_message_file_get_header (message_file, "subject"); - to = notmuch_message_file_get_header (message_file, "to"); - - if (from == NULL && - subject == NULL && - to == NULL) - { - ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; - goto DONE; - } else { - db->add_document (doc); - } + _notmuch_message_sync (message); } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian 
exception occurred: %s.\n", error.get_msg().c_str()); @@ -621,6 +970,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch, } DONE: + if (message) { + if (ret == NOTMUCH_STATUS_SUCCESS && message_ret) + *message_ret = message; + else + notmuch_message_destroy (message); + } + if (message_file) notmuch_message_file_close (message_file);