X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=database.cc;h=ea20705509060334b181f40db5d5254a326720f0;hp=2f3959fa5f6a3a28168c0738545df2b8ce97cacd;hb=a360670c03475b1489ea5e2327cc3037cc8dff0b;hpb=c5eea2b77ef3fb90c8ddcb953f1086ba5bb123f3 diff --git a/database.cc b/database.cc index 2f3959fa..ea207055 100644 --- a/database.cc +++ b/database.cc @@ -18,122 +18,160 @@ * Author: Carl Worth */ -#include "notmuch-private.h" +#include "database-private.h" #include #include -#include /* g_strdup_printf, g_free, GHashTable */ +#include /* g_strdup_printf, g_free, GPtrArray, GHashTable */ using namespace std; -struct _notmuch_database { - char *path; - Xapian::WritableDatabase *xapian_db; - Xapian::TermGenerator *term_gen; -}; - #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) -/* Xapian complains if we provide a term longer than this. */ -#define NOTMUCH_MAX_TERM 245 - -/* These prefix values are specifically chosen to be compatible - * with sup, (http://sup.rubyforge.org), written by - * William Morgan , and released - * under the GNU GPL v2. - */ - typedef struct { const char *name; const char *prefix; } prefix_t; -prefix_t NORMAL_PREFIX[] = { - { "subject", "S" }, - { "body", "B" }, - { "from_name", "FN" }, - { "to_name", "TN" }, - { "name", "N" }, - { "attachment", "A" } +/* Here's the current schema for our database: + * + * We currently have two different types of documents: mail and timestamps. + * + * Mail document + * ------------- + * A mail document is associated with a particular email message file + * on disk. It is indexed with the following prefixed terms: + * + * Single terms of given prefix: + * + * type: mail + * + * id: Unique ID of mail, (from Message-ID header or generated + * as "notmuch-sha1-. + * + * thread: The ID of the thread to which the mail belongs + * + * Multiple terms of given prefix: + * + * ref: All unresolved message IDs from In-Reply-To and + * References headers in the message. (Once a referenced + * message is added to the database and the thread IDs + * are linked the corresponding "ref" term is dropped + * from the message document.) + * + * tag: Any tags associated with this message by the user. + * + * A mail document also has two values: + * + * TIMESTAMP: The time_t value corresponding to the message's + * Date header. + * + * MESSAGE_ID: The unique ID of the mail mess (see "id" above) + * + * Timestamp document + * ------------------ + * A timestamp document is used by a client of the notmuch library to + * maintain data necessary to allow for efficient polling of mail + * directories. The notmuch library does no interpretation of + * timestamps, but merely allows the user to store and retrieve + * timestamps as name/value pairs. + * + * The timestamp document is indexed with a single prefixed term: + * + * timestamp: The user's key value (likely a directory name) + * + * and has a single value: + * + * TIMETAMPS: The time_t value from the user. + */ + +/* With these prefix values we follow the conventions published here: + * + * http://xapian.org/docs/omega/termprefixes.html + * + * as much as makes sense. Note that I took some liberty in matching + * the reserved prefix values to notmuch concepts, (for example, 'G' + * is documented as "newsGroup (or similar entity - e.g. a web forum + * name)", for which I think the thread is the closest analogue in + * notmuch. This in spite of the fact that we will eventually be + * storing mailing-list messages where 'G' for "mailing list name" + * might be even a closer analogue. I'm treating the single-character + * prefixes preferentially for core notmuch concepts (which will be + * nearly universal to all mail messages). + */ + +prefix_t BOOLEAN_PREFIX_INTERNAL[] = { + { "type", "T" }, + { "thread", "G" }, + { "ref", "XREFERENCE" }, + { "timestamp", "XTIMESTAMP" }, }; -prefix_t BOOLEAN_PREFIX[] = { - { "type", "K" }, - { "from_email", "FE" }, - { "to_email", "TE" }, - { "email", "E" }, - { "date", "D" }, - { "label", "L" }, - { "source_id", "I" }, - { "attachment_extension", "O" }, - { "msgid", "Q" }, - { "thread", "H" }, - { "ref", "R" } +prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { + { "tag", "K" }, + { "id", "Q" } }; -/* Similarly, these value numbers are also chosen to be sup - * compatible. */ +int +_internal_error (const char *format, ...) +{ + va_list va_args; + + va_start (va_args, format); -typedef enum { - NOTMUCH_VALUE_MESSAGE_ID = 0, - NOTMUCH_VALUE_THREAD = 1, - NOTMUCH_VALUE_DATE = 2 -} notmuch_value_t; + vfprintf (stderr, format, va_args); + + exit (1); + + return 1; +} -static const char * -find_prefix (const char *name) +const char * +_find_prefix (const char *name) { unsigned int i; - for (i = 0; i < ARRAY_SIZE (NORMAL_PREFIX); i++) - if (strcmp (name, NORMAL_PREFIX[i].name) == 0) - return NORMAL_PREFIX[i].prefix; + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_INTERNAL[i].prefix; + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) + if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0) + return BOOLEAN_PREFIX_EXTERNAL[i].prefix; - for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++) - if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0) - return BOOLEAN_PREFIX[i].prefix; + INTERNAL_ERROR ("No prefix exists for '%s'\n", name); return ""; } -/* "128 bits of thread-id ought to be enough for anybody" */ -#define NOTMUCH_THREAD_ID_BITS 128 -#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4) -typedef struct _thread_id { - char str[NOTMUCH_THREAD_ID_DIGITS + 1]; -} thread_id_t; - -static void -thread_id_generate (thread_id_t *thread_id) +const char * +notmuch_status_to_string (notmuch_status_t status) { - static int seeded = 0; - FILE *dev_random; - uint32_t value; - char *s; - int i; - - if (! seeded) { - dev_random = fopen ("/dev/random", "r"); - if (dev_random == NULL) { - srand (time (NULL)); - } else { - fread ((void *) &value, sizeof (value), 1, dev_random); - srand (value); - fclose (dev_random); - } - seeded = 1; - } - - s = thread_id->str; - for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) { - value = rand (); - sprintf (s, "%08x", value); - s += 8; + switch (status) { + case NOTMUCH_STATUS_SUCCESS: + return "No error occurred"; + case NOTMUCH_STATUS_XAPIAN_EXCEPTION: + return "A Xapian exception occurred"; + case NOTMUCH_STATUS_FILE_ERROR: + return "Something went wrong trying to read or write a file"; + case NOTMUCH_STATUS_FILE_NOT_EMAIL: + return "File is not an email"; + case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + return "Message ID is identical to a message in database"; + case NOTMUCH_STATUS_NULL_POINTER: + return "Erroneous NULL pointer"; + case NOTMUCH_STATUS_TAG_TOO_LONG: + return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)"; + default: + case NOTMUCH_STATUS_LAST_STATUS: + return "Unknown error status value"; } } +/* XXX: We should drop this function and convert all callers to call + * _notmuch_message_add_term instead. */ static void add_term (Xapian::Document doc, const char *prefix_name, @@ -145,129 +183,93 @@ add_term (Xapian::Document doc, if (value == NULL) return; - prefix = find_prefix (prefix_name); + prefix = _find_prefix (prefix_name); term = g_strdup_printf ("%s%s", prefix, value); - if (strlen (term) <= NOTMUCH_MAX_TERM) + if (strlen (term) <= NOTMUCH_TERM_MAX) doc.add_term (term); g_free (term); } static void -find_messages_by_term (Xapian::Database *db, - const char *prefix_name, - const char *value, - Xapian::PostingIterator *begin, - Xapian::PostingIterator *end) +find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end) { Xapian::PostingIterator i; char *term; - term = g_strdup_printf ("%s%s", find_prefix (prefix_name), value); + term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value); - *begin = db->postlist_begin (term); + *begin = notmuch->xapian_db->postlist_begin (term); - if (end) - *end = db->postlist_end (term); + *end = notmuch->xapian_db->postlist_end (term); free (term); } -Xapian::Document -find_message_by_docid (Xapian::Database *db, Xapian::docid docid) -{ - return db->get_document (docid); -} - -Xapian::Document -find_message_by_message_id (Xapian::Database *db, const char *message_id) +static notmuch_private_status_t +find_unique_doc_id (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + unsigned int *doc_id) { Xapian::PostingIterator i, end; - find_messages_by_term (db, "msgid", message_id, &i, &end); + find_doc_ids (notmuch, prefix_name, value, &i, &end); - if (i != end) - return find_message_by_docid (db, *i); - else - return Xapian::Document (); + if (i == end) { + *doc_id = 0; + return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND; + } else { + *doc_id = *i; + return NOTMUCH_PRIVATE_STATUS_SUCCESS; + } } -static void -insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) +static Xapian::Document +find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id) { - string value_string; - const char *value, *id, *comma; - - value_string = doc.get_value (NOTMUCH_VALUE_THREAD); - value = value_string.c_str(); - if (strlen (value)) { - id = value; - while (*id) { - comma = strchr (id, ','); - if (comma == NULL) - comma = id + strlen (id); - g_hash_table_insert (thread_ids, - strndup (id, comma - id), NULL); - id = comma; - if (*id) - id++; - } - } + return notmuch->xapian_db->get_document (doc_id); } -/* Return one or more thread_ids, (as a GPtrArray of strings), for the - * given message based on looking into the database for any messages - * referenced in parents, and also for any messages in the database - * referencing message_id. - * - * Caller should free all strings in the array and the array itself, - * (g_ptr_array_free) when done. */ -static GPtrArray * -find_thread_ids (Xapian::Database *db, - GPtrArray *parents, - const char *message_id) +static notmuch_private_status_t +find_unique_document (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::Document *document, + unsigned int *doc_id) { - Xapian::PostingIterator child, children_end; - Xapian::Document doc; - GHashTable *thread_ids; - GList *keys, *l; - unsigned int i; - const char *parent_message_id; - GPtrArray *result; + notmuch_private_status_t status; - thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal, - free, NULL); + status = find_unique_doc_id (notmuch, prefix_name, value, doc_id); - find_messages_by_term (db, "ref", message_id, &child, &children_end); - for ( ; child != children_end; child++) { - doc = find_message_by_docid (db, *child); - insert_thread_id (thread_ids, doc); + if (status) { + *document = Xapian::Document (); + return status; } - for (i = 0; i < parents->len; i++) { - parent_message_id = (char *) g_ptr_array_index (parents, i); - doc = find_message_by_message_id (db, parent_message_id); - insert_thread_id (thread_ids, doc); - } + *document = find_document_for_doc_id (notmuch, *doc_id); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; +} - result = g_ptr_array_new (); +notmuch_message_t * +notmuch_database_find_message (notmuch_database_t *notmuch, + const char *message_id) +{ + notmuch_private_status_t status; + unsigned int doc_id; - keys = g_hash_table_get_keys (thread_ids); - for (l = keys; l; l = l->next) { - char *id = (char *) l->data; - g_ptr_array_add (result, id); - } - g_list_free (keys); + status = find_unique_doc_id (notmuch, "id", message_id, &doc_id); - /* We're done with the hash table, but we've taken the pointers to - * the allocated strings and put them into our result array, so - * tell the hash not to free them on its way out. */ - g_hash_table_steal_all (thread_ids); - g_hash_table_unref (thread_ids); + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) + return NULL; - return result; + return _notmuch_message_create (notmuch, notmuch, doc_id, NULL); } /* Advance 'str' past any whitespace or RFC 822 comments. A comment is @@ -320,6 +322,7 @@ static char * parse_message_id (const char *message_id, const char **next) { const char *s, *end; + char *result; if (message_id == NULL) return NULL; @@ -354,16 +357,29 @@ parse_message_id (const char *message_id, const char **next) if (end > s && *end == '>') end--; - if (end > s) - return strndup (s, end - s + 1); - else + if (end <= s) return NULL; + + result = strndup (s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; } /* Parse a References header value, putting a copy of each referenced - * message-id into 'array'. */ + * message-id into 'hash'. */ static void -parse_references (GPtrArray *array, +parse_references (GHashTable *hash, const char *refs) { char *ref; @@ -375,28 +391,42 @@ parse_references (GPtrArray *array, ref = parse_message_id (refs, &refs); if (ref) - g_ptr_array_add (array, ref); + g_hash_table_insert (hash, ref, NULL); } } +char * +notmuch_database_default_path (void) +{ + if (getenv ("NOTMUCH_BASE")) + return strdup (getenv ("NOTMUCH_BASE")); + + return g_strdup_printf ("%s/mail", getenv ("HOME")); +} + notmuch_database_t * notmuch_database_create (const char *path) { - char *notmuch_path; + notmuch_database_t *notmuch = NULL; + char *notmuch_path = NULL; struct stat st; int err; + char *local_path = NULL; + + if (path == NULL) + path = local_path = notmuch_database_default_path (); err = stat (path, &st); if (err) { fprintf (stderr, "Error: Cannot create database at %s: %s.\n", path, strerror (errno)); - return NULL; + goto DONE; } if (! S_ISDIR (st.st_mode)) { fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n", path); - return NULL; + goto DONE; } notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); @@ -406,50 +436,71 @@ notmuch_database_create (const char *path) if (err) { fprintf (stderr, "Error: Cannot create directory %s: %s.\n", notmuch_path, strerror (errno)); - free (notmuch_path); - return NULL; + goto DONE; } - free (notmuch_path); + notmuch = notmuch_database_open (path); - return notmuch_database_open (path); + DONE: + if (notmuch_path) + free (notmuch_path); + if (local_path) + free (local_path); + + return notmuch; } notmuch_database_t * notmuch_database_open (const char *path) { - notmuch_database_t *notmuch; - char *notmuch_path, *xapian_path; + notmuch_database_t *notmuch = NULL; + char *notmuch_path = NULL, *xapian_path = NULL; struct stat st; int err; + char *local_path = NULL; + unsigned int i; + + if (path == NULL) + path = local_path = notmuch_database_default_path (); notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); err = stat (notmuch_path, &st); if (err) { - fprintf (stderr, "Error: Cannot stat %s: %s\n", - notmuch_path, strerror (err)); - free (notmuch_path); - return NULL; + fprintf (stderr, "Error opening database at %s: %s\n", + notmuch_path, strerror (errno)); + goto DONE; } xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian"); - free (notmuch_path); - /* C++ is so nasty in requiring these casts. I'm almost tempted to - * write a C wrapper for Xapian... */ - notmuch = (notmuch_database_t *) xmalloc (sizeof (notmuch_database_t)); - notmuch->path = xstrdup (path); + notmuch = talloc (NULL, notmuch_database_t); + notmuch->path = talloc_strdup (notmuch, path); try { notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, Xapian::DB_CREATE_OR_OPEN); + notmuch->query_parser = new Xapian::QueryParser; + notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); + notmuch->query_parser->set_database (*notmuch->xapian_db); + + for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { + prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i]; + notmuch->query_parser->add_boolean_prefix (prefix->name, + prefix->prefix); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s\n", error.get_msg().c_str()); } - free (xapian_path); + DONE: + if (local_path) + free (local_path); + if (notmuch_path) + free (notmuch_path); + if (xapian_path) + free (xapian_path); return notmuch; } @@ -457,9 +508,9 @@ notmuch_database_open (const char *path) void notmuch_database_close (notmuch_database_t *notmuch) { + delete notmuch->query_parser; delete notmuch->xapian_db; - free (notmuch->path); - free (notmuch); + talloc_free (notmuch); } const char * @@ -468,41 +519,333 @@ notmuch_database_get_path (notmuch_database_t *notmuch) return notmuch->path; } +notmuch_private_status_t +find_timestamp_document (notmuch_database_t *notmuch, const char *db_key, + Xapian::Document *doc, unsigned int *doc_id) +{ + return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id); +} + +/* We allow the user to use arbitrarily long keys for timestamps, + * (they're for filesystem paths after all, which have no limit we + * know about). But we have a term-length limit. So if we exceed that, + * we'll use the SHA-1 of the user's key as the actual key for + * constructing a database term. + * + * Caution: This function returns a newly allocated string which the + * caller should free() when finished. + */ +static char * +timestamp_db_key (const char *key) +{ + int term_len = strlen (_find_prefix ("timestamp")) + strlen (key); + + if (term_len > NOTMUCH_TERM_MAX) + return notmuch_sha1_of_string (key); + else + return strdup (key); +} + notmuch_status_t -notmuch_database_add_message (notmuch_database_t *notmuch, - const char *filename) +notmuch_database_set_timestamp (notmuch_database_t *notmuch, + const char *key, time_t timestamp) { - Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::Document doc; - notmuch_message_t *message; + unsigned int doc_id; + notmuch_private_status_t status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + char *db_key = NULL; - GPtrArray *parents, *thread_ids; + db_key = timestamp_db_key (key); - const char *refs, *in_reply_to, *date, *header; - char *message_id; + try { + status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); - time_t time_value; - unsigned int i; + doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (timestamp)); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + char *term = talloc_asprintf (NULL, "%s%s", + _find_prefix ("timestamp"), db_key); + doc.add_term (term); + talloc_free (term); + + notmuch->xapian_db->add_document (doc); + } else { + notmuch->xapian_db->replace_document (doc_id, doc); + } + + } catch (Xapian::Error &error) { + fprintf (stderr, "A Xapian exception occurred: %s.\n", + error.get_msg().c_str()); + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + if (db_key) + free (db_key); + + return ret; +} - message = notmuch_message_open (filename); +time_t +notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) +{ + Xapian::Document doc; + unsigned int doc_id; + notmuch_private_status_t status; + char *db_key = NULL; + time_t ret = 0; + + db_key = timestamp_db_key (key); try { - doc = Xapian::Document (); + status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + + if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) + goto DONE; + + ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); + } catch (Xapian::Error &error) { + goto DONE; + } + + DONE: + if (db_key) + free (db_key); + + return ret; +} + +/* Find the thread ID to which the message with 'message_id' belongs. + * + * Returns NULL if no message with message ID 'message_id' is in the + * database. + * + * Otherwise, returns a newly talloced string belonging to 'ctx'. + */ +const char * +_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + void *ctx, + const char *message_id) +{ + notmuch_message_t *message; + const char *ret = NULL; + + message = notmuch_database_find_message (notmuch, message_id); + if (message == NULL) + goto DONE; + + ret = talloc_steal (ctx, notmuch_message_get_thread_id (message)); + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static notmuch_status_t +_merge_threads (notmuch_database_t *notmuch, + const char *winner_thread_id, + const char *loser_thread_id) +{ + Xapian::PostingIterator loser, loser_end; + notmuch_message_t *message = NULL; + notmuch_private_status_t private_status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); + + for ( ; loser != loser_end; loser++) { + message = _notmuch_message_create (notmuch, notmuch, + *loser, &private_status); + if (message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + _notmuch_message_remove_term (message, "thread", loser_thread_id); + _notmuch_message_add_term (message, "thread", winner_thread_id); + _notmuch_message_sync (message); + + notmuch_message_destroy (message); + message = NULL; + } + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static notmuch_status_t +_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + const char **thread_id) +{ + GHashTable *parents = NULL; + const char *refs, *in_reply_to; + GList *l, *keys = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + parents = g_hash_table_new_full (g_str_hash, g_str_equal, + free, NULL); + + refs = notmuch_message_file_get_header (message_file, "references"); + parse_references (parents, refs); + + in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to"); + parse_references (parents, in_reply_to); + + keys = g_hash_table_get_keys (parents); + for (l = keys; l; l = l->next) { + char *parent_message_id; + const char *parent_thread_id; + + parent_message_id = (char *) l->data; + parent_thread_id = _resolve_message_id_to_thread_id (notmuch, + message, + parent_message_id); - doc.set_data (filename); + if (parent_thread_id == NULL) { + _notmuch_message_add_term (message, "ref", parent_message_id); + } else { + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, parent_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, parent_thread_id)) { + ret = _merge_threads (notmuch, *thread_id, parent_thread_id); + if (ret) + goto DONE; + } + } + } + + DONE: + if (keys) + g_list_free (keys); + if (parents) + g_hash_table_unref (parents); + + return ret; +} + +static notmuch_status_t +_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, + notmuch_message_t *message, + const char **thread_id) +{ + const char *message_id = notmuch_message_get_message_id (message); + Xapian::PostingIterator child, children_end; + notmuch_message_t *child_message = NULL; + const char *child_thread_id; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + + find_doc_ids (notmuch, "ref", message_id, &child, &children_end); + + for ( ; child != children_end; child++) { - parents = g_ptr_array_new (); + child_message = _notmuch_message_create (message, notmuch, + *child, &private_status); + if (child_message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + child_thread_id = notmuch_message_get_thread_id (child_message); + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, child_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, child_thread_id)) { + _notmuch_message_remove_term (child_message, "ref", + message_id); + _notmuch_message_sync (child_message); + ret = _merge_threads (notmuch, *thread_id, child_thread_id); + if (ret) + goto DONE; + } + + notmuch_message_destroy (child_message); + child_message = NULL; + } + + DONE: + if (child_message) + notmuch_message_destroy (child_message); + + return ret; +} + +/* Given a (mostly empty) 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * + * We first looke at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. We also look in the + * database for existing message that reference 'message'.p + * + * The end result is to call _notmuch_message_add_thread_id with one + * or more thread IDs to which this message belongs, (including + * generating a new thread ID if necessary if the message doesn't + * connect to any existing threads). + */ +static notmuch_status_t +_notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file) +{ + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + const char *thread_id = NULL; - refs = notmuch_message_get_header (message, "references"); - parse_references (parents, refs); + _notmuch_database_link_message_to_parents (notmuch, message, + message_file, + &thread_id); - in_reply_to = notmuch_message_get_header (message, "in-reply-to"); - parse_references (parents, in_reply_to); + ret = _notmuch_database_link_message_to_children (notmuch, message, + &thread_id); - for (i = 0; i < parents->len; i++) - add_term (doc, "ref", (char *) g_ptr_array_index (parents, i)); + if (thread_id == NULL) + _notmuch_message_ensure_thread_id (message); - header = notmuch_message_get_header (message, "message-id"); + return ret; +} + +notmuch_status_t +notmuch_database_add_message (notmuch_database_t *notmuch, + const char *filename) +{ + notmuch_message_file_t *message_file; + notmuch_message_t *message; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + const char *date, *header; + const char *from, *to, *subject, *old_filename; + char *message_id; + + message_file = notmuch_message_file_open (filename); + if (message_file == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + notmuch_message_file_restrict_headers (message_file, + "date", + "from", + "in-reply-to", + "message-id", + "references", + "subject", + "to", + (char *) NULL); + + try { + /* The first order of business is to find/create a message ID. */ + + header = notmuch_message_file_get_header (message_file, "message-id"); if (header) { message_id = parse_message_id (header, NULL); /* So the header value isn't RFC-compliant, but it's @@ -510,64 +853,76 @@ notmuch_database_add_message (notmuch_database_t *notmuch, if (message_id == NULL) message_id = xstrdup (header); } else { - /* XXX: Should generate a message_id here, (such as a SHA1 - * sum of the message itself) */ - message_id = NULL; - } - - thread_ids = find_thread_ids (db, parents, message_id); + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. */ + char *sha1 = notmuch_sha1_of_file (filename); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } - for (i = 0; i < parents->len; i++) - g_free (g_ptr_array_index (parents, i)); - g_ptr_array_free (parents, TRUE); - if (message_id) { - add_term (doc, "msgid", message_id); - doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + message_id = g_strdup_printf ("notmuch-sha1-%s", sha1); + free (sha1); } - if (thread_ids->len) { - unsigned int i; - GString *thread_id; - char *id; + /* Now that we have a message ID, we get a message object, + * (which may or may not reference an existing document in the + * database). */ + + /* Use NULL for owner since we want to free this locally. */ + message = _notmuch_message_create_for_message_id (NULL, + notmuch, + message_id, + &ret); + free (message_id); - for (i = 0; i < thread_ids->len; i++) { - id = (char *) thread_ids->pdata[i]; - add_term (doc, "thread", id); - if (i == 0) - thread_id = g_string_new (id); - else - g_string_append_printf (thread_id, ",%s", id); + if (message == NULL) + goto DONE; - free (id); - } - g_ptr_array_free (thread_ids, TRUE); - doc.add_value (NOTMUCH_VALUE_THREAD, thread_id->str); - g_string_free (thread_id, TRUE); - } else if (message_id) { - /* If not part of any existing thread, generate a new thread_id. */ - thread_id_t thread_id; - - thread_id_generate (&thread_id); - add_term (doc, "thread", thread_id.str); - doc.add_value (NOTMUCH_VALUE_THREAD, thread_id.str); + /* Has a message previously been added with the same ID? */ + old_filename = notmuch_message_get_filename (message); + if (old_filename && strlen (old_filename)) { + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + goto DONE; + } else { + _notmuch_message_set_filename (message, filename); + _notmuch_message_add_term (message, "type", "mail"); } - free (message_id); + ret = _notmuch_database_link_message (notmuch, message, message_file); + if (ret) + goto DONE; - date = notmuch_message_get_header (message, "date"); - time_value = notmuch_parse_date (date, NULL); + date = notmuch_message_file_get_header (message_file, "date"); + _notmuch_message_set_date (message, date); - doc.add_value (NOTMUCH_VALUE_DATE, - Xapian::sortable_serialise (time_value)); + from = notmuch_message_file_get_header (message_file, "from"); + subject = notmuch_message_file_get_header (message_file, "subject"); + to = notmuch_message_file_get_header (message_file, "to"); - db->add_document (doc); + if (from == NULL && + subject == NULL && + to == NULL) + { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; + } else { + _notmuch_message_sync (message); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s.\n", error.get_msg().c_str()); - return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + goto DONE; } - notmuch_message_close (message); + DONE: + if (message) + notmuch_message_destroy (message); + if (message_file) + notmuch_message_file_close (message_file); - return NOTMUCH_STATUS_SUCCESS; + return ret; }