From: David Bremner Date: Thu, 10 Aug 2017 11:08:49 +0000 (-0400) Subject: Merge tag 'debian/0.25-5' X-Git-Tag: 0.26_rc0~207 X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=commitdiff_plain;h=178d62cf9c9959fe603c9ffef9fa90f65b67dcd5;hp=68dc19659a5c6175dc45b11d3ef50d46bc942102 Merge tag 'debian/0.25-5' uploaded to unstable --- diff --git a/Makefile.local b/Makefile.local index 6bc78ef8..af12ca7f 100644 --- a/Makefile.local +++ b/Makefile.local @@ -225,6 +225,7 @@ notmuch_client_srcs = \ notmuch-dump.c \ notmuch-insert.c \ notmuch-new.c \ + notmuch-reindex.c \ notmuch-reply.c \ notmuch-restore.c \ notmuch-search.c \ diff --git a/doc/conf.py b/doc/conf.py index a3d82696..aa864b3c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -95,6 +95,10 @@ man_pages = [ u'incorporate new mail into the notmuch database', [notmuch_authors], 1), + ('man1/notmuch-reindex', 'notmuch-reindex', + u're-index matching messages', + [notmuch_authors], 1), + ('man1/notmuch-reply', 'notmuch-reply', u'constructs a reply template for a set of messages', [notmuch_authors], 1), diff --git a/doc/index.rst b/doc/index.rst index 344606d9..aa6c9f40 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -18,6 +18,7 @@ Contents: man5/notmuch-hooks man1/notmuch-insert man1/notmuch-new + man1/notmuch-reindex man1/notmuch-reply man1/notmuch-restore man1/notmuch-search diff --git a/doc/man1/notmuch-reindex.rst b/doc/man1/notmuch-reindex.rst new file mode 100644 index 00000000..e39cc4ee --- /dev/null +++ b/doc/man1/notmuch-reindex.rst @@ -0,0 +1,29 @@ +=============== +notmuch-reindex +=============== + +SYNOPSIS +======== + +**notmuch** **reindex** [*option* ...] <*search-term*> ... + +DESCRIPTION +=========== + +Re-index all messages matching the search terms. + +See **notmuch-search-terms(7)** for details of the supported syntax for +<*search-term*\ >. + +The **reindex** command searches for all messages matching the +supplied search terms, and re-creates the full-text index on these +messages using the supplied options. + +SEE ALSO +======== + +**notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**, +**notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**, +**notmuch-new(1)**, +**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**, +**notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)** diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst index 4e660a6f..fd6dcadd 100644 --- a/doc/man1/notmuch-search.rst +++ b/doc/man1/notmuch-search.rst @@ -42,7 +42,9 @@ Supported options for **search** include the search terms. The summary includes the thread ID, date, the number of messages in the thread (both the number matched and the total number), the authors of the thread and - the subject. + the subject. In the case where a thread contains multiple files for + some messages, the total number of files is printed in parentheses + (see below for an example). **threads** Output the thread IDs of all threads with any message @@ -135,6 +137,19 @@ Supported options for **search** include prefix. The prefix matches messages based on filenames. This option filters filenames of the matching messages. +EXAMPLE +======= + +The following shows an example of the summary output format, with one +message having multiple filenames. + +:: + + % notmuch search date:today.. and tag:bad-news + thread:0000000000063c10 Today [1/1] Some Persun; To the bone (inbox unread) + thread:0000000000063c25 Today [1/1(2)] Ann Other; Bears (inbox unread) + thread:0000000000063c00 Today [1/1] A Thurd; Bites, stings, sad feelings (inbox unread) + EXIT STATUS =========== diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst index cb350d1a..40fd335b 100644 --- a/doc/man1/notmuch.rst +++ b/doc/man1/notmuch.rst @@ -163,8 +163,8 @@ SEE ALSO **notmuch-address(1)**, **notmuch-compact(1)**, **notmuch-config(1)**, **notmuch-count(1)**, **notmuch-dump(1)**, **notmuch-hooks(5)**, -**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reply(1)**, -**notmuch-restore(1)**, **notmuch-search(1)**, +**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reindex(1)**, +**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)** The notmuch website: **https://notmuchmail.org** diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst index 47cab48d..dd76972e 100644 --- a/doc/man7/notmuch-search-terms.rst +++ b/doc/man7/notmuch-search-terms.rst @@ -9,6 +9,8 @@ SYNOPSIS **notmuch** **dump** [--format=(batch-tag|sup)] [--] [--output=<*file*>] [--] [<*search-term*> ...] +**notmuch** **reindex** [option ...] <*search-term*> ... + **notmuch** **search** [option ...] <*search-term*> ... **notmuch** **show** [option ...] <*search-term*> ... @@ -421,5 +423,6 @@ SEE ALSO **notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**, **notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**, -**notmuch-new(1)**, **notmuch-reply(1)**, **notmuch-restore(1)**, -**notmuch-search(1)**, **notmuch-show(1)**, **notmuch-tag(1)** +**notmuch-new(1)**, **notmuch-reindex(1)**, **notmuch-reply(1)**, +**notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-show(1)**, +**notmuch-tag(1)** diff --git a/emacs/notmuch.el b/emacs/notmuch.el index 0aeff560..44402f8a 100644 --- a/emacs/notmuch.el +++ b/emacs/notmuch.el @@ -49,7 +49,20 @@ ;; Have fun, and let us know if you have any comment, questions, or ;; kudos: Notmuch list (subscription is not ;; required, but is available from https://notmuchmail.org). - +;; +;; Note for MELPA users (and others tracking the development version +;; of notmuch-emacs): +;; +;; This emacs package needs a fairly closely matched version of the +;; notmuch program. If you use the MELPA version of notmuch.el (as +;; opposed to MELPA stable), you should be prepared to track the +;; master development branch (i.e. build from git) for the notmuch +;; program as well. Upgrading notmuch-emacs too far beyond the notmuch +;; program can CAUSE YOUR EMAIL TO STOP WORKING. +;; +;; TL;DR: notmuch-emacs from MELPA and notmuch from distro packages is +;; NOT SUPPORTED. +;; ;;; Code: (eval-when-compile (require 'cl)) diff --git a/lib/Makefile.local b/lib/Makefile.local index bf6e0649..0b5c4b08 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -38,6 +38,7 @@ libnotmuch_c_srcs = \ $(dir)/filenames.c \ $(dir)/string-list.c \ $(dir)/message-file.c \ + $(dir)/message-id.c \ $(dir)/messages.c \ $(dir)/sha1.c \ $(dir)/built-with.c \ @@ -50,6 +51,7 @@ libnotmuch_cxx_srcs = \ $(dir)/directory.cc \ $(dir)/index.cc \ $(dir)/message.cc \ + $(dir)/add-message.cc \ $(dir)/message-property.cc \ $(dir)/query.cc \ $(dir)/query-fp.cc \ diff --git a/lib/add-message.cc b/lib/add-message.cc new file mode 100644 index 00000000..711ed9fa --- /dev/null +++ b/lib/add-message.cc @@ -0,0 +1,577 @@ +#include "database-private.h" + +/* Parse a References header value, putting a (talloc'ed under 'ctx') + * copy of each referenced message-id into 'hash'. + * + * We explicitly avoid including any reference identical to + * 'message_id' in the result (to avoid mass confusion when a single + * message references itself cyclically---and yes, mail messages are + * not infrequent in the wild that do this---don't ask me why). + * + * Return the last reference parsed, if it is not equal to message_id. + */ +static char * +parse_references (void *ctx, + const char *message_id, + GHashTable *hash, + const char *refs) +{ + char *ref, *last_ref = NULL; + + if (refs == NULL || *refs == '\0') + return NULL; + + while (*refs) { + ref = _notmuch_message_id_parse (ctx, refs, &refs); + + if (ref && strcmp (ref, message_id)) { + g_hash_table_add (hash, ref); + last_ref = ref; + } + } + + /* The return value of this function is used to add a parent + * reference to the database. We should avoid making a message + * its own parent, thus the above check. + */ + return talloc_strdup(ctx, last_ref); +} + +static const char * +_notmuch_database_generate_thread_id (notmuch_database_t *notmuch) +{ + /* 16 bytes (+ terminator) for hexadecimal representation of + * a 64-bit integer. */ + static char thread_id[17]; + Xapian::WritableDatabase *db; + + db = static_cast (notmuch->xapian_db); + + notmuch->last_thread_id++; + + sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id); + + db->set_metadata ("last_thread_id", thread_id); + + return thread_id; +} + +static char * +_get_metadata_thread_id_key (void *ctx, const char *message_id) +{ + if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) + message_id = _notmuch_message_id_compressed (ctx, message_id); + + return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s", + message_id); +} + + +static notmuch_status_t +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret); + + +/* Find the thread ID to which the message with 'message_id' belongs. + * + * Note: 'thread_id_ret' must not be NULL! + * On success '*thread_id_ret' is set to a newly talloced string belonging to + * 'ctx'. + * + * Note: If there is no message in the database with the given + * 'message_id' then a new thread_id will be allocated for this + * message ID and stored in the database metadata so that the + * thread ID can be looked up if the message is added to the database + * later. + */ +static notmuch_status_t +_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret) +{ + notmuch_private_status_t status; + notmuch_message_t *message; + + if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) + return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, + thread_id_ret); + + /* Look for this message (regular or ghost) */ + message = _notmuch_message_create_for_message_id ( + notmuch, message_id, &status); + if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { + /* Message exists */ + *thread_id_ret = talloc_steal ( + ctx, notmuch_message_get_thread_id (message)); + } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + /* Message did not exist. Give it a fresh thread ID and + * populate this message as a ghost message. */ + *thread_id_ret = talloc_strdup ( + ctx, _notmuch_database_generate_thread_id (notmuch)); + if (! *thread_id_ret) { + status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; + } else { + status = _notmuch_message_initialize_ghost (message, *thread_id_ret); + if (status == 0) + /* Commit the new ghost message */ + _notmuch_message_sync (message); + } + } else { + /* Create failed. Fall through. */ + } + + notmuch_message_destroy (message); + + return COERCE_STATUS (status, "Error creating ghost message"); +} + +/* Pre-ghost messages _resolve_message_id_to_thread_id */ +static notmuch_status_t +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret) +{ + notmuch_status_t status; + notmuch_message_t *message; + std::string thread_id_string; + char *metadata_key; + Xapian::WritableDatabase *db; + + status = notmuch_database_find_message (notmuch, message_id, &message); + + if (status) + return status; + + if (message) { + *thread_id_ret = talloc_steal (ctx, + notmuch_message_get_thread_id (message)); + + notmuch_message_destroy (message); + + return NOTMUCH_STATUS_SUCCESS; + } + + /* Message has not been seen yet. + * + * We may have seen a reference to it already, in which case, we + * can return the thread ID stored in the metadata. Otherwise, we + * generate a new thread ID and store it there. + */ + db = static_cast (notmuch->xapian_db); + metadata_key = _get_metadata_thread_id_key (ctx, message_id); + thread_id_string = notmuch->xapian_db->get_metadata (metadata_key); + + if (thread_id_string.empty()) { + *thread_id_ret = talloc_strdup (ctx, + _notmuch_database_generate_thread_id (notmuch)); + db->set_metadata (metadata_key, *thread_id_ret); + } else { + *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str()); + } + + talloc_free (metadata_key); + + return NOTMUCH_STATUS_SUCCESS; +} + +static notmuch_status_t +_merge_threads (notmuch_database_t *notmuch, + const char *winner_thread_id, + const char *loser_thread_id) +{ + Xapian::PostingIterator loser, loser_end; + notmuch_message_t *message = NULL; + notmuch_private_status_t private_status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + _notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); + + for ( ; loser != loser_end; loser++) { + message = _notmuch_message_create (notmuch, notmuch, + *loser, &private_status); + if (message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + _notmuch_message_remove_term (message, "thread", loser_thread_id); + _notmuch_message_add_term (message, "thread", winner_thread_id); + _notmuch_message_sync (message); + + notmuch_message_destroy (message); + message = NULL; + } + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static void +_my_talloc_free_for_g_hash (void *ptr) +{ + talloc_free (ptr); +} + +notmuch_status_t +_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + const char **thread_id) +{ + GHashTable *parents = NULL; + const char *refs, *in_reply_to, *in_reply_to_message_id; + const char *last_ref_message_id, *this_message_id; + GList *l, *keys = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + parents = g_hash_table_new_full (g_str_hash, g_str_equal, + _my_talloc_free_for_g_hash, NULL); + this_message_id = notmuch_message_get_message_id (message); + + refs = _notmuch_message_file_get_header (message_file, "references"); + last_ref_message_id = parse_references (message, + this_message_id, + parents, refs); + + in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to"); + in_reply_to_message_id = parse_references (message, + this_message_id, + parents, in_reply_to); + + /* For the parent of this message, use the last message ID of the + * References header, if available. If not, fall back to the + * first message ID in the In-Reply-To header. */ + if (last_ref_message_id) { + _notmuch_message_add_term (message, "replyto", + last_ref_message_id); + } else if (in_reply_to_message_id) { + _notmuch_message_add_term (message, "replyto", + in_reply_to_message_id); + } + + keys = g_hash_table_get_keys (parents); + for (l = keys; l; l = l->next) { + char *parent_message_id; + const char *parent_thread_id = NULL; + + parent_message_id = (char *) l->data; + + _notmuch_message_add_term (message, "reference", + parent_message_id); + + ret = _resolve_message_id_to_thread_id (notmuch, + message, + parent_message_id, + &parent_thread_id); + if (ret) + goto DONE; + + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, parent_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, parent_thread_id)) { + ret = _merge_threads (notmuch, *thread_id, parent_thread_id); + if (ret) + goto DONE; + } + } + + DONE: + if (keys) + g_list_free (keys); + if (parents) + g_hash_table_unref (parents); + + return ret; +} + +static notmuch_status_t +_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, + notmuch_message_t *message, + const char **thread_id) +{ + const char *message_id = notmuch_message_get_message_id (message); + Xapian::PostingIterator child, children_end; + notmuch_message_t *child_message = NULL; + const char *child_thread_id; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + + _notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end); + + for ( ; child != children_end; child++) { + + child_message = _notmuch_message_create (message, notmuch, + *child, &private_status); + if (child_message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + child_thread_id = notmuch_message_get_thread_id (child_message); + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, child_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, child_thread_id)) { + _notmuch_message_remove_term (child_message, "reference", + message_id); + _notmuch_message_sync (child_message); + ret = _merge_threads (notmuch, *thread_id, child_thread_id); + if (ret) + goto DONE; + } + + notmuch_message_destroy (child_message); + child_message = NULL; + } + + DONE: + if (child_message) + notmuch_message_destroy (child_message); + + return ret; +} + +/* Fetch and clear the stored thread_id for message, or NULL if none. */ +static char * +_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + notmuch_message_t *message) +{ + const char *message_id; + std::string stored_id; + char *metadata_key; + + message_id = notmuch_message_get_message_id (message); + metadata_key = _get_metadata_thread_id_key (ctx, message_id); + + /* Check if we have already seen related messages to this one. + * If we have then use the thread_id that we stored at that time. + */ + stored_id = notmuch->xapian_db->get_metadata (metadata_key); + if (stored_id.empty ()) { + return NULL; + } else { + Xapian::WritableDatabase *db; + + db = static_cast (notmuch->xapian_db); + + /* Clear the metadata for this message ID. We don't need it + * anymore. */ + db->set_metadata (metadata_key, ""); + + return talloc_strdup (ctx, stored_id.c_str ()); + } +} + +/* Given a blank or ghost 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * + * First, if is_ghost, this retrieves the thread ID already stored in + * the message (which will be the case if a message was previously + * added that referenced this one). If the message is blank + * (!is_ghost), it doesn't have a thread ID yet (we'll generate one + * later in this function). If the database does not support ghost + * messages, this checks for a thread ID stored in database metadata + * for this message ID. + * + * Second, we look at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. + * + * Finally, we look in the database for existing message that + * reference 'message'. + * + * In all cases, we assign to the current message the first thread ID + * found. We will also merge any existing, distinct threads where this + * message belongs to both, (which is not uncommon when messages are + * processed out of order). + * + * Finally, if no thread ID has been found through referenced messages, we + * call _notmuch_message_generate_thread_id to generate a new thread + * ID. This should only happen for new, top-level messages, (no + * References or In-Reply-To header in this message, and no previously + * added message refers to this message). + */ +static notmuch_status_t +_notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + notmuch_bool_t is_ghost) +{ + void *local = talloc_new (NULL); + notmuch_status_t status; + const char *thread_id = NULL; + + /* Check if the message already had a thread ID */ + if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { + if (is_ghost) + thread_id = notmuch_message_get_thread_id (message); + } else { + thread_id = _consume_metadata_thread_id (local, notmuch, message); + if (thread_id) + _notmuch_message_add_term (message, "thread", thread_id); + } + + status = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, + &thread_id); + if (status) + goto DONE; + + if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) { + /* In general, it shouldn't be necessary to link children, + * since the earlier indexing of those children will have + * stored a thread ID for the missing parent. However, prior + * to ghost messages, these stored thread IDs were NOT + * rewritten during thread merging (and there was no + * performant way to do so), so if indexed children were + * pulled into a different thread ID by a merge, it was + * necessary to pull them *back* into the stored thread ID of + * the parent. With ghost messages, we just rewrite the + * stored thread IDs during merging, so this workaround isn't + * necessary. */ + status = _notmuch_database_link_message_to_children (notmuch, message, + &thread_id); + if (status) + goto DONE; + } + + /* If not part of any existing thread, generate a new thread ID. */ + if (thread_id == NULL) { + thread_id = _notmuch_database_generate_thread_id (notmuch); + + _notmuch_message_add_term (message, "thread", thread_id); + } + + DONE: + talloc_free (local); + + return status; +} + +notmuch_status_t +notmuch_database_add_message (notmuch_database_t *notmuch, + const char *filename, + notmuch_message_t **message_ret) +{ + notmuch_message_file_t *message_file; + notmuch_message_t *message = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; + notmuch_private_status_t private_status; + notmuch_bool_t is_ghost = FALSE, is_new = FALSE; + + const char *date; + const char *from, *to, *subject; + char *message_id = NULL; + + if (message_ret) + *message_ret = NULL; + + ret = _notmuch_database_ensure_writable (notmuch); + if (ret) + return ret; + + message_file = _notmuch_message_file_open (notmuch, filename); + if (message_file == NULL) + return NOTMUCH_STATUS_FILE_ERROR; + + /* Adding a message may change many documents. Do this all + * atomically. */ + ret = notmuch_database_begin_atomic (notmuch); + if (ret) + goto DONE; + + ret = _notmuch_message_file_get_headers (message_file, + &from, &subject, &to, &date, + &message_id); + if (ret) + goto DONE; + + try { + /* Now that we have a message ID, we get a message object, + * (which may or may not reference an existing document in the + * database). */ + + message = _notmuch_message_create_for_message_id (notmuch, + message_id, + &private_status); + + talloc_free (message_id); + + /* We cannot call notmuch_message_get_flag for a new message */ + switch (private_status) { + case NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND: + is_ghost = FALSE; + is_new = TRUE; + break; + case NOTMUCH_PRIVATE_STATUS_SUCCESS: + is_ghost = notmuch_message_get_flag (message, NOTMUCH_MESSAGE_FLAG_GHOST); + is_new = FALSE; + break; + default: + ret = COERCE_STATUS (private_status, + "Unexpected status value from _notmuch_message_create_for_message_id"); + goto DONE; + } + + _notmuch_message_add_filename (message, filename); + + if (is_new || is_ghost) { + _notmuch_message_add_term (message, "type", "mail"); + if (is_ghost) + /* Convert ghost message to a regular message */ + _notmuch_message_remove_term (message, "type", "ghost"); + } + + ret = _notmuch_database_link_message (notmuch, message, + message_file, is_ghost); + if (ret) + goto DONE; + + if (is_new || is_ghost) + _notmuch_message_set_header_values (message, date, from, subject); + + ret = _notmuch_message_index_file (message, message_file); + if (ret) + goto DONE; + + if (! is_new && !is_ghost) + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + + _notmuch_message_sync (message); + } catch (const Xapian::Error &error) { + _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + goto DONE; + } + + DONE: + if (message) { + if ((ret == NOTMUCH_STATUS_SUCCESS || + ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret) + *message_ret = message; + else + notmuch_message_destroy (message); + } + + if (message_file) + _notmuch_message_file_close (message_file); + + ret2 = notmuch_database_end_atomic (notmuch); + if ((ret == NOTMUCH_STATUS_SUCCESS || + ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && + ret2 != NOTMUCH_STATUS_SUCCESS) + ret = ret2; + + return ret; +} diff --git a/lib/database-private.h b/lib/database-private.h index 727b1d61..5555554b 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -246,4 +246,10 @@ _notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i, Xapian::TermIterator &end, const char *prefix); +void +_notmuch_database_find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end); #endif diff --git a/lib/database.cc b/lib/database.cc index 5b13f541..8f0e22a8 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -463,12 +463,12 @@ find_doc_ids_for_term (notmuch_database_t *notmuch, *end = notmuch->xapian_db->postlist_end (term); } -static void -find_doc_ids (notmuch_database_t *notmuch, - const char *prefix_name, - const char *value, - Xapian::PostingIterator *begin, - Xapian::PostingIterator *end) +void +_notmuch_database_find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end) { char *term; @@ -488,7 +488,7 @@ _notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch, { Xapian::PostingIterator i, end; - find_doc_ids (notmuch, prefix_name, value, &i, &end); + _notmuch_database_find_doc_ids (notmuch, prefix_name, value, &i, &end); if (i == end) { *doc_id = 0; @@ -568,147 +568,6 @@ notmuch_database_find_message (notmuch_database_t *notmuch, } } -/* Advance 'str' past any whitespace or RFC 822 comments. A comment is - * a (potentially nested) parenthesized sequence with '\' used to - * escape any character (including parentheses). - * - * If the sequence to be skipped continues to the end of the string, - * then 'str' will be left pointing at the final terminating '\0' - * character. - */ -static void -skip_space_and_comments (const char **str) -{ - const char *s; - - s = *str; - while (*s && (isspace (*s) || *s == '(')) { - while (*s && isspace (*s)) - s++; - if (*s == '(') { - int nesting = 1; - s++; - while (*s && nesting) { - if (*s == '(') { - nesting++; - } else if (*s == ')') { - nesting--; - } else if (*s == '\\') { - if (*(s+1)) - s++; - } - s++; - } - } - } - - *str = s; -} - -/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 - * comments, and the '<' and '>' delimiters. - * - * If not NULL, then *next will be made to point to the first character - * not parsed, (possibly pointing to the final '\0' terminator. - * - * Returns a newly talloc'ed string belonging to 'ctx'. - * - * Returns NULL if there is any error parsing the message-id. */ -static char * -_parse_message_id (void *ctx, const char *message_id, const char **next) -{ - const char *s, *end; - char *result; - - if (message_id == NULL || *message_id == '\0') - return NULL; - - s = message_id; - - skip_space_and_comments (&s); - - /* Skip any unstructured text as well. */ - while (*s && *s != '<') - s++; - - if (*s == '<') { - s++; - } else { - if (next) - *next = s; - return NULL; - } - - skip_space_and_comments (&s); - - end = s; - while (*end && *end != '>') - end++; - if (next) { - if (*end) - *next = end + 1; - else - *next = end; - } - - if (end > s && *end == '>') - end--; - if (end <= s) - return NULL; - - result = talloc_strndup (ctx, s, end - s + 1); - - /* Finally, collapse any whitespace that is within the message-id - * itself. */ - { - char *r; - int len; - - for (r = result, len = strlen (r); *r; r++, len--) - if (*r == ' ' || *r == '\t') - memmove (r, r+1, len); - } - - return result; -} - -/* Parse a References header value, putting a (talloc'ed under 'ctx') - * copy of each referenced message-id into 'hash'. - * - * We explicitly avoid including any reference identical to - * 'message_id' in the result (to avoid mass confusion when a single - * message references itself cyclically---and yes, mail messages are - * not infrequent in the wild that do this---don't ask me why). - * - * Return the last reference parsed, if it is not equal to message_id. - */ -static char * -parse_references (void *ctx, - const char *message_id, - GHashTable *hash, - const char *refs) -{ - char *ref, *last_ref = NULL; - - if (refs == NULL || *refs == '\0') - return NULL; - - while (*refs) { - ref = _parse_message_id (ctx, refs, &refs); - - if (ref && strcmp (ref, message_id)) { - g_hash_table_add (hash, ref); - last_ref = ref; - } - } - - /* The return value of this function is used to add a parent - * reference to the database. We should avoid making a message - * its own parent, thus the above check. - */ - return talloc_strdup(ctx, last_ref); -} - notmuch_status_t notmuch_database_create (const char *path, notmuch_database_t **database) { @@ -2044,583 +1903,6 @@ _notmuch_database_generate_doc_id (notmuch_database_t *notmuch) return notmuch->last_doc_id; } -static const char * -_notmuch_database_generate_thread_id (notmuch_database_t *notmuch) -{ - /* 16 bytes (+ terminator) for hexadecimal representation of - * a 64-bit integer. */ - static char thread_id[17]; - Xapian::WritableDatabase *db; - - db = static_cast (notmuch->xapian_db); - - notmuch->last_thread_id++; - - sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id); - - db->set_metadata ("last_thread_id", thread_id); - - return thread_id; -} - -static char * -_get_metadata_thread_id_key (void *ctx, const char *message_id) -{ - if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) - message_id = _notmuch_message_id_compressed (ctx, message_id); - - return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s", - message_id); -} - -static notmuch_status_t -_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret); - -/* Find the thread ID to which the message with 'message_id' belongs. - * - * Note: 'thread_id_ret' must not be NULL! - * On success '*thread_id_ret' is set to a newly talloced string belonging to - * 'ctx'. - * - * Note: If there is no message in the database with the given - * 'message_id' then a new thread_id will be allocated for this - * message ID and stored in the database metadata so that the - * thread ID can be looked up if the message is added to the database - * later. - */ -static notmuch_status_t -_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret) -{ - notmuch_private_status_t status; - notmuch_message_t *message; - - if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) - return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, - thread_id_ret); - - /* Look for this message (regular or ghost) */ - message = _notmuch_message_create_for_message_id ( - notmuch, message_id, &status); - if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { - /* Message exists */ - *thread_id_ret = talloc_steal ( - ctx, notmuch_message_get_thread_id (message)); - } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - /* Message did not exist. Give it a fresh thread ID and - * populate this message as a ghost message. */ - *thread_id_ret = talloc_strdup ( - ctx, _notmuch_database_generate_thread_id (notmuch)); - if (! *thread_id_ret) { - status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; - } else { - status = _notmuch_message_initialize_ghost (message, *thread_id_ret); - if (status == 0) - /* Commit the new ghost message */ - _notmuch_message_sync (message); - } - } else { - /* Create failed. Fall through. */ - } - - notmuch_message_destroy (message); - - return COERCE_STATUS (status, "Error creating ghost message"); -} - -/* Pre-ghost messages _resolve_message_id_to_thread_id */ -static notmuch_status_t -_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret) -{ - notmuch_status_t status; - notmuch_message_t *message; - string thread_id_string; - char *metadata_key; - Xapian::WritableDatabase *db; - - status = notmuch_database_find_message (notmuch, message_id, &message); - - if (status) - return status; - - if (message) { - *thread_id_ret = talloc_steal (ctx, - notmuch_message_get_thread_id (message)); - - notmuch_message_destroy (message); - - return NOTMUCH_STATUS_SUCCESS; - } - - /* Message has not been seen yet. - * - * We may have seen a reference to it already, in which case, we - * can return the thread ID stored in the metadata. Otherwise, we - * generate a new thread ID and store it there. - */ - db = static_cast (notmuch->xapian_db); - metadata_key = _get_metadata_thread_id_key (ctx, message_id); - thread_id_string = notmuch->xapian_db->get_metadata (metadata_key); - - if (thread_id_string.empty()) { - *thread_id_ret = talloc_strdup (ctx, - _notmuch_database_generate_thread_id (notmuch)); - db->set_metadata (metadata_key, *thread_id_ret); - } else { - *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str()); - } - - talloc_free (metadata_key); - - return NOTMUCH_STATUS_SUCCESS; -} - -static notmuch_status_t -_merge_threads (notmuch_database_t *notmuch, - const char *winner_thread_id, - const char *loser_thread_id) -{ - Xapian::PostingIterator loser, loser_end; - notmuch_message_t *message = NULL; - notmuch_private_status_t private_status; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - - find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); - - for ( ; loser != loser_end; loser++) { - message = _notmuch_message_create (notmuch, notmuch, - *loser, &private_status); - if (message == NULL) { - ret = COERCE_STATUS (private_status, - "Cannot find document for doc_id from query"); - goto DONE; - } - - _notmuch_message_remove_term (message, "thread", loser_thread_id); - _notmuch_message_add_term (message, "thread", winner_thread_id); - _notmuch_message_sync (message); - - notmuch_message_destroy (message); - message = NULL; - } - - DONE: - if (message) - notmuch_message_destroy (message); - - return ret; -} - -static void -_my_talloc_free_for_g_hash (void *ptr) -{ - talloc_free (ptr); -} - -static notmuch_status_t -_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, - notmuch_message_t *message, - notmuch_message_file_t *message_file, - const char **thread_id) -{ - GHashTable *parents = NULL; - const char *refs, *in_reply_to, *in_reply_to_message_id; - const char *last_ref_message_id, *this_message_id; - GList *l, *keys = NULL; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - - parents = g_hash_table_new_full (g_str_hash, g_str_equal, - _my_talloc_free_for_g_hash, NULL); - this_message_id = notmuch_message_get_message_id (message); - - refs = _notmuch_message_file_get_header (message_file, "references"); - last_ref_message_id = parse_references (message, - this_message_id, - parents, refs); - - in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to"); - in_reply_to_message_id = parse_references (message, - this_message_id, - parents, in_reply_to); - - /* For the parent of this message, use the last message ID of the - * References header, if available. If not, fall back to the - * first message ID in the In-Reply-To header. */ - if (last_ref_message_id) { - _notmuch_message_add_term (message, "replyto", - last_ref_message_id); - } else if (in_reply_to_message_id) { - _notmuch_message_add_term (message, "replyto", - in_reply_to_message_id); - } - - keys = g_hash_table_get_keys (parents); - for (l = keys; l; l = l->next) { - char *parent_message_id; - const char *parent_thread_id = NULL; - - parent_message_id = (char *) l->data; - - _notmuch_message_add_term (message, "reference", - parent_message_id); - - ret = _resolve_message_id_to_thread_id (notmuch, - message, - parent_message_id, - &parent_thread_id); - if (ret) - goto DONE; - - if (*thread_id == NULL) { - *thread_id = talloc_strdup (message, parent_thread_id); - _notmuch_message_add_term (message, "thread", *thread_id); - } else if (strcmp (*thread_id, parent_thread_id)) { - ret = _merge_threads (notmuch, *thread_id, parent_thread_id); - if (ret) - goto DONE; - } - } - - DONE: - if (keys) - g_list_free (keys); - if (parents) - g_hash_table_unref (parents); - - return ret; -} - -static notmuch_status_t -_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, - notmuch_message_t *message, - const char **thread_id) -{ - const char *message_id = notmuch_message_get_message_id (message); - Xapian::PostingIterator child, children_end; - notmuch_message_t *child_message = NULL; - const char *child_thread_id; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - notmuch_private_status_t private_status; - - find_doc_ids (notmuch, "reference", message_id, &child, &children_end); - - for ( ; child != children_end; child++) { - - child_message = _notmuch_message_create (message, notmuch, - *child, &private_status); - if (child_message == NULL) { - ret = COERCE_STATUS (private_status, - "Cannot find document for doc_id from query"); - goto DONE; - } - - child_thread_id = notmuch_message_get_thread_id (child_message); - if (*thread_id == NULL) { - *thread_id = talloc_strdup (message, child_thread_id); - _notmuch_message_add_term (message, "thread", *thread_id); - } else if (strcmp (*thread_id, child_thread_id)) { - _notmuch_message_remove_term (child_message, "reference", - message_id); - _notmuch_message_sync (child_message); - ret = _merge_threads (notmuch, *thread_id, child_thread_id); - if (ret) - goto DONE; - } - - notmuch_message_destroy (child_message); - child_message = NULL; - } - - DONE: - if (child_message) - notmuch_message_destroy (child_message); - - return ret; -} - -/* Fetch and clear the stored thread_id for message, or NULL if none. */ -static char * -_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, - notmuch_message_t *message) -{ - const char *message_id; - string stored_id; - char *metadata_key; - - message_id = notmuch_message_get_message_id (message); - metadata_key = _get_metadata_thread_id_key (ctx, message_id); - - /* Check if we have already seen related messages to this one. - * If we have then use the thread_id that we stored at that time. - */ - stored_id = notmuch->xapian_db->get_metadata (metadata_key); - if (stored_id.empty ()) { - return NULL; - } else { - Xapian::WritableDatabase *db; - - db = static_cast (notmuch->xapian_db); - - /* Clear the metadata for this message ID. We don't need it - * anymore. */ - db->set_metadata (metadata_key, ""); - - return talloc_strdup (ctx, stored_id.c_str ()); - } -} - -/* Given a blank or ghost 'message' and its corresponding - * 'message_file' link it to existing threads in the database. - * - * First, if is_ghost, this retrieves the thread ID already stored in - * the message (which will be the case if a message was previously - * added that referenced this one). If the message is blank - * (!is_ghost), it doesn't have a thread ID yet (we'll generate one - * later in this function). If the database does not support ghost - * messages, this checks for a thread ID stored in database metadata - * for this message ID. - * - * Second, we look at 'message_file' and its link-relevant headers - * (References and In-Reply-To) for message IDs. - * - * Finally, we look in the database for existing message that - * reference 'message'. - * - * In all cases, we assign to the current message the first thread ID - * found. We will also merge any existing, distinct threads where this - * message belongs to both, (which is not uncommon when messages are - * processed out of order). - * - * Finally, if no thread ID has been found through referenced messages, we - * call _notmuch_message_generate_thread_id to generate a new thread - * ID. This should only happen for new, top-level messages, (no - * References or In-Reply-To header in this message, and no previously - * added message refers to this message). - */ -static notmuch_status_t -_notmuch_database_link_message (notmuch_database_t *notmuch, - notmuch_message_t *message, - notmuch_message_file_t *message_file, - notmuch_bool_t is_ghost) -{ - void *local = talloc_new (NULL); - notmuch_status_t status; - const char *thread_id = NULL; - - /* Check if the message already had a thread ID */ - if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { - if (is_ghost) - thread_id = notmuch_message_get_thread_id (message); - } else { - thread_id = _consume_metadata_thread_id (local, notmuch, message); - if (thread_id) - _notmuch_message_add_term (message, "thread", thread_id); - } - - status = _notmuch_database_link_message_to_parents (notmuch, message, - message_file, - &thread_id); - if (status) - goto DONE; - - if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) { - /* In general, it shouldn't be necessary to link children, - * since the earlier indexing of those children will have - * stored a thread ID for the missing parent. However, prior - * to ghost messages, these stored thread IDs were NOT - * rewritten during thread merging (and there was no - * performant way to do so), so if indexed children were - * pulled into a different thread ID by a merge, it was - * necessary to pull them *back* into the stored thread ID of - * the parent. With ghost messages, we just rewrite the - * stored thread IDs during merging, so this workaround isn't - * necessary. */ - status = _notmuch_database_link_message_to_children (notmuch, message, - &thread_id); - if (status) - goto DONE; - } - - /* If not part of any existing thread, generate a new thread ID. */ - if (thread_id == NULL) { - thread_id = _notmuch_database_generate_thread_id (notmuch); - - _notmuch_message_add_term (message, "thread", thread_id); - } - - DONE: - talloc_free (local); - - return status; -} - -notmuch_status_t -notmuch_database_add_message (notmuch_database_t *notmuch, - const char *filename, - notmuch_message_t **message_ret) -{ - notmuch_message_file_t *message_file; - notmuch_message_t *message = NULL; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; - notmuch_private_status_t private_status; - notmuch_bool_t is_ghost = false; - - const char *date, *header; - const char *from, *to, *subject; - char *message_id = NULL; - - if (message_ret) - *message_ret = NULL; - - ret = _notmuch_database_ensure_writable (notmuch); - if (ret) - return ret; - - message_file = _notmuch_message_file_open (notmuch, filename); - if (message_file == NULL) - return NOTMUCH_STATUS_FILE_ERROR; - - /* Adding a message may change many documents. Do this all - * atomically. */ - ret = notmuch_database_begin_atomic (notmuch); - if (ret) - goto DONE; - - /* Parse message up front to get better error status. */ - ret = _notmuch_message_file_parse (message_file); - if (ret) - goto DONE; - - /* Before we do any real work, (especially before doing a - * potential SHA-1 computation on the entire file's contents), - * let's make sure that what we're looking at looks like an - * actual email message. - */ - from = _notmuch_message_file_get_header (message_file, "from"); - subject = _notmuch_message_file_get_header (message_file, "subject"); - to = _notmuch_message_file_get_header (message_file, "to"); - - if ((from == NULL || *from == '\0') && - (subject == NULL || *subject == '\0') && - (to == NULL || *to == '\0')) { - ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; - goto DONE; - } - - /* Now that we're sure it's mail, the first order of business - * is to find a message ID (or else create one ourselves). - */ - header = _notmuch_message_file_get_header (message_file, "message-id"); - if (header && *header != '\0') { - message_id = _parse_message_id (message_file, header, NULL); - - /* So the header value isn't RFC-compliant, but it's - * better than no message-id at all. - */ - if (message_id == NULL) - message_id = talloc_strdup (message_file, header); - } - - if (message_id == NULL ) { - /* No message-id at all, let's generate one by taking a - * hash over the file's contents. - */ - char *sha1 = _notmuch_sha1_of_file (filename); - - /* If that failed too, something is really wrong. Give up. */ - if (sha1 == NULL) { - ret = NOTMUCH_STATUS_FILE_ERROR; - goto DONE; - } - - message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1); - free (sha1); - } - - try { - /* Now that we have a message ID, we get a message object, - * (which may or may not reference an existing document in the - * database). */ - - message = _notmuch_message_create_for_message_id (notmuch, - message_id, - &private_status); - - talloc_free (message_id); - - if (message == NULL) { - ret = COERCE_STATUS (private_status, - "Unexpected status value from _notmuch_message_create_for_message_id"); - goto DONE; - } - - _notmuch_message_add_filename (message, filename); - - /* Is this a newly created message object or a ghost - * message? We have to be slightly careful: if this is a - * blank message, it's not safe to call - * notmuch_message_get_flag yet. */ - if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || - (is_ghost = notmuch_message_get_flag ( - message, NOTMUCH_MESSAGE_FLAG_GHOST))) { - _notmuch_message_add_term (message, "type", "mail"); - if (is_ghost) - /* Convert ghost message to a regular message */ - _notmuch_message_remove_term (message, "type", "ghost"); - - ret = _notmuch_database_link_message (notmuch, message, - message_file, is_ghost); - if (ret) - goto DONE; - - date = _notmuch_message_file_get_header (message_file, "date"); - _notmuch_message_set_header_values (message, date, from, subject); - - ret = _notmuch_message_index_file (message, message_file); - if (ret) - goto DONE; - } else { - ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; - } - - _notmuch_message_sync (message); - } catch (const Xapian::Error &error) { - _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n", - error.get_msg().c_str()); - notmuch->exception_reported = TRUE; - ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; - goto DONE; - } - - DONE: - if (message) { - if ((ret == NOTMUCH_STATUS_SUCCESS || - ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret) - *message_ret = message; - else - notmuch_message_destroy (message); - } - - if (message_file) - _notmuch_message_file_close (message_file); - - ret2 = notmuch_database_end_atomic (notmuch); - if ((ret == NOTMUCH_STATUS_SUCCESS || - ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && - ret2 != NOTMUCH_STATUS_SUCCESS) - ret = ret2; - - return ret; -} - notmuch_status_t notmuch_database_remove_message (notmuch_database_t *notmuch, const char *filename) diff --git a/lib/message-file.c b/lib/message-file.c index d7acf0d5..b90df305 100644 --- a/lib/message-file.c +++ b/lib/message-file.c @@ -92,6 +92,12 @@ _notmuch_message_file_open (notmuch_database_t *notmuch, return _notmuch_message_file_open_ctx (notmuch, NULL, filename); } +const char * +_notmuch_message_file_get_filename (notmuch_message_file_t *message_file) +{ + return message_file->filename; +} + void _notmuch_message_file_close (notmuch_message_file_t *message) { @@ -345,3 +351,83 @@ _notmuch_message_file_get_header (notmuch_message_file_t *message, return decoded; } + +notmuch_status_t +_notmuch_message_file_get_headers (notmuch_message_file_t *message_file, + const char **from_out, + const char **subject_out, + const char **to_out, + const char **date_out, + char **message_id_out) +{ + notmuch_status_t ret; + const char *header; + const char *from, *to, *subject, *date; + char *message_id = NULL; + + /* Parse message up front to get better error status. */ + ret = _notmuch_message_file_parse (message_file); + if (ret) + goto DONE; + + /* Before we do any real work, (especially before doing a + * potential SHA-1 computation on the entire file's contents), + * let's make sure that what we're looking at looks like an + * actual email message. + */ + from = _notmuch_message_file_get_header (message_file, "from"); + subject = _notmuch_message_file_get_header (message_file, "subject"); + to = _notmuch_message_file_get_header (message_file, "to"); + date = _notmuch_message_file_get_header (message_file, "date"); + + if ((from == NULL || *from == '\0') && + (subject == NULL || *subject == '\0') && + (to == NULL || *to == '\0')) { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; + } + + /* Now that we're sure it's mail, the first order of business + * is to find a message ID (or else create one ourselves). + */ + header = _notmuch_message_file_get_header (message_file, "message-id"); + if (header && *header != '\0') { + message_id = _notmuch_message_id_parse (message_file, header, NULL); + + /* So the header value isn't RFC-compliant, but it's + * better than no message-id at all. + */ + if (message_id == NULL) + message_id = talloc_strdup (message_file, header); + } + + if (message_id == NULL ) { + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. + */ + char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file)); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1); + free (sha1); + } + DONE: + if (ret == NOTMUCH_STATUS_SUCCESS) { + if (from_out) + *from_out = from; + if (subject_out) + *subject_out = subject; + if (to_out) + *to_out = to; + if (date_out) + *date_out = date; + if (message_id_out) + *message_id_out = message_id; + } + return ret; +} diff --git a/lib/message-id.c b/lib/message-id.c new file mode 100644 index 00000000..d7541d50 --- /dev/null +++ b/lib/message-id.c @@ -0,0 +1,96 @@ +#include "notmuch-private.h" + +/* Advance 'str' past any whitespace or RFC 822 comments. A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') { + nesting++; + } else if (*s == ')') { + nesting--; + } else if (*s == '\\') { + if (*(s+1)) + s++; + } + s++; + } + } + } + + *str = s; +} + +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL || *message_id == '\0') + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = talloc_strndup (ctx, s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} diff --git a/lib/message.cc b/lib/message.cc index f78e5a9d..539d3320 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -579,7 +579,9 @@ void _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) { Xapian::TermIterator i; - size_t prefix_len = strlen (prefix); + size_t prefix_len = 0; + + prefix_len = strlen (prefix); while (1) { i = message->doc.termlist_begin (); @@ -599,6 +601,59 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) } } + +/* Remove all terms generated by indexing, i.e. not tags or + * properties, along with any automatic tags*/ +notmuch_private_status_t +_notmuch_message_remove_indexed_terms (notmuch_message_t *message) +{ + Xapian::TermIterator i; + + const std::string + id_prefix = _find_prefix ("id"), + property_prefix = _find_prefix ("property"), + tag_prefix = _find_prefix ("tag"), + type_prefix = _find_prefix ("type"); + + for (i = message->doc.termlist_begin (); + i != message->doc.termlist_end (); i++) { + + const std::string term = *i; + + if (term.compare (0, type_prefix.size (), type_prefix) == 0) + continue; + + if (term.compare (0, id_prefix.size (), id_prefix) == 0) + continue; + + if (term.compare (0, property_prefix.size (), property_prefix) == 0) + continue; + + if (term.compare (0, tag_prefix.size (), tag_prefix) == 0 && + term.compare (1, strlen("encrypted"), "encrypted") != 0 && + term.compare (1, strlen("signed"), "signed") != 0 && + term.compare (1, strlen("attachment"), "attachment") != 0) + continue; + + try { + message->doc.remove_term ((*i)); + message->modified = TRUE; + } catch (const Xapian::InvalidArgumentError) { + /* Ignore failure to remove non-existent term. */ + } catch (const Xapian::Error &error) { + notmuch_database_t *notmuch = message->notmuch; + + if (!notmuch->exception_reported) { + _notmuch_database_log(_notmuch_message_database (message), "A Xapian exception occurred creating message: %s\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + } + return NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION; + } + } + return NOTMUCH_PRIVATE_STATUS_SUCCESS; +} + /* Return true if p points at "new" or "cur". */ static bool is_maildir (const char *p) { @@ -646,6 +701,7 @@ _notmuch_message_add_folder_terms (notmuch_message_t *message, talloc_free (folder); + message->modified = TRUE; return NOTMUCH_STATUS_SUCCESS; } @@ -946,6 +1002,14 @@ notmuch_message_get_filenames (notmuch_message_t *message) return _notmuch_filenames_create (message, message->filename_list); } +int +notmuch_message_count_files (notmuch_message_t *message) +{ + _notmuch_message_ensure_filename_list (message); + + return _notmuch_string_list_length (message->filename_list); +} + notmuch_bool_t notmuch_message_get_flag (notmuch_message_t *message, notmuch_message_flag_t flag) @@ -1872,3 +1936,107 @@ _notmuch_message_frozen (notmuch_message_t *message) { return message->frozen; } + +notmuch_status_t +notmuch_message_reindex (notmuch_message_t *message, + notmuch_param_t unused (*indexopts)) +{ + notmuch_database_t *notmuch = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + notmuch_filenames_t *orig_filenames = NULL; + const char *orig_thread_id = NULL; + notmuch_message_file_t *message_file = NULL; + + int found = 0; + + if (message == NULL) + return NOTMUCH_STATUS_NULL_POINTER; + + /* Save in case we need to delete message */ + orig_thread_id = notmuch_message_get_thread_id (message); + if (!orig_thread_id) { + /* XXX TODO: make up new error return? */ + INTERNAL_ERROR ("message without thread-id"); + } + + /* strdup it because the metadata may be invalidated */ + orig_thread_id = talloc_strdup (message, orig_thread_id); + + notmuch = _notmuch_message_database (message); + + ret = _notmuch_database_ensure_writable (notmuch); + if (ret) + return ret; + + orig_filenames = notmuch_message_get_filenames (message); + + private_status = _notmuch_message_remove_indexed_terms (message); + if (private_status) { + ret = COERCE_STATUS(private_status, "error removing terms"); + goto DONE; + } + + /* re-add the filenames with the associated indexopts */ + for (; notmuch_filenames_valid (orig_filenames); + notmuch_filenames_move_to_next (orig_filenames)) { + + const char *date; + const char *from, *to, *subject; + char *message_id = NULL; + const char *thread_id = NULL; + + const char *filename = notmuch_filenames_get (orig_filenames); + + message_file = _notmuch_message_file_open (notmuch, filename); + if (message_file == NULL) + continue; + + ret = _notmuch_message_file_get_headers (message_file, + &from, &subject, &to, &date, + &message_id); + if (ret) + goto DONE; + + /* XXX TODO: deal with changing message id? */ + + _notmuch_message_add_filename (message, filename); + + ret = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, + &thread_id); + if (ret) + goto DONE; + + if (thread_id == NULL) + thread_id = orig_thread_id; + + _notmuch_message_add_term (message, "thread", thread_id); + _notmuch_message_set_header_values (message, date, from, subject); + + ret = _notmuch_message_index_file (message, message_file); + + if (ret == NOTMUCH_STATUS_FILE_ERROR) + continue; + if (ret) + goto DONE; + + found++; + _notmuch_message_file_close (message_file); + message_file = NULL; + } + if (found == 0) { + /* put back thread id to help cleanup */ + _notmuch_message_add_term (message, "thread", orig_thread_id); + ret = _notmuch_message_delete (message); + } else { + _notmuch_message_sync (message); + } + + DONE: + if (message_file) + _notmuch_message_file_close (message_file); + + /* XXX TODO destroy orig_filenames? */ + return ret; +} diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 5dfebf5d..b187a80f 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -425,6 +425,23 @@ const char * _notmuch_message_file_get_header (notmuch_message_file_t *message, const char *header); +notmuch_status_t +_notmuch_message_file_get_headers (notmuch_message_file_t *message_file, + const char **from_out, + const char **subject_out, + const char **to_out, + const char **date_out, + char **message_id_out); + +const char * +_notmuch_message_file_get_filename (notmuch_message_file_t *message); + +/* add-message.cc */ +notmuch_status_t +_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + const char **thread_id); /* index.cc */ notmuch_status_t @@ -492,6 +509,20 @@ notmuch_status_t _notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsigned *count_out); +/* message-id.c */ + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimiters. + * + * If not NULL, then *next will be made to point to the first character + * not parsed, (possibly pointing to the final '\0' terminator. + * + * Returns a newly talloc'ed string belonging to 'ctx'. + * + * Returns NULL if there is any error parsing the message-id. */ +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next); + /* message.cc */ @@ -501,6 +532,8 @@ _notmuch_message_add_reply (notmuch_message_t *message, notmuch_database_t * _notmuch_message_database (notmuch_message_t *message); +void +_notmuch_message_remove_unprefixed_terms (notmuch_message_t *message); /* sha1.c */ char * @@ -525,6 +558,12 @@ typedef struct _notmuch_string_list { notmuch_string_list_t * _notmuch_string_list_create (const void *ctx); +/* + * return the number of strings in 'list' + */ +int +_notmuch_string_list_length (notmuch_string_list_t *list); + /* Add 'string' to 'list'. * * The list will create its own talloced copy of 'string'. diff --git a/lib/notmuch.h b/lib/notmuch.h index 17f0872e..f78b3473 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -219,6 +219,7 @@ typedef struct _notmuch_tags notmuch_tags_t; typedef struct _notmuch_directory notmuch_directory_t; typedef struct _notmuch_filenames notmuch_filenames_t; typedef struct _notmuch_config_list notmuch_config_list_t; +typedef struct _notmuch_param notmuch_param_t; #endif /* __DOXYGEN__ */ /** @@ -1096,6 +1097,18 @@ notmuch_thread_get_thread_id (notmuch_thread_t *thread); int notmuch_thread_get_total_messages (notmuch_thread_t *thread); +/** + * Get the total number of files in 'thread'. + * + * This sums notmuch_message_count_files over all messages in the + * thread + * @returns Non-negative integer + * @since libnotmuch 5.0 (notmuch 0.25) + */ + +int +notmuch_thread_get_total_files (notmuch_thread_t *thread); + /** * Get a notmuch_messages_t iterator for the top-level messages in * 'thread' in oldest-first order. @@ -1341,6 +1354,14 @@ notmuch_message_get_thread_id (notmuch_message_t *message); notmuch_messages_t * notmuch_message_get_replies (notmuch_message_t *message); +/** + * Get the total number of files associated with a message. + * @returns Non-negative integer + * @since libnotmuch 5.0 (notmuch 0.25) + */ +int +notmuch_message_count_files (notmuch_message_t *message); + /** * Get a filename for the email corresponding to 'message'. * @@ -1373,6 +1394,20 @@ notmuch_message_get_filename (notmuch_message_t *message); notmuch_filenames_t * notmuch_message_get_filenames (notmuch_message_t *message); +/** + * Re-index the e-mail corresponding to 'message' using the supplied index options + * + * Returns the status of the re-index operation. (see the return + * codes documented in notmuch_database_add_message) + * + * After reindexing, the user should discard the message object passed + * in here by calling notmuch_message_destroy, since it refers to the + * original message, not to the reindexed message. + */ +notmuch_status_t +notmuch_message_reindex (notmuch_message_t *message, + notmuch_param_t *indexopts); + /** * Message flags. */ diff --git a/lib/string-list.c b/lib/string-list.c index 43ebe499..9c3ae7ef 100644 --- a/lib/string-list.c +++ b/lib/string-list.c @@ -42,6 +42,12 @@ _notmuch_string_list_create (const void *ctx) return list; } +int +_notmuch_string_list_length (notmuch_string_list_t *list) +{ + return list->length; +} + void _notmuch_string_list_append (notmuch_string_list_t *list, const char *string) diff --git a/lib/thread.cc b/lib/thread.cc index 1a1ecfa5..e17ef63e 100644 --- a/lib/thread.cc +++ b/lib/thread.cc @@ -44,6 +44,7 @@ struct _notmuch_thread { GHashTable *message_hash; int total_messages; + int total_files; int matched_messages; time_t oldest; time_t newest; @@ -266,6 +267,7 @@ _thread_add_message (notmuch_thread_t *thread, _notmuch_message_list_add_message (thread->message_list, talloc_steal (thread, message)); thread->total_messages++; + thread->total_files += notmuch_message_count_files (message); g_hash_table_insert (thread->message_hash, xstrdup (notmuch_message_get_message_id (message)), @@ -495,6 +497,7 @@ _notmuch_thread_create (void *ctx, free, NULL); thread->total_messages = 0; + thread->total_files = 0; thread->matched_messages = 0; thread->oldest = 0; thread->newest = 0; @@ -566,6 +569,12 @@ notmuch_thread_get_total_messages (notmuch_thread_t *thread) return thread->total_messages; } +int +notmuch_thread_get_total_files (notmuch_thread_t *thread) +{ + return thread->total_files; +} + int notmuch_thread_get_matched_messages (notmuch_thread_t *thread) { diff --git a/notmuch-client.h b/notmuch-client.h index ae37360b..1d3c0829 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -200,6 +200,9 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]); int notmuch_insert_command (notmuch_config_t *config, int argc, char *argv[]); +int +notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]); + int notmuch_reply_command (notmuch_config_t *config, int argc, char *argv[]); diff --git a/notmuch-reindex.c b/notmuch-reindex.c new file mode 100644 index 00000000..44223042 --- /dev/null +++ b/notmuch-reindex.c @@ -0,0 +1,134 @@ +/* notmuch - Not much of an email program, (just index and search) + * + * Copyright © 2016 Daniel Kahn Gillmor + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: Daniel Kahn Gillmor + */ + +#include "notmuch-client.h" +#include "string-util.h" + +static volatile sig_atomic_t interrupted; + +static void +handle_sigint (unused (int sig)) +{ + static char msg[] = "Stopping... \n"; + + /* This write is "opportunistic", so it's okay to ignore the + * result. It is not required for correctness, and if it does + * fail or produce a short write, we want to get out of the signal + * handler as quickly as possible, not retry it. */ + IGNORE_RESULT (write (2, msg, sizeof (msg) - 1)); + interrupted = 1; +} + +/* reindex all messages matching 'query_string' using the passed-in indexopts + */ +static int +reindex_query (notmuch_database_t *notmuch, const char *query_string, + notmuch_param_t *indexopts) +{ + notmuch_query_t *query; + notmuch_messages_t *messages; + notmuch_message_t *message; + notmuch_status_t status; + + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + query = notmuch_query_create (notmuch, query_string); + if (query == NULL) { + fprintf (stderr, "Out of memory.\n"); + return 1; + } + + /* reindexing is not interested in any special sort order */ + notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED); + + status = notmuch_query_search_messages (query, &messages); + if (print_status_query ("notmuch reindex", query, status)) + return status; + + ret = notmuch_database_begin_atomic (notmuch); + for (; + notmuch_messages_valid (messages) && ! interrupted; + notmuch_messages_move_to_next (messages)) { + message = notmuch_messages_get (messages); + + ret = notmuch_message_reindex(message, indexopts); + if (ret != NOTMUCH_STATUS_SUCCESS) + break; + } + + if (!ret) + ret = notmuch_database_end_atomic (notmuch); + + notmuch_query_destroy (query); + + return ret || interrupted; +} + +int +notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]) +{ + char *query_string = NULL; + notmuch_database_t *notmuch; + struct sigaction action; + int opt_index; + int ret; + notmuch_param_t *indexopts = NULL; + + /* Set up our handler for SIGINT */ + memset (&action, 0, sizeof (struct sigaction)); + action.sa_handler = handle_sigint; + sigemptyset (&action.sa_mask); + action.sa_flags = SA_RESTART; + sigaction (SIGINT, &action, NULL); + + notmuch_opt_desc_t options[] = { + { NOTMUCH_OPT_INHERIT, (void *) ¬much_shared_options, NULL, 0, 0 }, + { 0, 0, 0, 0, 0 } + }; + + opt_index = parse_arguments (argc, argv, options, 1); + if (opt_index < 0) + return EXIT_FAILURE; + + notmuch_process_shared_options (argv[0]); + + if (notmuch_database_open (notmuch_config_get_database_path (config), + NOTMUCH_DATABASE_MODE_READ_WRITE, ¬much)) + return EXIT_FAILURE; + + notmuch_exit_if_unmatched_db_uuid (notmuch); + + query_string = query_string_from_args (config, argc-opt_index, argv+opt_index); + if (query_string == NULL) { + fprintf (stderr, "Out of memory\n"); + return EXIT_FAILURE; + } + + if (*query_string == '\0') { + fprintf (stderr, "Error: notmuch reindex requires at least one search term.\n"); + return EXIT_FAILURE; + } + + ret = reindex_query (notmuch, query_string, indexopts); + + notmuch_database_destroy (notmuch); + + return ret || interrupted ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/notmuch-search.c b/notmuch-search.c index 019e14ee..380e9d8f 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -160,6 +160,7 @@ do_search_threads (search_context_t *ctx) const char *subject = notmuch_thread_get_subject (thread); const char *thread_id = notmuch_thread_get_thread_id (thread); int matched = notmuch_thread_get_matched_messages (thread); + int files = notmuch_thread_get_total_files (thread); int total = notmuch_thread_get_total_messages (thread); const char *relative_date = NULL; notmuch_bool_t first_tag = TRUE; @@ -175,13 +176,23 @@ do_search_threads (search_context_t *ctx) if (format->is_text_printer) { /* Special case for the text formatter */ - printf ("thread:%s %12s [%d/%d] %s; %s (", + printf ("thread:%s %12s ", thread_id, - relative_date, + relative_date); + if (total == files) + printf ("[%d/%d] %s; %s (", matched, total, sanitize_string (ctx_quote, authors), sanitize_string (ctx_quote, subject)); + else + printf ("[%d/%d(%d)] %s; %s (", + matched, + total, + files, + sanitize_string (ctx_quote, authors), + sanitize_string (ctx_quote, subject)); + } else { /* Structured Output */ format->map_key (format, "thread"); format->string (format, thread_id); diff --git a/notmuch.c b/notmuch.c index 8e332ce6..201c7454 100644 --- a/notmuch.c +++ b/notmuch.c @@ -123,6 +123,8 @@ static command_t commands[] = { "Restore the tags from the given dump file (see 'dump')." }, { "compact", notmuch_compact_command, NOTMUCH_CONFIG_OPEN, "Compact the notmuch database." }, + { "reindex", notmuch_reindex_command, NOTMUCH_CONFIG_OPEN, + "Re-index all messages matching the search terms." }, { "config", notmuch_config_command, NOTMUCH_CONFIG_OPEN, "Get or set settings in the notmuch configuration file." }, { "help", notmuch_help_command, NOTMUCH_CONFIG_CREATE, /* create but don't save config */ diff --git a/performance-test/M04-reindex.sh b/performance-test/M04-reindex.sh new file mode 100755 index 00000000..d36e061b --- /dev/null +++ b/performance-test/M04-reindex.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +test_description='reindex' + +. ./perf-test-lib.sh || exit 1 + +memory_start + +memory_run 'reindex *' "notmuch reindex '*'" + +memory_done diff --git a/performance-test/T03-reindex.sh b/performance-test/T03-reindex.sh new file mode 100755 index 00000000..7af2d22d --- /dev/null +++ b/performance-test/T03-reindex.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +test_description='tagging' + +. ./perf-test-lib.sh || exit 1 + +time_start + +time_run 'reindex *' "notmuch reindex '*'" +time_run 'reindex *' "notmuch reindex '*'" +time_run 'reindex *' "notmuch reindex '*'" + +time_done diff --git a/test/T080-search.sh b/test/T080-search.sh index d2d71ca9..3bb3dced 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -111,7 +111,7 @@ thread:XXX 2009-11-18 [3/3] Adrian Perez de Castro, Keith Packard, Carl Worth; thread:XXX 2009-11-18 [3/3] Israel Herraiz, Keith Packard, Carl Worth; [notmuch] New to the list (inbox unread) thread:XXX 2009-11-18 [3/3] Jan Janak, Carl Worth; [notmuch] What a great idea! (inbox unread) thread:XXX 2009-11-18 [2/2] Jan Janak, Carl Worth; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread) -thread:XXX 2009-11-18 [3/3] Aron Griffis, Keith Packard, Carl Worth; [notmuch] archive (inbox unread) +thread:XXX 2009-11-18 [3/3(4)] Aron Griffis, Keith Packard, Carl Worth; [notmuch] archive (inbox unread) thread:XXX 2009-11-18 [2/2] Keith Packard, Carl Worth; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread) thread:XXX 2009-11-18 [7/7] Lars Kellogg-Stedman, Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread) thread:XXX 2009-11-18 [5/5] Mikhail Gusarov, Carl Worth, Keith Packard; [notmuch] [PATCH 1/2] Close message file after parsing message headers (inbox unread) diff --git a/test/T100-search-by-folder.sh b/test/T100-search-by-folder.sh index 2844ec61..79c266e4 100755 --- a/test/T100-search-by-folder.sh +++ b/test/T100-search-by-folder.sh @@ -15,7 +15,7 @@ add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"' test_begin_subtest "Single-world folder: specification (multiple results)" output=$(notmuch search folder:bad folder:bad/news folder:things/bad | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) -thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread) thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" test_begin_subtest "Top level folder" @@ -24,7 +24,7 @@ test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; T test_begin_subtest "Two-word path to narrow results to one" output=$(notmuch search folder:bad/news | notmuch_search_sanitize) -test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)" +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread)" test_begin_subtest "Folder search with --output=files" output=$(notmuch search --output=files folder:bad/news | notmuch_search_files_sanitize) diff --git a/test/T160-json.sh b/test/T160-json.sh index ac51895e..07955a2b 100755 --- a/test/T160-json.sh +++ b/test/T160-json.sh @@ -71,8 +71,8 @@ test_begin_subtest "Format version: too high" test_expect_code 21 "notmuch search --format-version=999 \\*" test_begin_subtest "Show message: multiple filenames" -add_message "[id]=message-id@example.com [filename]=copy1" -add_message "[id]=message-id@example.com [filename]=copy2" +add_message '[id]=message-id@example.com [filename]=copy1 [date]="Fri, 05 Jan 2001 15:43:52 +0000"' +add_message '[id]=message-id@example.com [filename]=copy2 [date]="Fri, 05 Jan 2001 15:43:52 +0000"' cat < EXPECTED [ [ diff --git a/test/T340-maildir-sync.sh b/test/T340-maildir-sync.sh index 6d956635..8024d86f 100755 --- a/test/T340-maildir-sync.sh +++ b/test/T340-maildir-sync.sh @@ -153,14 +153,14 @@ cp "$MAIL_DIR/cur/duplicated-message:2," "$MAIL_DIR/cur/duplicated-message-copy: NOTMUCH_NEW > output notmuch search subject:"Duplicated message" | notmuch_search_sanitize >> output test_expect_equal "$(< output)" "No new mail. -thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Duplicated message (inbox replied)" +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Duplicated message (inbox replied)" test_begin_subtest "Adding duplicate message without flags does not remove tags" cp "$MAIL_DIR/cur/duplicated-message-copy:2,RS" "$MAIL_DIR/cur/duplicated-message-another-copy:2," NOTMUCH_NEW > output notmuch search subject:"Duplicated message" | notmuch_search_sanitize >> output test_expect_equal "$(< output)" "No new mail. -thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Duplicated message (inbox replied)" +thread:XXX 2001-01-05 [1/1(3)] Notmuch Test Suite; Duplicated message (inbox replied)" test_begin_subtest "Tag changes modify flags of multiple files" notmuch tag -replied subject:"Duplicated message" @@ -181,11 +181,33 @@ test_expect_equal "$(cd $MAIL_DIR/cur/; ls non-compliant*)" "non-compliant-maild test_begin_subtest "Files in new/ get default synchronized tags" OLDCONFIG=$(notmuch config get new.tags) -notmuch config set new.tags test +notmuch config set new.tags test;unread add_message [subject]='"File in new/"' [dir]=new [filename]='file-in-new' notmuch config set new.tags $OLDCONFIG notmuch search 'subject:"File in new"' | notmuch_search_sanitize > output test_expect_equal "$(< output)" \ "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; File in new/ (test unread)" +test_begin_subtest "unread is not mandatory in new/" +test_subtest_known_broken +OLDCONFIG=$(notmuch config get new.tags) +notmuch config set new.tags test +add_message [subject]='"File in new/"' [dir]=new [filename]='file-in-new' +notmuch config set new.tags $OLDCONFIG +notmuch search 'subject:"File in new"' | notmuch_search_sanitize > output +test_expect_equal "$(< output)" \ +"thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; File in new/ (test)" + +for tag in draft flagged passed replied; do + + test_begin_subtest "$tag is valid in new.tags" + test_subtest_known_broken + OLDCONFIG=$(notmuch config get new.tags) + notmuch config set new.tags "$tag" + add_message [subject]="\"$tag sync in new\"" [dir]=new + notmuch config set new.tags $OLDCONFIG + notmuch search "subject:\"$tag sync in new\"" | notmuch_search_sanitize > output + test_expect_equal "$(< output)" \ + "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; $tag sync in new ($tag)" +done test_done diff --git a/test/T370-search-folder-coherence.sh b/test/T370-search-folder-coherence.sh index d1cb45ec..8748b3d0 100755 --- a/test/T370-search-folder-coherence.sh +++ b/test/T370-search-folder-coherence.sh @@ -32,7 +32,7 @@ test_expect_equal_file EXPECTED OUTPUT test_begin_subtest "Test matches folder:spam" output=$(notmuch search folder:spam) -test_expect_equal "$output" "thread:0000000000000001 2001-01-05 [1/1] Notmuch Test Suite; Single new message (inbox unread)" +test_expect_equal "$output" "thread:0000000000000001 2001-01-05 [1/1(2)] Notmuch Test Suite; Single new message (inbox unread)" test_begin_subtest "Remove folder:spam copy of email" rm $dir/spam/$(basename $file_x) diff --git a/test/T500-search-date.sh b/test/T500-search-date.sh index f10207f8..fc4ecdc3 100755 --- a/test/T500-search-date.sh +++ b/test/T500-search-date.sh @@ -23,7 +23,7 @@ notmuch search date:18-Nov-2009_02:19:26-0800..2009-11-18_04:49:52-06:00 | notmu cat <EXPECTED thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread) thread:XXX 2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread) -thread:XXX 2009-11-18 [1/3] Carl Worth| Aron Griffis, Keith Packard; [notmuch] archive (inbox unread) +thread:XXX 2009-11-18 [1/3(4)] Carl Worth| Aron Griffis, Keith Packard; [notmuch] archive (inbox unread) thread:XXX 2009-11-18 [1/2] Carl Worth| Keith Packard; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread) EOF test_expect_equal_file EXPECTED OUTPUT diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh index b7bdda11..d5def764 100755 --- a/test/T650-regexp-query.sh +++ b/test/T650-regexp-query.sh @@ -29,7 +29,7 @@ test_expect_equal_file EXPECTED OUTPUT test_begin_subtest "unanchored folder:// specification" output=$(notmuch search folder:/bad/ | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) -thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread) thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" test_begin_subtest "anchored folder:// search" @@ -39,7 +39,7 @@ test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; T test_begin_subtest "unanchored path:// specification" output=$(notmuch search path:/bad/ | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) -thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread) thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" test_begin_subtest "anchored path:// search" diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh new file mode 100755 index 00000000..ea5e1d6a --- /dev/null +++ b/test/T670-duplicate-mid.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +test_description="duplicate message ids" +. ./test-lib.sh || exit 1 + +add_message '[id]="duplicate"' '[subject]="message 1" [filename]=copy1' +add_message '[id]="duplicate"' '[subject]="message 2" [filename]=copy2' + +test_begin_subtest 'First subject preserved' +cat < EXPECTED +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; message 1 (inbox unread) +EOF +notmuch search id:duplicate | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'Search for second subject' +cat <EXPECTED +MAIL_DIR/copy1 +MAIL_DIR/copy2 +EOF +notmuch search --output=files subject:'"message 2"' | notmuch_dir_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +add_message '[id]="duplicate"' '[body]="sekrit" [filename]=copy3' +test_begin_subtest 'search for body in duplicate file' +cat <EXPECTED +MAIL_DIR/copy1 +MAIL_DIR/copy2 +MAIL_DIR/copy3 +EOF +notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +rm ${MAIL_DIR}/copy3 +test_begin_subtest 'reindex drops terms in duplicate file' +cp /dev/null EXPECTED +notmuch reindex '*' +notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_done diff --git a/test/T700-reindex.sh b/test/T700-reindex.sh new file mode 100755 index 00000000..051fbb3c --- /dev/null +++ b/test/T700-reindex.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +test_description='reindexing messages' +. ./test-lib.sh || exit 1 + +add_email_corpus + +notmuch tag +usertag1 '*' + +notmuch search '*' | notmuch_search_sanitize > initial-threads +notmuch search --output=messages '*' > initial-message-ids +notmuch dump > initial-dump + +test_begin_subtest 'reindex preserves threads' +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file initial-threads OUTPUT + +test_begin_subtest 'reindex after removing duplicate file preserves threads' +# remove one copy +sed 's,3/3(4),3/3,' < initial-threads > EXPECTED +mv $MAIL_DIR/bar/18:2, duplicate-msg-1.eml +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'reindex preserves message-ids' +notmuch reindex '*' +notmuch search --output=messages '*' > OUTPUT +test_expect_equal_file initial-message-ids OUTPUT + +test_begin_subtest 'reindex preserves tags' +notmuch reindex '*' +notmuch dump > OUTPUT +test_expect_equal_file initial-dump OUTPUT + +test_begin_subtest 'reindex moves a message between threads' +notmuch search --output=threads id:87iqd9rn3l.fsf@vertex.dottedmag > EXPECTED +# re-parent +sed -i 's/1258471718-6781-1-git-send-email-dottedmag@dottedmag.net/87iqd9rn3l.fsf@vertex.dottedmag/' $MAIL_DIR/02:2,* +notmuch reindex id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net +notmuch search --output=threads id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'reindex detects removal of all files' +notmuch search --output=messages not id:20091117232137.GA7669@griffis1.net> EXPECTED +# remove both copies +mv $MAIL_DIR/cur/51:2,* duplicate-message-2.eml +notmuch reindex id:20091117232137.GA7669@griffis1.net +notmuch search --output=messages '*' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "reindex preserves properties" +cat < prop-dump +#= 1258471718-6781-1-git-send-email-dottedmag@dottedmag.net userprop=userval +#= 1258471718-6781-2-git-send-email-dottedmag@dottedmag.net userprop=userval +#= 1258491078-29658-1-git-send-email-dottedmag@dottedmag.net userprop=userval1 +#= 20091117190054.GU3165@dottiness.seas.harvard.edu userprop=userval +#= 20091117203301.GV3165@dottiness.seas.harvard.edu userprop=userval3 +#= 87fx8can9z.fsf@vertex.dottedmag userprop=userval2 +#= 87iqd9rn3l.fsf@vertex.dottedmag userprop=userval +#= 87lji4lx9v.fsf@yoom.home.cworth.org userprop=userval3 +#= 87lji5cbwo.fsf@yoom.home.cworth.org userprop=userval +#= cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com userprop=userval +EOF +notmuch restore < prop-dump +notmuch reindex '*' +notmuch dump | grep '^#=' | sort > OUTPUT +test_expect_equal_file prop-dump OUTPUT +test_done + +add_email_corpus lkml + +test_begin_subtest "reindex of lkml corpus preserves threads" +notmuch search '*' | notmuch_search_sanitize > EXPECTED +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_done