From: Carl Worth Date: Fri, 4 Jun 2010 19:39:36 +0000 (-0700) Subject: Add support (and tests) for messages with really long message IDs. X-Git-Tag: 0.4~117 X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=commitdiff_plain;h=7b78eb4af6e87532795d09bd82152002ab4a74b1 Add support (and tests) for messages with really long message IDs. Scott Henson reported an internal error that occurred when he tried to add a message that referenced another message with a message ID well over 300 characters in length. The bug here was running into a Xapian limit for the length of metadata key names, (which is even more restrictive than the Xapian limit for the length of terms). We fix this by noticing long message ID values and instead using a message ID of the form "notmuch-sha1-". That is, we use SHA1 to generate a compressed, (but still unique), version of the message ID. We add support to the test suite to exercise this fix. The tests add a message referencing the long message ID, then add the message with the long message ID, then finally add another message referencing the long ID. Each of these tests exercise different code paths where the special handling is implemented. A final test ensures that all three messages are stitched together into a single thread---guaranteeing that the three code paths all act consistently. --- diff --git a/lib/database.cc b/lib/database.cc index dd1fc637..6affc205 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -57,8 +57,12 @@ typedef struct { * * type: mail * - * id: Unique ID of mail, (from Message-ID header or generated - * as "notmuch-sha1-. + * id: Unique ID of mail. This is from the Message-ID header + * if present and not too long (see NOTMUCH_MESSAGE_ID_MAX). + * If it's present and too long, then we use + * "notmuch-sha1-". + * If this header is not present, we use + * "notmuch-sha1-". * * thread: The ID of the thread to which the mail belongs * @@ -145,9 +149,11 @@ typedef struct { * * thread_id_* A pre-allocated thread ID for a particular * message. This is actually an arbitarily large - * family of metadata name. Any particular name - * is formed by concatenating "thread_id_" with a - * message ID. The value stored is a thread ID. + * family of metadata name. Any particular name is + * formed by concatenating "thread_id_" with a message + * ID (or the SHA1 sum of a message ID if it is very + * long---see description of 'id' in the mail + * document). The value stored is a thread ID. * * These thread ID metadata values are stored * whenever a message references a parent message @@ -334,6 +340,23 @@ find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id) return notmuch->xapian_db->get_document (doc_id); } +/* Generate a compressed version of 'message_id' of the form: + * + * notmuch-sha1- + */ +static char * +_message_id_compressed (void *ctx, const char *message_id) +{ + char *sha1, *compressed; + + sha1 = notmuch_sha1_of_string (message_id); + + compressed = talloc_asprintf (ctx, "notmuch-sha1-%s", sha1); + free (sha1); + + return compressed; +} + notmuch_message_t * notmuch_database_find_message (notmuch_database_t *notmuch, const char *message_id) @@ -341,6 +364,9 @@ notmuch_database_find_message (notmuch_database_t *notmuch, notmuch_private_status_t status; unsigned int doc_id; + if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) + message_id = _message_id_compressed (notmuch, message_id); + try { status = _notmuch_database_find_unique_doc_id (notmuch, "id", message_id, &doc_id); @@ -1217,7 +1243,11 @@ _notmuch_database_generate_thread_id (notmuch_database_t *notmuch) static char * _get_metadata_thread_id_key (void *ctx, const char *message_id) { - return talloc_asprintf (ctx, "thread_id_%s", message_id); + if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) + message_id = _message_id_compressed (ctx, message_id); + + return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s", + message_id); } /* Find the thread ID to which the message with 'message_id' belongs. @@ -1570,10 +1600,12 @@ notmuch_database_add_message (notmuch_database_t *notmuch, if (message_id == NULL) message_id = talloc_strdup (message_file, header); - /* Reject a Message ID that's too long. */ - if (message_id && strlen (message_id) + 1 > NOTMUCH_TERM_MAX) { + /* If a message ID is too long, substitute its sha1 instead. */ + if (message_id && strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) { + char *compressed = _message_id_compressed (message_file, + message_id); talloc_free (message_id); - message_id = NULL; + message_id = compressed; } } diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index f9774a48..5a0cf925 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -108,6 +108,16 @@ typedef enum { * programmatically. */ #define NOTMUCH_TERM_MAX 245 +#define NOTMUCH_METADATA_THREAD_ID_PREFIX "thread_id_" + +/* For message IDs we have to be even more restrictive. Beyond fitting + * into the term limit, we also use message IDs to construct + * metadata-key values. And the documentation says that these should + * be restricted to about 200 characters. (The actual limit for the + * chert backend at least is 252.) + */ +#define NOTMUCH_MESSAGE_ID_MAX (200 - sizeof (NOTMUCH_METADATA_THREAD_ID_PREFIX)) + typedef enum _notmuch_private_status { /* First, copy all the public status values. */ NOTMUCH_PRIVATE_STATUS_SUCCESS = NOTMUCH_STATUS_SUCCESS, diff --git a/test/notmuch-test b/test/notmuch-test index 6f3fe0a9..eaaaf89b 100755 --- a/test/notmuch-test +++ b/test/notmuch-test @@ -1120,6 +1120,33 @@ References: <${gen_msg_id}> On Tue, 05 Jan 2010 15:43:56 -0800, Sender wrote: > from guessing test" +printf "\nTesting messages with ridiculously-long message IDs...\n" +printf " Referencing long ID before adding...\t\t" +generate_message '[subject]="Reference of ridiculously-long message ID"' \ + '[references]=\' + +output=$(NOTMUCH_NEW) +pass_if_equal "$output" "Added 1 new message to the database." + +printf " Adding message with long ID...\t\t\t" +generate_message '[subject]="A ridiculously-long message ID"' \ + '[id]=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-' + +output=$(NOTMUCH_NEW) +pass_if_equal "$output" "Added 1 new message to the database." + +printf " Referencing long ID after adding...\t\t" +generate_message '[subject]="Reply to ridiculously-long message ID"' \ + '[in-reply-to]=\' + +output=$(NOTMUCH_NEW) +pass_if_equal "$output" "Added 1 new message to the database." + +printf " Ensure all messages were threaded together...\t" + +output=$($NOTMUCH search 'subject:"a ridiculously-long message ID"' | notmuch_search_sanitize) +pass_if_equal "$output" "thread:XXX 2001-01-05 [1/3] Notmuch Test Suite; A ridiculously-long message ID (inbox unread)" + echo "" echo "Notmuch test suite complete."