X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=lib%2Fdatabase.cc;h=5b677d42007fcae27e2632701d2c4073a664e44b;hp=4e49e53f9c883a1d14ca0daa9e27e022f582d7b1;hb=c50891f449269b7c6f235c510a472be832b95cc1;hpb=4d35c3544d7bb0143cb1a17cc197cfe271670bf8 diff --git a/lib/database.cc b/lib/database.cc index 4e49e53f..5b677d42 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -42,7 +42,8 @@ typedef struct { * Mail document * ------------- * A mail document is associated with a particular email message file - * on disk. It is indexed with the following prefixed terms: + * on disk. It is indexed with the following prefixed terms which the + * database uses to construct threads, etc.: * * Single terms of given prefix: * @@ -53,15 +54,14 @@ typedef struct { * * thread: The ID of the thread to which the mail belongs * + * replyto: The ID from the In-Reply-To header of the mail (if any). + * * Multiple terms of given prefix: * - * ref: All unresolved message IDs from In-Reply-To and - * References headers in the message. (Once a referenced - * message is added to the database and the thread IDs - * are linked the corresponding "ref" term is dropped - * from the message document.) + * reference: All message IDs from In-Reply-To and References + * headers in the message. * - * tag: Any tags associated with this message by the user. + * tag: Any tags associated with this message by the user. * * A mail document also has two values: * @@ -70,6 +70,11 @@ typedef struct { * * MESSAGE_ID: The unique ID of the mail message (see "id" above) * + * In addition, terms from the content of the message are added with + * "from", "to", "attachment", and "subject" prefixes for use by the + * user in searching. But the database doesn't really care itself + * about any of these. 
+ * * Timestamp document * ------------------ * A timestamp document is used by a client of the notmuch library to @@ -104,7 +109,7 @@ typedef struct { prefix_t BOOLEAN_PREFIX_INTERNAL[] = { { "type", "T" }, - { "ref", "XREFERENCE" }, + { "reference", "XREFERENCE" }, { "replyto", "XREPLYTO" }, { "timestamp", "XTIMESTAMP" }, }; @@ -312,8 +317,8 @@ skip_space_and_comments (const char **str) * Returns a newly talloc'ed string belonging to 'ctx'. * * Returns NULL if there is any error parsing the message-id. */ -static char * -parse_message_id (void *ctx, const char *message_id, const char **next) +char * +_parse_message_id (void *ctx, const char *message_id, const char **next) { const char *s, *end; char *result; @@ -371,9 +376,16 @@ parse_message_id (void *ctx, const char *message_id, const char **next) } /* Parse a References header value, putting a (talloc'ed under 'ctx') - * copy of each referenced message-id into 'hash'. */ + * copy of each referenced message-id into 'hash'. + * + * We explicitly avoid including any reference identical to + * 'message_id' in the result (to avoid mass confusion when a single + * message references itself cyclically---and yes, mail messages are + * not infrequent in the wild that do this---don't ask me why). 
+*/ static void parse_references (void *ctx, + const char *message_id, GHashTable *hash, const char *refs) { @@ -383,9 +395,9 @@ parse_references (void *ctx, return; while (*refs) { - ref = parse_message_id (ctx, refs, &refs); + ref = _parse_message_id (ctx, refs, &refs); - if (ref) + if (ref && strcmp (ref, message_id)) g_hash_table_insert (hash, ref, NULL); } } @@ -697,7 +709,7 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, const char **thread_id) { GHashTable *parents = NULL; - const char *refs, *in_reply_to; + const char *refs, *in_reply_to, *in_reply_to_message_id; GList *l, *keys = NULL; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; @@ -705,12 +717,21 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, _my_talloc_free_for_g_hash, NULL); refs = notmuch_message_file_get_header (message_file, "references"); - parse_references (message, parents, refs); + parse_references (message, notmuch_message_get_message_id (message), + parents, refs); in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to"); - parse_references (message, parents, in_reply_to); - _notmuch_message_add_term (message, "replyto", - parse_message_id (message, in_reply_to, NULL)); + parse_references (message, notmuch_message_get_message_id (message), + parents, in_reply_to); + + /* Carefully avoid adding any self-referential in-reply-to term. 
*/ + in_reply_to_message_id = _parse_message_id (message, in_reply_to, NULL); + if (strcmp (in_reply_to_message_id, + notmuch_message_get_message_id (message))) + { + _notmuch_message_add_term (message, "replyto", + _parse_message_id (message, in_reply_to, NULL)); + } keys = g_hash_table_get_keys (parents); for (l = keys; l; l = l->next) { @@ -723,7 +744,8 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, parent_message_id); if (parent_thread_id == NULL) { - _notmuch_message_add_term (message, "ref", parent_message_id); + _notmuch_message_add_term (message, "reference", + parent_message_id); } else { if (*thread_id == NULL) { *thread_id = talloc_strdup (message, parent_thread_id); @@ -757,7 +779,7 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch, notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; notmuch_private_status_t private_status; - find_doc_ids (notmuch, "ref", message_id, &child, &children_end); + find_doc_ids (notmuch, "reference", message_id, &child, &children_end); for ( ; child != children_end; child++) { @@ -774,7 +796,7 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch, *thread_id = talloc_strdup (message, child_thread_id); _notmuch_message_add_term (message, "thread", *thread_id); } else if (strcmp (*thread_id, child_thread_id)) { - _notmuch_message_remove_term (child_message, "ref", + _notmuch_message_remove_term (child_message, "reference", message_id); _notmuch_message_sync (child_message); ret = _merge_threads (notmuch, *thread_id, child_thread_id); @@ -796,7 +818,7 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch, /* Given a (mostly empty) 'message' and its corresponding * 'message_file' link it to existing threads in the database. * - * We first looke at 'message_file' and its link-relevant headers + * We first look at 'message_file' and its link-relevant headers * (References and In-Reply-To) for message IDs. 
We also look in the * database for existing messages that reference 'message'. * @@ -886,7 +908,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, header = notmuch_message_file_get_header (message_file, "message-id"); if (header) { - message_id = parse_message_id (message_file, header, NULL); + message_id = _parse_message_id (message_file, header, NULL); /* So the header value isn't RFC-compliant, but it's * better than no message-id at all. */ if (message_id == NULL) @@ -911,9 +933,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, * (which may or may not reference an existing document in the * database). */ - /* Use NULL for owner since we want to free this locally. */ - message = _notmuch_message_create_for_message_id (NULL, - notmuch, + message = _notmuch_message_create_for_message_id (notmuch, message_id, &private_status);