1 #include "database-private.h"
/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
 * a (potentially nested) parenthesized sequence with '\' used to
 * escape any character (including parentheses).
 *
 * If the sequence to be skipped continues to the end of the string,
 * then 'str' will be left pointing at the final terminating '\0'
 * character.
 *
 * An unterminated comment is consumed up to the end of the string.
 */
static void
skip_space_and_comments (const char **str)
{
    const char *s = *str;

    /* isspace() is only defined for EOF and values representable as
     * unsigned char, so every byte is converted before the call;
     * header values may contain bytes with the high bit set. */
    while (*s && (isspace (static_cast<unsigned char> (*s)) || *s == '(')) {
        while (*s && isspace (static_cast<unsigned char> (*s)))
            s++;

        if (*s == '(') {
            /* Depth of currently open parentheses. */
            int nesting = 1;

            s++;
            while (*s && nesting) {
                if (*s == '(') {
                    nesting++;
                } else if (*s == ')') {
                    nesting--;
                } else if (*s == '\\') {
                    /* Skip the escaped character, but never step
                     * past the terminating '\0'. */
                    if (*(s + 1))
                        s++;
                }
                s++;
            }
        }
    }

    *str = s;
}
40 /* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
41 * comments, and the '<' and '>' delimiters.
43 * If not NULL, then *next will be made to point to the first character
44 * not parsed, (possibly pointing to the final '\0' terminator.
46 * Returns a newly talloc'ed string belonging to 'ctx'.
48 * Returns NULL if there is any error parsing the message-id. */
50 _parse_message_id (void *ctx, const char *message_id, const char **next)
55 if (message_id == NULL || *message_id == '\0')
60 skip_space_and_comments (&s);
62 /* Skip any unstructured text as well. */
63 while (*s && *s != '<')
74 skip_space_and_comments (&s);
77 while (*end && *end != '>')
86 if (end > s && *end == '>')
91 result = talloc_strndup (ctx, s, end - s + 1);
93 /* Finally, collapse any whitespace that is within the message-id
99 for (r = result, len = strlen (r); *r; r++, len--)
100 if (*r == ' ' || *r == '\t')
101 memmove (r, r+1, len);
107 /* Parse a References header value, putting a (talloc'ed under 'ctx')
108 * copy of each referenced message-id into 'hash'.
110 * We explicitly avoid including any reference identical to
111 * 'message_id' in the result (to avoid mass confusion when a single
112 * message references itself cyclically---and yes, mail messages are
113 * not infrequent in the wild that do this---don't ask me why).
115 * Return the last reference parsed, if it is not equal to message_id.
// Collects the message-ids found in the header value refs into hash.
// Each call to _parse_message_id is handed refs as its next-pointer, so the
// call also advances refs; every parsed id that differs from message_id is
// added to hash with g_hash_table_add.
// The value returned is a talloc_strdup copy of last_ref made on ctx.
// NOTE(review): the return type, the remaining parameters, the statement
// that updates last_ref and any enclosing loop are not visible in this
// view - confirm against the full file before relying on this summary.
118 parse_references (void *ctx,
119 const char *message_id,
123 char *ref, *last_ref = NULL;
// Guard for an absent or empty header value (the guarded statement is not
// visible here).
125 if (refs == NULL || *refs == '\0')
129 ref = _parse_message_id (ctx, refs, &refs);
// strcmp is non-zero when ref differs from message_id, so a reference to
// the message itself is never recorded.
131 if (ref && strcmp (ref, message_id)) {
132 g_hash_table_add (hash, ref);
137 /* The return value of this function is used to add a parent
138 * reference to the database. We should avoid making a message
139 * its own parent, thus the above check.
141 return talloc_strdup(ctx, last_ref);
// Produces the next thread ID for the database: increments
// notmuch->last_thread_id, formats it as 16 zero-padded hexadecimal digits
// and records that string under the last_thread_id metadata key.
// The text lives in a function-static buffer, so each call overwrites the
// result of the previous one.
// NOTE(review): the return statement is not visible here; presumably the
// static buffer is what gets returned - confirm.
145 _notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
147 /* 16 bytes (+ terminator) for hexadecimal representation of
148 * a 64-bit integer. */
149 static char thread_id[17];
150 Xapian::WritableDatabase *db;
// set_metadata is called through the WritableDatabase type, hence the cast.
152 db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
154 notmuch->last_thread_id++;
156 sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id);
158 db->set_metadata ("last_thread_id", thread_id);
// Builds the metadata key under which a thread ID is stored for message_id:
// NOTMUCH_METADATA_THREAD_ID_PREFIX followed by a string argument.
// A message_id longer than NOTMUCH_MESSAGE_ID_MAX is first replaced by the
// result of _notmuch_message_id_compressed.
// The key is allocated with talloc_asprintf on ctx.
// NOTE(review): the return type and the final argument line are not visible
// in this view; presumably message_id is the value formatted - confirm.
164 _get_metadata_thread_id_key (void *ctx, const char *message_id)
166 if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
167 message_id = _notmuch_message_id_compressed (ctx, message_id);
169 return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
// Forward declaration: _resolve_message_id_to_thread_id calls this function
// for databases without NOTMUCH_FEATURE_GHOSTS, and the definition only
// appears after that caller.
174 static notmuch_status_t
175 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
177 const char *message_id,
178 const char **thread_id_ret);
181 /* Find the thread ID to which the message with 'message_id' belongs.
183 * Note: 'thread_id_ret' must not be NULL!
184 * On success '*thread_id_ret' is set to a newly talloced string belonging to
187 * Note: If there is no message in the database with the given
188 * 'message_id' then a new thread_id will be allocated for this
189 * message ID and stored in the database metadata so that the
190 * thread ID can be looked up if the message is added to the database
// Resolves message_id to a thread ID and stores a string owned by ctx in
// *thread_id_ret.
// Databases lacking NOTMUCH_FEATURE_GHOSTS are delegated to
// _resolve_message_id_to_thread_id_old. Otherwise the message document is
// looked up (or created) and either its existing thread ID is used or a
// freshly generated one is attached to it as a ghost message.
// The private status is converted with COERCE_STATUS on return.
// NOTE(review): the ctx parameter line and several braces and else lines
// are not visible in this view.
193 static notmuch_status_t
194 _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
196 const char *message_id,
197 const char **thread_id_ret)
199 notmuch_private_status_t status;
200 notmuch_message_t *message;
// Without ghost-message support fall back to the metadata-based resolver.
202 if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
203 return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
206 /* Look for this message (regular or ghost) */
207 message = _notmuch_message_create_for_message_id (
208 notmuch, message_id, &status);
209 if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
// talloc_steal reparents the thread ID string onto ctx; presumably this
// keeps it alive once message is destroyed below.
211 *thread_id_ret = talloc_steal (
212 ctx, notmuch_message_get_thread_id (message));
213 } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
214 /* Message did not exist. Give it a fresh thread ID and
215 * populate this message as a ghost message. */
// The generator hands back a shared static buffer, so the ID is copied.
216 *thread_id_ret = talloc_strdup (
217 ctx, _notmuch_database_generate_thread_id (notmuch));
// A NULL copy means the duplication failed.
218 if (! *thread_id_ret) {
219 status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
221 status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
223 /* Commit the new ghost message */
224 _notmuch_message_sync (message);
227 /* Create failed. Fall through. */
230 notmuch_message_destroy (message);
232 return COERCE_STATUS (status, "Error creating ghost message");
235 /* Pre-ghost messages _resolve_message_id_to_thread_id */
// Resolver used for databases that lack NOTMUCH_FEATURE_GHOSTS.
// The message is looked up with notmuch_database_find_message. One visible
// path moves the thread ID of that message onto ctx with talloc_steal,
// destroys the message and returns success. The other path reads the
// metadata entry whose key comes from _get_metadata_thread_id_key: an empty
// entry leads to a newly generated ID that is copied onto ctx and written
// back under the key, a non-empty entry is copied onto ctx as is.
// NOTE(review): the ctx parameter, the metadata_key declaration and the
// conditions that select between these paths are not visible in this view.
236 static notmuch_status_t
237 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
239 const char *message_id,
240 const char **thread_id_ret)
242 notmuch_status_t status;
243 notmuch_message_t *message;
244 std::string thread_id_string;
246 Xapian::WritableDatabase *db;
248 status = notmuch_database_find_message (notmuch, message_id, &message);
254 *thread_id_ret = talloc_steal (ctx,
255 notmuch_message_get_thread_id (message));
257 notmuch_message_destroy (message);
259 return NOTMUCH_STATUS_SUCCESS;
262 /* Message has not been seen yet.
264 * We may have seen a reference to it already, in which case, we
265 * can return the thread ID stored in the metadata. Otherwise, we
266 * generate a new thread ID and store it there.
268 db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
269 metadata_key = _get_metadata_thread_id_key (ctx, message_id);
270 thread_id_string = notmuch->xapian_db->get_metadata (metadata_key);
272 if (thread_id_string.empty()) {
273 *thread_id_ret = talloc_strdup (ctx,
274 _notmuch_database_generate_thread_id (notmuch));
275 db->set_metadata (metadata_key, *thread_id_ret);
277 *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str());
280 talloc_free (metadata_key);
282 return NOTMUCH_STATUS_SUCCESS;
// Moves every document that carries the thread term loser_thread_id into
// winner_thread_id.
// The documents are enumerated with _notmuch_database_find_doc_ids; for
// each one a message object is created, the loser thread term is removed,
// the winner thread term is added and the message is synced and destroyed.
// If a message object cannot be created, ret is set from the private
// status through COERCE_STATUS.
// NOTE(review): the statements that leave the loop on error and the final
// return are not visible in this view.
285 static notmuch_status_t
286 _merge_threads (notmuch_database_t *notmuch,
287 const char *winner_thread_id,
288 const char *loser_thread_id)
290 Xapian::PostingIterator loser, loser_end;
291 notmuch_message_t *message = NULL;
292 notmuch_private_status_t private_status;
293 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
// Fetch the posting range of all documents in the losing thread.
295 _notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end);
297 for ( ; loser != loser_end; loser++) {
298 message = _notmuch_message_create (notmuch, notmuch,
299 *loser, &private_status);
300 if (message == NULL) {
301 ret = COERCE_STATUS (private_status,
302 "Cannot find document for doc_id from query");
// Swap the thread term and write the change back to the database.
306 _notmuch_message_remove_term (message, "thread", loser_thread_id);
307 _notmuch_message_add_term (message, "thread", winner_thread_id);
308 _notmuch_message_sync (message);
310 notmuch_message_destroy (message);
316 notmuch_message_destroy (message);
// Callback taking a single untyped pointer; it is installed as the key
// destroy function of the parents hash table created in
// _notmuch_database_link_message_to_parents.
// NOTE(review): the body is not visible in this view; presumably it calls
// talloc_free on ptr - confirm.
322 _my_talloc_free_for_g_hash (void *ptr)
// Links message to the messages named in its References and In-Reply-To
// headers.
// Both header values are run through parse_references, which fills the
// parents hash table. A replyto term is added from the References result
// when there is one, otherwise from the In-Reply-To result.
// Then, for every key of parents, a reference term is added and the thread
// ID of that parent is resolved. The first thread ID seen is copied into
// *thread_id and added as the thread term of message; a later parent in a
// different thread causes that thread to be merged into *thread_id.
// NOTE(review): several argument lines, the error checks after the resolve
// and merge calls, and most of the cleanup are not visible in this view.
327 static notmuch_status_t
328 _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
329 notmuch_message_t *message,
330 notmuch_message_file_t *message_file,
331 const char **thread_id)
333 GHashTable *parents = NULL;
334 const char *refs, *in_reply_to, *in_reply_to_message_id;
335 const char *last_ref_message_id, *this_message_id;
336 GList *l, *keys = NULL;
337 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
// Keys are hashed and compared as strings; the table releases each key
// through _my_talloc_free_for_g_hash and has no value destroy function.
339 parents = g_hash_table_new_full (g_str_hash, g_str_equal,
340 _my_talloc_free_for_g_hash, NULL);
341 this_message_id = notmuch_message_get_message_id (message);
343 refs = _notmuch_message_file_get_header (message_file, "references");
344 last_ref_message_id = parse_references (message,
348 in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
349 in_reply_to_message_id = parse_references (message,
351 parents, in_reply_to);
353 /* For the parent of this message, use the last message ID of the
354 * References header, if available. If not, fall back to the
355 * first message ID in the In-Reply-To header. */
356 if (last_ref_message_id) {
357 _notmuch_message_add_term (message, "replyto",
358 last_ref_message_id);
359 } else if (in_reply_to_message_id) {
360 _notmuch_message_add_term (message, "replyto",
361 in_reply_to_message_id);
// Walk every distinct parent message-id collected above.
364 keys = g_hash_table_get_keys (parents);
365 for (l = keys; l; l = l->next) {
366 char *parent_message_id;
367 const char *parent_thread_id = NULL;
369 parent_message_id = (char *) l->data;
371 _notmuch_message_add_term (message, "reference",
374 ret = _resolve_message_id_to_thread_id (notmuch,
// First thread found: adopt it. A different thread later on: merge it in.
381 if (*thread_id == NULL) {
382 *thread_id = talloc_strdup (message, parent_thread_id);
383 _notmuch_message_add_term (message, "thread", *thread_id);
384 } else if (strcmp (*thread_id, parent_thread_id)) {
385 ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
395 g_hash_table_unref (parents);
// Links message to already indexed messages that carry a reference term
// equal to its message-id.
// For each such document a child message object is created. If *thread_id
// is still NULL, the thread ID of the child is copied into it and added as
// the thread term of message. If the child lives in a different thread, a
// reference term is removed from the child, the child is synced and its
// thread is merged into *thread_id.
// NOTE(review): the argument of the remove_term call, the error exits and
// the final return are not visible in this view.
400 static notmuch_status_t
401 _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
402 notmuch_message_t *message,
403 const char **thread_id)
405 const char *message_id = notmuch_message_get_message_id (message);
406 Xapian::PostingIterator child, children_end;
407 notmuch_message_t *child_message = NULL;
408 const char *child_thread_id;
409 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
410 notmuch_private_status_t private_status;
// Fetch the posting range of documents that reference this message-id.
412 _notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end);
414 for ( ; child != children_end; child++) {
// The child object is created with message as its first argument.
416 child_message = _notmuch_message_create (message, notmuch,
417 *child, &private_status);
418 if (child_message == NULL) {
419 ret = COERCE_STATUS (private_status,
420 "Cannot find document for doc_id from query");
424 child_thread_id = notmuch_message_get_thread_id (child_message);
425 if (*thread_id == NULL) {
426 *thread_id = talloc_strdup (message, child_thread_id);
427 _notmuch_message_add_term (message, "thread", *thread_id);
428 } else if (strcmp (*thread_id, child_thread_id)) {
429 _notmuch_message_remove_term (child_message, "reference",
431 _notmuch_message_sync (child_message);
432 ret = _merge_threads (notmuch, *thread_id, child_thread_id);
// Reset the pointer after destroying so that the destroy call after the
// loop does not see an already destroyed object.
437 notmuch_message_destroy (child_message);
438 child_message = NULL;
443 notmuch_message_destroy (child_message);
448 /* Fetch and clear the stored thread_id for message, or NULL if none. */
// Reads the thread ID stored in the database metadata for the message-id
// of message, using the key built by _get_metadata_thread_id_key.
// On the visible non-trivial path the metadata entry is overwritten with an
// empty string and a talloc_strdup copy of the stored ID, owned by ctx, is
// returned.
// NOTE(review): the return type, the metadata_key declaration and the
// statement executed when the stored value is empty are not visible in this
// view; the preceding comment says NULL is returned in that case.
450 _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
451 notmuch_message_t *message)
453 const char *message_id;
454 std::string stored_id;
457 message_id = notmuch_message_get_message_id (message);
458 metadata_key = _get_metadata_thread_id_key (ctx, message_id);
460 /* Check if we have already seen related messages to this one.
461 * If we have then use the thread_id that we stored at that time.
463 stored_id = notmuch->xapian_db->get_metadata (metadata_key);
464 if (stored_id.empty ()) {
467 Xapian::WritableDatabase *db;
469 db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
471 /* Clear the metadata for this message ID. We don't need it
473 db->set_metadata (metadata_key, "");
475 return talloc_strdup (ctx, stored_id.c_str ());
479 /* Given a blank or ghost 'message' and its corresponding
480 * 'message_file' link it to existing threads in the database.
482 * First, if is_ghost, this retrieves the thread ID already stored in
483 * the message (which will be the case if a message was previously
484 * added that referenced this one). If the message is blank
485 * (!is_ghost), it doesn't have a thread ID yet (we'll generate one
486 * later in this function). If the database does not support ghost
487 * messages, this checks for a thread ID stored in database metadata
488 * for this message ID.
490 * Second, we look at 'message_file' and its link-relevant headers
491 * (References and In-Reply-To) for message IDs.
493 * Finally, we look in the database for existing messages that
494 * reference 'message'.
496 * In all cases, we assign to the current message the first thread ID
497 * found. We will also merge any existing, distinct threads where this
498 * message belongs to both, (which is not uncommon when messages are
499 * processed out of order).
501 * Finally, if no thread ID has been found through referenced messages, we
502 * call _notmuch_database_generate_thread_id to generate a new thread
503 * ID. This should only happen for new, top-level messages, (no
504 * References or In-Reply-To header in this message, and no previously
505 * added message refers to this message).
// Determines the thread of message and links it to related messages.
// A starting thread ID is taken from the message itself or, through
// _consume_metadata_thread_id, from the database metadata; the visible code
// tests NOTMUCH_FEATURE_GHOSTS before these calls. Parents are then linked
// with _notmuch_database_link_message_to_parents, and for databases without
// NOTMUCH_FEATURE_GHOSTS children are linked as well. If thread_id is still
// NULL afterwards, a new thread ID is generated and added as a thread term.
// NOTE(review): the exact branch structure around the first thread ID
// lookup, the status checks and the release of the local talloc context are
// not visible in this view - confirm against the full file.
507 static notmuch_status_t
508 _notmuch_database_link_message (notmuch_database_t *notmuch,
509 notmuch_message_t *message,
510 notmuch_message_file_t *message_file,
511 notmuch_bool_t is_ghost)
// Scratch talloc context for this call; it is passed as the allocation
// context to _consume_metadata_thread_id below.
513 void *local = talloc_new (NULL);
514 notmuch_status_t status;
515 const char *thread_id = NULL;
517 /* Check if the message already had a thread ID */
518 if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
520 thread_id = notmuch_message_get_thread_id (message);
522 thread_id = _consume_metadata_thread_id (local, notmuch, message);
524 _notmuch_message_add_term (message, "thread", thread_id);
527 status = _notmuch_database_link_message_to_parents (notmuch, message,
533 if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
534 /* In general, it shouldn't be necessary to link children,
535 * since the earlier indexing of those children will have
536 * stored a thread ID for the missing parent. However, prior
537 * to ghost messages, these stored thread IDs were NOT
538 * rewritten during thread merging (and there was no
539 * performant way to do so), so if indexed children were
540 * pulled into a different thread ID by a merge, it was
541 * necessary to pull them *back* into the stored thread ID of
542 * the parent. With ghost messages, we just rewrite the
543 * stored thread IDs during merging, so this workaround isn't
545 status = _notmuch_database_link_message_to_children (notmuch, message,
551 /* If not part of any existing thread, generate a new thread ID. */
552 if (thread_id == NULL) {
553 thread_id = _notmuch_database_generate_thread_id (notmuch);
555 _notmuch_message_add_term (message, "thread", thread_id);
565 notmuch_database_add_message (notmuch_database_t *notmuch,
566 const char *filename,
567 notmuch_message_t **message_ret)
569 notmuch_message_file_t *message_file;
570 notmuch_message_t *message = NULL;
571 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
572 notmuch_private_status_t private_status;
573 notmuch_bool_t is_ghost = FALSE, is_new = FALSE;
575 const char *date, *header;
576 const char *from, *to, *subject;
577 char *message_id = NULL;
582 ret = _notmuch_database_ensure_writable (notmuch);
586 message_file = _notmuch_message_file_open (notmuch, filename);
587 if (message_file == NULL)
588 return NOTMUCH_STATUS_FILE_ERROR;
590 /* Adding a message may change many documents. Do this all
592 ret = notmuch_database_begin_atomic (notmuch);
596 /* Parse message up front to get better error status. */
597 ret = _notmuch_message_file_parse (message_file);
601 /* Before we do any real work, (especially before doing a
602 * potential SHA-1 computation on the entire file's contents),
603 * let's make sure that what we're looking at looks like an
604 * actual email message.
606 from = _notmuch_message_file_get_header (message_file, "from");
607 subject = _notmuch_message_file_get_header (message_file, "subject");
608 to = _notmuch_message_file_get_header (message_file, "to");
610 if ((from == NULL || *from == '\0') &&
611 (subject == NULL || *subject == '\0') &&
612 (to == NULL || *to == '\0')) {
613 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
617 /* Now that we're sure it's mail, the first order of business
618 * is to find a message ID (or else create one ourselves).
620 header = _notmuch_message_file_get_header (message_file, "message-id");
621 if (header && *header != '\0') {
622 message_id = _parse_message_id (message_file, header, NULL);
624 /* So the header value isn't RFC-compliant, but it's
625 * better than no message-id at all.
627 if (message_id == NULL)
628 message_id = talloc_strdup (message_file, header);
631 if (message_id == NULL ) {
632 /* No message-id at all, let's generate one by taking a
633 * hash over the file's contents.
635 char *sha1 = _notmuch_sha1_of_file (filename);
637 /* If that failed too, something is really wrong. Give up. */
639 ret = NOTMUCH_STATUS_FILE_ERROR;
643 message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1);
648 /* Now that we have a message ID, we get a message object,
649 * (which may or may not reference an existing document in the
652 message = _notmuch_message_create_for_message_id (notmuch,
656 talloc_free (message_id);
658 /* We cannot call notmuch_message_get_flag for a new message */
659 switch (private_status) {
660 case NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
664 case NOTMUCH_PRIVATE_STATUS_SUCCESS:
665 is_ghost = notmuch_message_get_flag (message, NOTMUCH_MESSAGE_FLAG_GHOST);
669 ret = COERCE_STATUS (private_status,
670 "Unexpected status value from _notmuch_message_create_for_message_id");
674 _notmuch_message_add_filename (message, filename);
676 if (is_new || is_ghost) {
677 _notmuch_message_add_term (message, "type", "mail");
679 /* Convert ghost message to a regular message */
680 _notmuch_message_remove_term (message, "type", "ghost");
681 ret = _notmuch_database_link_message (notmuch, message,
682 message_file, is_ghost);
686 date = _notmuch_message_file_get_header (message_file, "date");
687 _notmuch_message_set_header_values (message, date, from, subject);
689 ret = _notmuch_message_index_file (message, message_file);
693 ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
696 _notmuch_message_sync (message);
697 } catch (const Xapian::Error &error) {
698 _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n",
699 error.get_msg().c_str());
700 notmuch->exception_reported = TRUE;
701 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
707 if ((ret == NOTMUCH_STATUS_SUCCESS ||
708 ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret)
709 *message_ret = message;
711 notmuch_message_destroy (message);
715 _notmuch_message_file_close (message_file);
717 ret2 = notmuch_database_end_atomic (notmuch);
718 if ((ret == NOTMUCH_STATUS_SUCCESS ||
719 ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) &&
720 ret2 != NOTMUCH_STATUS_SUCCESS)