1 /* database.cc - The database interfaces of the notmuch mail library
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
21 #include "database-private.h"
27 #include <glib.h> /* g_strdup_printf, g_free, GPtrArray, GHashTable */
/* Number of elements in the array 'arr'.
 *
 * Only meaningful for true arrays (not pointers, and not array
 * parameters, which decay to pointers). The argument is parenthesized
 * in the element expression so that a cast or dereference expression
 * can be passed without being re-associated by operator precedence. */
#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof ((arr)[0]))
38 /* With these prefix values we follow the conventions published here:
40 * http://xapian.org/docs/omega/termprefixes.html
42 * as much as makes sense. Note that I took some liberty in matching
43 * the reserved prefix values to notmuch concepts, (for example, 'G'
44 * is documented as "newsGroup (or similar entity - e.g. a web forum
45 * name)", for which I think the thread is the closest analogue in
46 * notmuch. This in spite of the fact that we will eventually be
47 * storing mailing-list messages where 'G' for "mailing list name"
48 * might be even a closer analogue. I'm treating the single-character
49 * prefixes preferentially for core notmuch concepts (which will be
50 * nearly universal to all mail messages).
/* Prefix table consulted first by _find_prefix. Each entry pairs a
 * prefix name (first field) with the term prefix string (second field)
 * that _find_prefix returns for that name. */
53 prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
56 { "ref", "XREFERENCE" },
57 { "timestamp", "XTIMESTAMP" },
/* Prefix table consulted second by _find_prefix. Its entries are also
 * registered with the query parser as boolean prefixes in
 * notmuch_database_open. */
60 prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
/* Return the term prefix registered under 'name'.
 *
 * BOOLEAN_PREFIX_INTERNAL is searched first, then
 * BOOLEAN_PREFIX_EXTERNAL. Names are compared with strcmp, so a match
 * must be exact, including case. The returned pointer refers to the
 * table entry and is not a copy.
 *
 * When neither table contains 'name', an "Internal error" message
 * naming the missing prefix is written to stderr. */
66 _find_prefix (const char *name)
70 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++)
71 if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0)
72 return BOOLEAN_PREFIX_INTERNAL[i].prefix;
74 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++)
75 if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0)
76 return BOOLEAN_PREFIX_EXTERNAL[i].prefix;
78 fprintf (stderr, "Internal error: No prefix exists for '%s'\n", name);
/* Translate a notmuch_status_t value into a short English description.
 *
 * Every result is a string literal, so the caller must neither modify
 * nor free it. NOTMUCH_STATUS_LAST_STATUS maps to the generic
 * "Unknown error status value" text. */
85 notmuch_status_to_string (notmuch_status_t status)
88 case NOTMUCH_STATUS_SUCCESS:
89 return "No error occurred";
90 case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
91 return "A Xapian exception occurred";
92 case NOTMUCH_STATUS_FILE_ERROR:
93 return "Something went wrong trying to read or write a file";
94 case NOTMUCH_STATUS_FILE_NOT_EMAIL:
95 return "File is not an email";
96 case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
97 return "Message ID is identical to a message in database";
98 case NOTMUCH_STATUS_NULL_POINTER:
99 return "Erroneous NULL pointer";
100 case NOTMUCH_STATUS_TAG_TOO_LONG:
101 return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)";
103 case NOTMUCH_STATUS_LAST_STATUS:
104 return "Unknown error status value";
108 /* XXX: We should drop this function and convert all callers to call
109 * _notmuch_message_add_term instead.
 *
 * Builds a term for 'doc' by concatenating the prefix that
 * _find_prefix returns for 'prefix_name' with 'value'. The term string
 * is allocated with g_strdup_printf. Only a term whose length does not
 * exceed NOTMUCH_TERM_MAX passes the final length test, which guards
 * the statement that follows it. */
111 add_term (Xapian::Document doc,
112 const char *prefix_name,
121 prefix = _find_prefix (prefix_name);
123 term = g_strdup_printf ("%s%s", prefix, value);
125 if (strlen (term) <= NOTMUCH_TERM_MAX)
/* Look up the posting list of a prefixed term.
 *
 * The term is the prefix registered for 'prefix_name' (see
 * _find_prefix) immediately followed by 'value'. On return *begin and
 * *end hold the start and end iterators of that term's posting list in
 * notmuch->xapian_db. The temporary term string is allocated with
 * g_strdup_printf. */
132 find_doc_ids (notmuch_database_t *notmuch,
133 const char *prefix_name,
135 Xapian::PostingIterator *begin,
136 Xapian::PostingIterator *end)
138 Xapian::PostingIterator i;
141 term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value);
143 *begin = notmuch->xapian_db->postlist_begin (term);
145 *end = notmuch->xapian_db->postlist_end (term);
/* Resolve the term for (prefix_name, value) to a single document id,
 * using find_doc_ids to obtain the posting list.
 *
 * Returns NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND or
 * NOTMUCH_PRIVATE_STATUS_SUCCESS. Presumably the former means the
 * posting list was empty and the latter means *doc_id was set from the
 * first posting - TODO confirm. */
150 static notmuch_private_status_t
151 find_unique_doc_id (notmuch_database_t *notmuch,
152 const char *prefix_name,
154 unsigned int *doc_id)
156 Xapian::PostingIterator i, end;
158 find_doc_ids (notmuch, prefix_name, value, &i, &end);
162 return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND;
165 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
/* Fetch the document stored under 'doc_id' from notmuch->xapian_db.
 * This is a direct call to get_document; nothing is caught here, so
 * any exception it raises reaches the caller. */
169 static Xapian::Document
170 find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
172 return notmuch->xapian_db->get_document (doc_id);
/* Like find_unique_doc_id, but also hands back the document itself.
 *
 * The document id is resolved with find_unique_doc_id. *document is
 * then assigned either a default-constructed Xapian::Document or the
 * document fetched for *doc_id; in the latter case
 * NOTMUCH_PRIVATE_STATUS_SUCCESS is returned. Presumably the empty
 * document belongs to a failed lookup whose status is passed back to
 * the caller - TODO confirm. */
175 static notmuch_private_status_t
176 find_unique_document (notmuch_database_t *notmuch,
177 const char *prefix_name,
179 Xapian::Document *document,
180 unsigned int *doc_id)
182 notmuch_private_status_t status;
184 status = find_unique_doc_id (notmuch, prefix_name, value, doc_id);
187 *document = Xapian::Document ();
191 *document = find_document_for_doc_id (notmuch, *doc_id);
192 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
/* Add the thread ids recorded on 'doc' to the 'thread_ids' hash table.
 *
 * The ids are read from the document value NOTMUCH_VALUE_THREAD.
 * Nothing is inserted when that value is empty. Otherwise the text is
 * split at ',' characters: 'comma' marks the end of the current id,
 * taken from strchr or set to the end of the string. Each id is
 * inserted as a key that is a fresh strndup copy, with a NULL value. */
196 insert_thread_id (GHashTable *thread_ids, Xapian::Document doc)
199 const char *value, *id, *comma;
201 value_string = doc.get_value (NOTMUCH_VALUE_THREAD);
202 value = value_string.c_str();
203 if (strlen (value)) {
206 comma = strchr (id, ',');
208 comma = id + strlen (id);
209 g_hash_table_insert (thread_ids,
210 strndup (id, comma - id), NULL);
/* Find the message whose "id" term matches 'message_id'.
 *
 * The document id is looked up with find_unique_doc_id. The
 * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND result is handled on its
 * own; otherwise a message object for the located document is built
 * with _notmuch_message_create and returned.
 *
 * NOTE(review): the database handle is passed as both the first and
 * the second argument of _notmuch_message_create. Presumably the first
 * is the owner of the new object - confirm against its declaration. */
219 notmuch_database_find_message (notmuch_database_t *notmuch,
220 const char *message_id)
222 notmuch_private_status_t status;
225 status = find_unique_doc_id (notmuch, "id", message_id, &doc_id);
227 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
230 return _notmuch_message_create (notmuch, notmuch, doc_id);
233 /* Return one or more thread_ids, (as a GPtrArray of strings), for the
234 * given message based on looking into the database for any messages
235 * referenced in parents, and also for any messages in the database
236 * referencing message_id.
238 * Caller should free all strings in the array and the array itself,
239 * (g_ptr_array_free) when done.
 *
 * The ids are first gathered as keys of a string-keyed hash table
 * (g_str_hash / g_str_equal), so an id reached through several
 * messages is reported only once. The keys are then moved into the
 * result array. */
241 find_thread_ids (notmuch_database_t *notmuch,
243 const char *message_id)
245 Xapian::PostingIterator child, children_end;
246 Xapian::Document doc;
247 GHashTable *thread_ids;
250 const char *parent_message_id;
253 thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal,
/* Children: documents carrying a "ref" term for message_id. */
256 find_doc_ids (notmuch, "ref", message_id, &child, &children_end);
257 for ( ; child != children_end; child++) {
258 doc = find_document_for_doc_id (notmuch, *child);
259 insert_thread_id (thread_ids, doc);
/* Parents: each id in 'parents' is looked up in the database and the
 * thread ids of the message found are copied in with strdup. */
262 for (i = 0; i < parents->len; i++) {
263 notmuch_message_t *parent;
264 notmuch_thread_ids_t *ids;
266 parent_message_id = (char *) g_ptr_array_index (parents, i);
267 parent = notmuch_database_find_message (notmuch, parent_message_id);
271 for (ids = notmuch_message_get_thread_ids (parent);
272 notmuch_thread_ids_has_more (ids);
273 notmuch_thread_ids_advance (ids))
277 id = notmuch_thread_ids_get (ids);
278 g_hash_table_insert (thread_ids, strdup (id), NULL);
281 notmuch_message_destroy (parent);
284 result = g_ptr_array_new ();
286 keys = g_hash_table_get_keys (thread_ids);
287 for (l = keys; l; l = l->next) {
288 char *id = (char *) l->data;
289 g_ptr_array_add (result, id);
293 /* We're done with the hash table, but we've taken the pointers to
294 * the allocated strings and put them into our result array, so
295 * tell the hash not to free them on its way out. */
296 g_hash_table_steal_all (thread_ids);
297 g_hash_table_unref (thread_ids);
302 /* Advance 'str' past any whitespace or RFC 822 comments. A comment is
303 * a (potentially nested) parenthesized sequence with '\' used to
304 * escape any character (including parentheses).
306 * If the sequence to be skipped continues to the end of the string,
307 * then 'str' will be left pointing at the final terminating '\0'
/* Skips runs of whitespace and parenthesized comments at *str. The
 * outer loop continues while the current character is whitespace or
 * an opening parenthesis, and 'nesting' controls the loop that walks
 * through a comment.
 *
 * NOTE(review): isspace() is called on a plain char. On platforms
 * where char is signed, a byte with the high bit set (possible in a
 * malformed header) is negative, which is undefined behavior for the
 * <ctype.h> functions. Casting to unsigned char first would avoid
 * that. */
311 skip_space_and_comments (const char **str)
316 while (*s && (isspace (*s) || *s == '(')) {
317 while (*s && isspace (*s))
322 while (*s && nesting) {
338 /* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
339 * comments, and the '<' and '>' delimiters.
341 * If not NULL, then *next will be made to point to the first character
342 * not parsed, (possibly pointing to the final '\0' terminator).
344 * Returns a newly allocated string which the caller should free()
 * (the string is produced by strndup).
347 * Returns NULL if there is any error parsing the message-id.
 *
 * A NULL 'message_id' is tested for explicitly at the top. Text that
 * precedes the opening '<' is skipped, and the id is taken to run up
 * to the next '>'.
 *
 * NOTE(review): the whitespace-collapsing loop at the end advances
 * 'r' after every memmove, so the character shifted into the vacated
 * position is never examined. Of two adjacent blanks only the first
 * is removed. */
349 parse_message_id (const char *message_id, const char **next)
354 if (message_id == NULL)
359 skip_space_and_comments (&s);
361 /* Skip any unstructured text as well. */
362 while (*s && *s != '<')
373 skip_space_and_comments (&s);
376 while (*end && *end != '>')
385 if (end > s && *end == '>')
390 result = strndup (s, end - s + 1);
392 /* Finally, collapse any whitespace that is within the message-id
398 for (r = result, len = strlen (r); *r; r++, len--)
399 if (*r == ' ' || *r == '\t')
400 memmove (r, r+1, len);
406 /* Parse a References header value, putting a copy of each referenced
407 * message-id into 'array'.
 *
 * Each stored string is the newly allocated result of
 * parse_message_id. That call also advances 'refs' through its 'next'
 * out-parameter, which is how the header text is consumed. */
409 parse_references (GPtrArray *array,
418 ref = parse_message_id (refs, &refs);
421 g_ptr_array_add (array, ref);
/* Return the default location of the mail store.
 *
 * The value of the NOTMUCH_BASE environment variable is used when it
 * is set. Otherwise the result is "$HOME/mail". An unset HOME is
 * treated as the empty string (giving "/mail", as a shell would expand
 * "$HOME/mail") instead of passing a NULL pointer to a "%s"
 * conversion.
 *
 * Returns a newly allocated string that the caller must release with
 * free(). Both branches allocate with malloc, so a single deallocator
 * is correct whichever variable supplied the path. Returns NULL if the
 * allocation fails. */
char *
notmuch_database_default_path (void)
{
    static const char mail_suffix[] = "/mail";
    const char *base = getenv ("NOTMUCH_BASE");
    const char *home;
    char *path;
    size_t size;

    if (base) {
        size = strlen (base) + 1;
        /* The cast is required because this file is compiled as C++. */
        path = (char *) malloc (size);
        if (path)
            memcpy (path, base, size);
        return path;
    }

    home = getenv ("HOME");
    if (home == NULL)
        home = "";

    /* sizeof (mail_suffix) already counts the terminating '\0'. */
    size = strlen (home) + sizeof (mail_suffix);
    path = (char *) malloc (size);
    if (path)
        snprintf (path, size, "%s%s", home, mail_suffix);

    return path;
}
/* Create a notmuch database beneath 'path' and open it.
 *
 * Steps visible here:
 *  - the path may be replaced by notmuch_database_default_path ()
 *    (kept in 'local_path'), presumably when the caller passes NULL -
 *    TODO confirm;
 *  - 'path' is checked with stat() and must be a directory;
 *  - the directory "<path>/.notmuch" is created with mode 0755;
 *  - the result of notmuch_database_open (path) is stored in
 *    'notmuch'.
 *
 * Failures of stat(), the directory test and mkdir() are reported on
 * stderr with an "Error: Cannot create ..." message. */
435 notmuch_database_create (const char *path)
437 notmuch_database_t *notmuch = NULL;
438 char *notmuch_path = NULL;
441 char *local_path = NULL;
444 path = local_path = notmuch_database_default_path ();
446 err = stat (path, &st);
448 fprintf (stderr, "Error: Cannot create database at %s: %s.\n",
449 path, strerror (errno));
453 if (! S_ISDIR (st.st_mode)) {
454 fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n",
459 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
461 err = mkdir (notmuch_path, 0755);
464 fprintf (stderr, "Error: Cannot create directory %s: %s.\n",
465 notmuch_path, strerror (errno));
469 notmuch = notmuch_database_open (path);
/* Open the notmuch database stored beneath 'path'.
 *
 * The path may be replaced by notmuch_database_default_path (),
 * presumably when the caller passes NULL - TODO confirm. The directory
 * "<path>/.notmuch" must pass stat(); otherwise "Error opening
 * database" is printed to stderr. The Xapian database lives in
 * "<path>/.notmuch/xapian" and is opened writable with
 * Xapian::DB_CREATE_OR_OPEN.
 *
 * The handle is a talloc allocation holding a talloc_strdup copy of
 * 'path', the Xapian database and a query parser. The parser defaults
 * to OP_AND and has every BOOLEAN_PREFIX_EXTERNAL entry registered as
 * a boolean prefix. A Xapian::Error raised during this setup is caught
 * and reported on stderr.
 *
 * NOTE(review): the result of talloc () is dereferenced on the very
 * next line without a NULL check. */
481 notmuch_database_open (const char *path)
483 notmuch_database_t *notmuch = NULL;
484 char *notmuch_path = NULL, *xapian_path = NULL;
487 char *local_path = NULL;
491 path = local_path = notmuch_database_default_path ();
493 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
495 err = stat (notmuch_path, &st);
497 fprintf (stderr, "Error opening database at %s: %s\n",
498 notmuch_path, strerror (errno));
502 xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian");
504 notmuch = talloc (NULL, notmuch_database_t);
505 notmuch->path = talloc_strdup (notmuch, path);
508 notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
509 Xapian::DB_CREATE_OR_OPEN);
510 notmuch->query_parser = new Xapian::QueryParser;
511 notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
512 notmuch->query_parser->set_database (*notmuch->xapian_db);
514 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
515 prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
516 notmuch->query_parser->add_boolean_prefix (prefix->name,
519 } catch (const Xapian::Error &error) {
520 fprintf (stderr, "A Xapian exception occurred: %s\n",
521 error.get_msg().c_str());
/* Release a database handle.
 *
 * Deletes the query parser and the Xapian database object that
 * notmuch_database_open created with new, then frees the handle with
 * talloc_free. The handle must not be used after this call. */
536 notmuch_database_close (notmuch_database_t *notmuch)
538 delete notmuch->query_parser;
539 delete notmuch->xapian_db;
540 talloc_free (notmuch);
/* Return the path recorded in the handle. This is the pointer stored
 * in notmuch->path (set with talloc_strdup against the handle in
 * notmuch_database_open), not a copy, so the caller must not free
 * it. */
544 notmuch_database_get_path (notmuch_database_t *notmuch)
546 return notmuch->path;
/* Locate the timestamp document for 'db_key'. This is
 * find_unique_document applied to the "timestamp" prefix; the status,
 * *doc and *doc_id are exactly what that call produces. */
549 notmuch_private_status_t
550 find_timestamp_document (notmuch_database_t *notmuch, const char *db_key,
551 Xapian::Document *doc, unsigned int *doc_id)
553 return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id);
556 /* We allow the user to use arbitrarily long keys for timestamps,
557 * (they're for filesystem paths after all, which have no limit we
558 * know about). But we have a term-length limit. So if we exceed that,
559 * we'll use the SHA-1 of the user's key as the actual key for
560 * constructing a database term.
562 * Caution: This function returns a newly allocated string which the
563 * caller should free() when finished.
/* Map a user-supplied timestamp key to the key used in the database.
 * When the "timestamp" prefix plus 'key' would be longer than
 * NOTMUCH_TERM_MAX, the result of notmuch_sha1_of_string (key) is
 * returned instead.
 *
 * NOTE(review): term_len is an int but receives the sum of two size_t
 * values from strlen. */
566 timestamp_db_key (const char *key)
568 int term_len = strlen (_find_prefix ("timestamp")) + strlen (key);
570 if (term_len > NOTMUCH_TERM_MAX)
571 return notmuch_sha1_of_string (key);
/* Record 'timestamp' under 'key' in the database.
 *
 * The key is first normalized with timestamp_db_key, and the matching
 * document is looked up with find_timestamp_document. The timestamp is
 * stored in value slot 0 of the document, encoded with
 * Xapian::sortable_serialise. When no document exists yet, a term made
 * of the "timestamp" prefix and the database key is built and the
 * document is added with add_document. The other write path is
 * replace_document on the existing document id.
 *
 * Returns NOTMUCH_STATUS_SUCCESS unless a Xapian::Error is caught. In
 * that case the error is printed to stderr and
 * NOTMUCH_STATUS_XAPIAN_EXCEPTION is returned.
 *
 * NOTE(review): this handler catches Xapian::Error by non-const
 * reference, while notmuch_database_open and
 * notmuch_database_add_message catch by const reference. */
577 notmuch_database_set_timestamp (notmuch_database_t *notmuch,
578 const char *key, time_t timestamp)
580 Xapian::Document doc;
582 notmuch_private_status_t status;
583 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
586 db_key = timestamp_db_key (key);
589 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
591 doc.add_value (0, Xapian::sortable_serialise (timestamp));
593 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
594 char *term = talloc_asprintf (NULL, "%s%s",
595 _find_prefix ("timestamp"), db_key);
599 notmuch->xapian_db->add_document (doc);
601 notmuch->xapian_db->replace_document (doc_id, doc);
604 } catch (Xapian::Error &error) {
605 fprintf (stderr, "A Xapian exception occurred: %s.\n",
606 error.get_msg().c_str());
607 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
/* Read back the timestamp stored under 'key'.
 *
 * The key is normalized with timestamp_db_key and the document is
 * looked up with find_timestamp_document. The case where no document
 * is found is handled separately. Otherwise the result is value slot 0
 * of the document, decoded with Xapian::sortable_unserialise. A
 * Xapian::Error raised along the way is caught. */
617 notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key)
619 Xapian::Document doc;
621 notmuch_private_status_t status;
625 db_key = timestamp_db_key (key);
628 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
630 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
633 ret = Xapian::sortable_unserialise (doc.get_value (0));
634 } catch (Xapian::Error &error) {
/* Add the mail file 'filename' to the database.
 *
 * Outline of the visible steps:
 *  1. Open the file as a notmuch_message_file_t and restrict the
 *     headers to be parsed.
 *  2. Determine the message id: the parsed Message-ID header, else the
 *     raw header text, else "notmuch-sha1-" followed by the SHA-1 of
 *     the file contents.
 *  3. Obtain a message object for that id. A non-empty stored filename
 *     means the id was added before.
 *  4. Record the filename and a "type" term with value "mail".
 *  5. Collect parent ids from the References and In-Reply-To headers,
 *     add a "ref" term for each, and resolve thread ids with
 *     find_thread_ids.
 *  6. Attach every thread id found, or call
 *     _notmuch_message_ensure_thread_id.
 *  7. Set the date, read the From, Subject and To headers, and sync
 *     the message to the database.
 *  8. Destroy the message object and close the message file.
 *
 * 'ret' starts as NOTMUCH_STATUS_SUCCESS and is set to one of:
 *  - NOTMUCH_STATUS_FILE_ERROR: the file could not be opened, or in
 *    the branch where its SHA-1 is computed as a fallback id.
 *  - NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: the id already has a
 *    filename.
 *  - NOTMUCH_STATUS_FILE_NOT_EMAIL: set after the From, Subject and To
 *    headers are read, presumably when they are all missing - TODO
 *    confirm.
 *  - NOTMUCH_STATUS_XAPIAN_EXCEPTION: a Xapian::Error was caught and
 *    printed to stderr.
 *
 * NOTE(review): the strings in 'parents' are produced by
 * parse_message_id with strndup, but are released here with g_free. */
646 notmuch_database_add_message (notmuch_database_t *notmuch,
647 const char *filename)
649 notmuch_message_file_t *message_file;
650 notmuch_message_t *message;
651 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
653 GPtrArray *parents, *thread_ids;
655 const char *refs, *in_reply_to, *date, *header;
656 const char *from, *to, *subject, *old_filename;
661 message_file = notmuch_message_file_open (filename);
662 if (message_file == NULL) {
663 ret = NOTMUCH_STATUS_FILE_ERROR;
667 notmuch_message_file_restrict_headers (message_file,
678 /* The first order of business is to find/create a message ID. */
680 header = notmuch_message_file_get_header (message_file, "message-id");
682 message_id = parse_message_id (header, NULL);
683 /* So the header value isn't RFC-compliant, but it's
684 * better than no message-id at all. */
685 if (message_id == NULL)
686 message_id = xstrdup (header);
688 /* No message-id at all, let's generate one by taking a
689 * hash over the file's contents. */
690 char *sha1 = notmuch_sha1_of_file (filename);
692 /* If that failed too, something is really wrong. Give up. */
694 ret = NOTMUCH_STATUS_FILE_ERROR;
698 message_id = g_strdup_printf ("notmuch-sha1-%s", sha1);
702 /* Now that we have a message ID, we get a message object,
703 * (which may or may not reference an existing document in the
706 /* Use NULL for owner since we want to free this locally. */
708 /* XXX: This call can fail by either out-of-memory or an
709 * "impossible" Xapian exception. We should rewrite it to
710 * allow us to propagate the error status. */
711 message = _notmuch_message_create_for_message_id (NULL, notmuch,
713 if (message == NULL) {
714 fprintf (stderr, "Internal error. This shouldn't happen.\n\n");
715 fprintf (stderr, "I mean, it's possible you ran out of memory, but then this code path is still an internal error since it should have detected that and propagated the status value up the stack.\n");
719 /* Has a message previously been added with the same ID? */
720 old_filename = notmuch_message_get_filename (message);
721 if (old_filename && strlen (old_filename)) {
722 ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
725 _notmuch_message_set_filename (message, filename);
726 _notmuch_message_add_term (message, "type", "mail");
729 /* Next, find the thread(s) to which this message belongs. */
730 parents = g_ptr_array_new ();
732 refs = notmuch_message_file_get_header (message_file, "references");
733 parse_references (parents, refs);
735 in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
736 parse_references (parents, in_reply_to);
738 for (i = 0; i < parents->len; i++)
739 _notmuch_message_add_term (message, "ref",
740 (char *) g_ptr_array_index (parents, i));
742 thread_ids = find_thread_ids (notmuch, parents, message_id);
746 for (i = 0; i < parents->len; i++)
747 g_free (g_ptr_array_index (parents, i));
748 g_ptr_array_free (parents, TRUE);
750 if (thread_ids->len) {
755 for (i = 0; i < thread_ids->len; i++) {
756 id = (char *) thread_ids->pdata[i];
757 _notmuch_message_add_thread_id (message, id);
759 thread_id = g_string_new (id);
761 g_string_append_printf (thread_id, ",%s", id);
765 g_string_free (thread_id, TRUE);
767 _notmuch_message_ensure_thread_id (message);
770 g_ptr_array_free (thread_ids, TRUE);
772 date = notmuch_message_file_get_header (message_file, "date");
773 _notmuch_message_set_date (message, date);
775 from = notmuch_message_file_get_header (message_file, "from");
776 subject = notmuch_message_file_get_header (message_file, "subject");
777 to = notmuch_message_file_get_header (message_file, "to");
783 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
786 _notmuch_message_sync (message);
788 } catch (const Xapian::Error &error) {
789 fprintf (stderr, "A Xapian exception occurred: %s.\n",
790 error.get_msg().c_str());
791 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
797 notmuch_message_destroy (message);
799 notmuch_message_file_close (message_file);