1 /* database.cc - The database interfaces of the notmuch mail library
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
21 #include "database-private.h"
27 #include <glib.h> /* g_strdup_printf, g_free, GPtrArray, GHashTable */
/* Element count of the array 'arr': its total size divided by the size
 * of its first element. Only meaningful when 'arr' is a true array, not
 * a pointer. */
31 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
38 /* With these prefix values we follow the conventions published here:
40 * http://xapian.org/docs/omega/termprefixes.html
42 * as much as makes sense. Note that I took some liberty in matching
43 * the reserved prefix values to notmuch concepts, (for example, 'G'
44 * is documented as "newsGroup (or similar entity - e.g. a web forum
45 * name)", for which I think the thread is the closest analogue in
46 * notmuch. This in spite of the fact that we will eventually be
47 * storing mailing-list messages where 'G' for "mailing list name"
48 * might be even a closer analogue. I'm treating the single-character
49 * prefixes preferentially for core notmuch concepts (which will be
50 * nearly universal to all mail messages).
/* Name-to-prefix table consulted first by _find_prefix. Each entry pairs
 * a prefix name with the string that is prepended to a value to form a
 * database term (see add_term and find_doc_ids). */
53 prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
56 { "ref", "XREFERENCE" },
57 { "timestamp", "XTIMESTAMP" },
/* Second name-to-prefix table: _find_prefix falls back to it after
 * BOOLEAN_PREFIX_INTERNAL, and notmuch_database_open registers every
 * entry with the query parser through add_boolean_prefix. */
60 prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
/* Look up the term prefix registered for 'name'.
 *
 * BOOLEAN_PREFIX_INTERNAL is searched first, then
 * BOOLEAN_PREFIX_EXTERNAL; names are compared with strcmp and the
 * 'prefix' member of the first matching entry is returned. When neither
 * table contains 'name', an internal-error message naming it is printed
 * to stderr. */
66 _find_prefix (const char *name)
70 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++)
71 if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0)
72 return BOOLEAN_PREFIX_INTERNAL[i].prefix;
74 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++)
75 if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0)
76 return BOOLEAN_PREFIX_EXTERNAL[i].prefix;
78 fprintf (stderr, "Internal error: No prefix exists for '%s'\n", name);
/* Map a notmuch_status_t value to a fixed, human-readable English
 * description. Every case returns a string literal, so the result must
 * not be modified or freed by the caller. */
85 notmuch_status_to_string (notmuch_status_t status)
88 case NOTMUCH_STATUS_SUCCESS:
89 return "No error occurred";
90 case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
91 return "A Xapian exception occurred";
92 case NOTMUCH_STATUS_FILE_ERROR:
93 return "Something went wrong trying to read or write a file";
94 case NOTMUCH_STATUS_FILE_NOT_EMAIL:
95 return "File is not an email";
96 case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
97 return "Message ID is identical to a message in database";
98 case NOTMUCH_STATUS_NULL_POINTER:
99 return "Erroneous NULL pointer";
100 case NOTMUCH_STATUS_TAG_TOO_LONG:
101 return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)";
103 case NOTMUCH_STATUS_LAST_STATUS:
104 return "Unknown error status value";
108 /* XXX: We should drop this function and convert all callers to call
109 * _notmuch_message_add_term instead. */
/* Build a database term for 'doc' by concatenating the prefix registered
 * for 'prefix_name' with 'value'.
 *
 * The term is only acted upon when its length does not exceed
 * NOTMUCH_TERM_MAX (presumably by adding it to 'doc' -- confirm). The
 * term string is allocated with g_strdup_printf.
 *
 * NOTE(review): 'doc' is received by value. */
111 add_term (Xapian::Document doc,
112 const char *prefix_name,
121 prefix = _find_prefix (prefix_name);
123 term = g_strdup_printf ("%s%s", prefix, value);
125 if (strlen (term) <= NOTMUCH_TERM_MAX)
/* Locate the posting list for the term built from the prefix registered
 * for 'prefix_name' followed by 'value'.
 *
 * On return *begin and *end hold the postlist_begin and postlist_end
 * iterators of notmuch->xapian_db for that term.
 *
 * NOTE(review): 'term' comes from g_strdup_printf and so needs a matching
 * g_free; confirm it is released before returning. */
132 find_doc_ids (notmuch_database_t *notmuch,
133 const char *prefix_name,
135 Xapian::PostingIterator *begin,
136 Xapian::PostingIterator *end)
138 Xapian::PostingIterator i;
141 term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value);
143 *begin = notmuch->xapian_db->postlist_begin (term);
145 *end = notmuch->xapian_db->postlist_end (term);
/* Find the document id for the term built from 'prefix_name' and 'value',
 * for terms expected to identify a single document.
 *
 * The posting range is obtained from find_doc_ids. Returns
 * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND or
 * NOTMUCH_PRIVATE_STATUS_SUCCESS; 'doc_id' is the output parameter,
 * presumably written only on success (confirm). */
150 static notmuch_private_status_t
151 find_unique_doc_id (notmuch_database_t *notmuch,
152 const char *prefix_name,
154 unsigned int *doc_id)
156 Xapian::PostingIterator i, end;
158 find_doc_ids (notmuch, prefix_name, value, &i, &end);
162 return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND;
165 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
/* Fetch the document with id 'doc_id' from notmuch->xapian_db and return
 * it by value. */
169 static Xapian::Document
170 find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
172 return notmuch->xapian_db->get_document (doc_id);
/* Find the document, and its id, for the term built from 'prefix_name'
 * and 'value'.
 *
 * The id lookup is delegated to find_unique_doc_id. *document is set
 * either to a default-constructed Xapian::Document (presumably when the
 * lookup fails -- confirm) or to the document fetched for *doc_id, in
 * which case NOTMUCH_PRIVATE_STATUS_SUCCESS is returned. */
175 static notmuch_private_status_t
176 find_unique_document (notmuch_database_t *notmuch,
177 const char *prefix_name,
179 Xapian::Document *document,
180 unsigned int *doc_id)
182 notmuch_private_status_t status;
184 status = find_unique_doc_id (notmuch, prefix_name, value, doc_id);
187 *document = Xapian::Document ();
191 *document = find_document_for_doc_id (notmuch, *doc_id);
192 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
195 /* XXX: Should rewrite this to accept a notmuch_message_t* instead of
196 * a Xapian::Document and then we could just use
197 * notmuch_message_get_thread_ids instead of duplicating its logic
/* Record thread ids carried by 'doc' as keys of the 'thread_ids' hash
 * table (the associated values are NULL).
 *
 * The term iterator is positioned with skip_to on the "thread" prefix,
 * which is asserted to be exactly one character long. A key is a
 * strdup'ed copy of the term text with that leading prefix character
 * dropped. Terms that are empty or start with a different character are
 * treated specially (presumably ending the scan -- confirm). */
200 insert_thread_id (GHashTable *thread_ids, Xapian::Document doc)
203 Xapian::TermIterator i;
204 const char *prefix_str = _find_prefix ("thread");
207 assert (strlen (prefix_str) == 1);
209 prefix = *prefix_str;
211 i = doc.termlist_begin ();
212 i.skip_to (prefix_str);
215 if (i == doc.termlist_end ())
218 if (value_string.empty () || value_string[0] != prefix)
220 g_hash_table_insert (thread_ids,
221 strdup (value_string.c_str () + 1), NULL);
/* Look up the message whose message-id is 'message_id'.
 *
 * The document id is resolved through the "id" prefix. The
 * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND status is checked explicitly
 * (presumably yielding NULL -- confirm); otherwise the message object is
 * built with _notmuch_message_create, passing 'notmuch' as both of its
 * first two arguments. */
227 notmuch_database_find_message (notmuch_database_t *notmuch,
228 const char *message_id)
230 notmuch_private_status_t status;
233 status = find_unique_doc_id (notmuch, "id", message_id, &doc_id);
235 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
238 return _notmuch_message_create (notmuch, notmuch, doc_id);
241 /* Return one or more thread_ids, (as a GPtrArray of strings), for the
242 * given message based on looking into the database for any messages
243 * referenced in parents, and also for any messages in the database
244 * referencing message_id.
246 * Caller should free all strings in the array and the array itself,
247 * (g_ptr_array_free) when done. */
/* Implementation outline: thread ids are gathered as keys of a string
 * hash table (g_str_hash / g_str_equal), so an id seen several times is
 * stored once; the keys are then moved into the GPtrArray that is
 * returned. */
249 find_thread_ids (notmuch_database_t *notmuch,
251 const char *message_id)
253 Xapian::PostingIterator child, children_end;
254 Xapian::Document doc;
255 GHashTable *thread_ids;
258 const char *parent_message_id;
261 thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal,
/* Children: documents whose "ref" term names message_id. */
264 find_doc_ids (notmuch, "ref", message_id, &child, &children_end);
265 for ( ; child != children_end; child++) {
266 doc = find_document_for_doc_id (notmuch, *child);
267 insert_thread_id (thread_ids, doc);
/* Parents: look up each referenced message and copy its thread ids
 * into the hash. */
270 for (i = 0; i < parents->len; i++) {
271 notmuch_message_t *parent;
272 notmuch_thread_ids_t *ids;
274 parent_message_id = (char *) g_ptr_array_index (parents, i);
275 parent = notmuch_database_find_message (notmuch, parent_message_id);
279 for (ids = notmuch_message_get_thread_ids (parent);
280 notmuch_thread_ids_has_more (ids);
281 notmuch_thread_ids_advance (ids))
285 id = notmuch_thread_ids_get (ids);
286 g_hash_table_insert (thread_ids, strdup (id), NULL);
289 notmuch_message_destroy (parent);
/* Hand the key strings over to the result array. */
292 result = g_ptr_array_new ();
294 keys = g_hash_table_get_keys (thread_ids);
295 for (l = keys; l; l = l->next) {
296 char *id = (char *) l->data;
297 g_ptr_array_add (result, id);
301 /* We're done with the hash table, but we've taken the pointers to
302 * the allocated strings and put them into our result array, so
303 * tell the hash not to free them on its way out. */
304 g_hash_table_steal_all (thread_ids);
305 g_hash_table_unref (thread_ids);
310 /* Advance 'str' past any whitespace or RFC 822 comments. A comment is
311 * a (potentially nested) parenthesized sequence with '\' used to
312 * escape any character (including parentheses).
314 * If the sequence to be skipped continues to the end of the string,
315 * then 'str' will be left pointing at the final terminating '\0'
/* Implementation outline: the outer loop repeats while the current
 * character is whitespace or '('; inside it a run of whitespace is
 * consumed, then a comment is consumed while the 'nesting' counter is
 * non-zero and the string has not ended.
 *
 * NOTE(review): isspace is called on a plain char; a negative value is
 * undefined behaviour for <ctype.h> functions -- consider casting to
 * unsigned char. */
319 skip_space_and_comments (const char **str)
324 while (*s && (isspace (*s) || *s == '(')) {
325 while (*s && isspace (*s))
330 while (*s && nesting) {
346 /* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
347 * comments, and the '<' and '>' delimiters.
349 * If not NULL, then *next will be made to point to the first character
350 * not parsed, (possibly pointing to the final '\0' terminator).
352 * Returns a newly allocated string which the caller should free()
355 * Returns NULL if there is any error parsing the message-id. */
/* Implementation outline: a NULL 'message_id' is checked first; leading
 * whitespace/comments and any text before '<' are skipped, whitespace and
 * comments are skipped again, 'end' is advanced to '>' (or the end of the
 * string), the span is copied with strndup, and finally spaces and tabs
 * inside the copy are removed with memmove.
 *
 * NOTE(review): in the final loop, after memmove shifts the tail left the
 * loop still advances 'r', so the character moved into the current
 * position is never examined; a run of two or more spaces/tabs therefore
 * leaves some of them in the result. Confirm whether that is intended. */
357 parse_message_id (const char *message_id, const char **next)
362 if (message_id == NULL)
367 skip_space_and_comments (&s);
369 /* Skip any unstructured text as well. */
370 while (*s && *s != '<')
381 skip_space_and_comments (&s);
384 while (*end && *end != '>')
393 if (end > s && *end == '>')
398 result = strndup (s, end - s + 1);
400 /* Finally, collapse any whitespace that is within the message-id
406 for (r = result, len = strlen (r); *r; r++, len--)
407 if (*r == ' ' || *r == '\t')
408 memmove (r, r+1, len);
414 /* Parse a References header value, putting a copy of each referenced
415 * message-id into 'array'. */
/* Each id is obtained from parse_message_id, which also advances 'refs'
 * through its 'next' argument. The returned string is appended to 'array'
 * as-is, so the array holds the allocation made by parse_message_id. */
417 parse_references (GPtrArray *array,
426 ref = parse_message_id (refs, &refs);
429 g_ptr_array_add (array, ref);
/* Return a newly allocated string holding the default database path: a
 * copy of the NOTMUCH_BASE environment variable when it is set,
 * otherwise "<HOME>/mail" built from the HOME environment variable.
 *
 * NOTE(review): the two branches allocate with different functions
 * (strdup vs. g_strdup_printf), and an unset HOME passes NULL to the
 * "%s" conversion -- confirm both are acceptable to callers. */
434 notmuch_database_default_path (void)
436 if (getenv ("NOTMUCH_BASE"))
437 return strdup (getenv ("NOTMUCH_BASE"));
439 return g_strdup_printf ("%s/mail", getenv ("HOME"));
/* Create a notmuch database under the directory 'path' and open it.
 *
 * 'path' may be replaced by notmuch_database_default_path() (presumably
 * when it is NULL -- confirm). The path is stat'ed and must be a
 * directory; a ".notmuch" sub-directory is then created with mode 0755
 * and the database is opened with notmuch_database_open. Failures of
 * stat or mkdir, and a non-directory path, are reported on stderr.
 *
 * The result of notmuch_database_open is stored in 'notmuch', which
 * starts out NULL. */
443 notmuch_database_create (const char *path)
445 notmuch_database_t *notmuch = NULL;
446 char *notmuch_path = NULL;
449 char *local_path = NULL;
452 path = local_path = notmuch_database_default_path ();
454 err = stat (path, &st);
456 fprintf (stderr, "Error: Cannot create database at %s: %s.\n",
457 path, strerror (errno));
461 if (! S_ISDIR (st.st_mode)) {
462 fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n",
467 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
469 err = mkdir (notmuch_path, 0755);
472 fprintf (stderr, "Error: Cannot create directory %s: %s.\n",
473 notmuch_path, strerror (errno));
477 notmuch = notmuch_database_open (path);
/* Open the notmuch database located under the directory 'path'.
 *
 * 'path' may be replaced by notmuch_database_default_path() (presumably
 * when it is NULL -- confirm). "<path>/.notmuch" must be stat-able,
 * otherwise an error is printed to stderr. The Xapian database at
 * "<path>/.notmuch/xapian" is opened as a WritableDatabase with
 * DB_CREATE_OR_OPEN, so it is created if it does not exist yet.
 *
 * The returned object is a talloc allocation that owns a talloc copy of
 * 'path', the Xapian database and a query parser. The parser uses AND as
 * its default operator and has every BOOLEAN_PREFIX_EXTERNAL entry
 * registered as a boolean prefix. A Xapian::Error raised while setting
 * this up is caught and its message printed to stderr. */
489 notmuch_database_open (const char *path)
491 notmuch_database_t *notmuch = NULL;
492 char *notmuch_path = NULL, *xapian_path = NULL;
495 char *local_path = NULL;
499 path = local_path = notmuch_database_default_path ();
501 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
503 err = stat (notmuch_path, &st);
505 fprintf (stderr, "Error opening database at %s: %s\n",
506 notmuch_path, strerror (errno));
510 xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian");
512 notmuch = talloc (NULL, notmuch_database_t);
513 notmuch->path = talloc_strdup (notmuch, path);
516 notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
517 Xapian::DB_CREATE_OR_OPEN);
518 notmuch->query_parser = new Xapian::QueryParser;
519 notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
520 notmuch->query_parser->set_database (*notmuch->xapian_db);
522 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
523 prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
524 notmuch->query_parser->add_boolean_prefix (prefix->name,
527 } catch (const Xapian::Error &error) {
528 fprintf (stderr, "A Xapian exception occurred: %s\n",
529 error.get_msg().c_str());
/* Close the database: delete the query parser and the Xapian database
 * objects, then release the notmuch_database_t itself with talloc_free.
 * 'notmuch' must not be used afterwards. */
544 notmuch_database_close (notmuch_database_t *notmuch)
546 delete notmuch->query_parser;
547 delete notmuch->xapian_db;
548 talloc_free (notmuch);
/* Return the path stored in the database object. The string is the
 * 'path' member itself, not a copy. */
552 notmuch_database_get_path (notmuch_database_t *notmuch)
554 return notmuch->path;
/* Find the timestamp document stored under 'db_key': a thin wrapper that
 * calls find_unique_document with the "timestamp" prefix and forwards
 * its status, document and document id. */
557 notmuch_private_status_t
558 find_timestamp_document (notmuch_database_t *notmuch, const char *db_key,
559 Xapian::Document *doc, unsigned int *doc_id)
561 return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id);
564 /* We allow the user to use arbitrarily long keys for timestamps,
565 * (they're for filesystem paths after all, which have no limit we
566 * know about). But we have a term-length limit. So if we exceed that,
567 * we'll use the SHA-1 of the user's key as the actual key for
568 * constructing a database term.
570 * Caution: This function returns a newly allocated string which the
571 * caller should free() when finished.
/* The would-be term length is the length of the "timestamp" prefix plus
 * the length of 'key'; only when it exceeds NOTMUCH_TERM_MAX is the key
 * replaced by notmuch_sha1_of_string (key). */
574 timestamp_db_key (const char *key)
576 int term_len = strlen (_find_prefix ("timestamp")) + strlen (key);
578 if (term_len > NOTMUCH_TERM_MAX)
579 return notmuch_sha1_of_string (key);
/* Store 'timestamp' in the database under 'key'.
 *
 * The key is first mapped through timestamp_db_key. The timestamp is
 * serialised with Xapian::sortable_serialise into the
 * NOTMUCH_VALUE_TIMESTAMP value slot of the timestamp document. When no
 * such document exists yet, a term made of the "timestamp" prefix plus
 * the db key is built; the document is written with add_document or
 * replace_document.
 *
 * Returns 'ret', which starts as NOTMUCH_STATUS_SUCCESS and becomes
 * NOTMUCH_STATUS_XAPIAN_EXCEPTION after a Xapian::Error is caught and its
 * message printed to stderr. */
585 notmuch_database_set_timestamp (notmuch_database_t *notmuch,
586 const char *key, time_t timestamp)
588 Xapian::Document doc;
590 notmuch_private_status_t status;
591 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
594 db_key = timestamp_db_key (key);
597 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
599 doc.add_value (NOTMUCH_VALUE_TIMESTAMP,
600 Xapian::sortable_serialise (timestamp));
602 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
603 char *term = talloc_asprintf (NULL, "%s%s",
604 _find_prefix ("timestamp"), db_key);
608 notmuch->xapian_db->add_document (doc);
610 notmuch->xapian_db->replace_document (doc_id, doc);
613 } catch (Xapian::Error &error) {
614 fprintf (stderr, "A Xapian exception occurred: %s.\n",
615 error.get_msg().c_str());
616 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
/* Return the timestamp stored in the database under 'key'.
 *
 * The key is mapped through timestamp_db_key and the timestamp document
 * is looked up; the value in slot NOTMUCH_VALUE_TIMESTAMP is decoded
 * with Xapian::sortable_unserialise. The
 * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND status and a caught
 * Xapian::Error are handled separately (the result in those cases is
 * presumably 0 -- confirm). */
626 notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key)
628 Xapian::Document doc;
630 notmuch_private_status_t status;
634 db_key = timestamp_db_key (key);
637 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
639 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
642 ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP));
643 } catch (Xapian::Error &error) {
/* Add the mail file 'filename' to the database.
 *
 * Steps: open the file and restrict the headers of interest; determine a
 * message ID (the parsed Message-ID header, the raw header value if
 * parsing fails, or "notmuch-sha1-<sha1 of the file>"); obtain a message
 * object for that ID; stop with NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID when
 * that message already has a non-empty filename; otherwise record the
 * filename, a "type"/"mail" term, one "ref" term per parent taken from
 * References and In-Reply-To, the thread ids and the date, then sync the
 * message.
 *
 * Returns a notmuch_status_t: NOTMUCH_STATUS_FILE_ERROR when the file
 * cannot be opened or hashed, NOTMUCH_STATUS_FILE_NOT_EMAIL (decided
 * after reading the from/subject/to headers, presumably when all are
 * missing -- confirm), NOTMUCH_STATUS_XAPIAN_EXCEPTION when a
 * Xapian::Error is caught, and NOTMUCH_STATUS_SUCCESS otherwise. */
655 notmuch_database_add_message (notmuch_database_t *notmuch,
656 const char *filename)
658 notmuch_message_file_t *message_file;
659 notmuch_message_t *message;
660 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
662 GPtrArray *parents, *thread_ids;
664 const char *refs, *in_reply_to, *date, *header;
665 const char *from, *to, *subject, *old_filename;
670 message_file = notmuch_message_file_open (filename);
671 if (message_file == NULL) {
672 ret = NOTMUCH_STATUS_FILE_ERROR;
676 notmuch_message_file_restrict_headers (message_file,
687 /* The first order of business is to find/create a message ID. */
689 header = notmuch_message_file_get_header (message_file, "message-id");
691 message_id = parse_message_id (header, NULL);
692 /* So the header value isn't RFC-compliant, but it's
693 * better than no message-id at all. */
694 if (message_id == NULL)
695 message_id = xstrdup (header);
697 /* No message-id at all, let's generate one by taking a
698 * hash over the file's contents. */
699 char *sha1 = notmuch_sha1_of_file (filename);
701 /* If that failed too, something is really wrong. Give up. */
703 ret = NOTMUCH_STATUS_FILE_ERROR;
707 message_id = g_strdup_printf ("notmuch-sha1-%s", sha1);
711 /* Now that we have a message ID, we get a message object,
712 * (which may or may not reference an existing document in the
715 /* Use NULL for owner since we want to free this locally. */
717 /* XXX: This call can fail by either out-of-memory or an
718 * "impossible" Xapian exception. We should rewrite it to
719 * allow us to propagate the error status. */
720 message = _notmuch_message_create_for_message_id (NULL, notmuch,
722 if (message == NULL) {
723 fprintf (stderr, "Internal error. This shouldn't happen.\n\n");
724 fprintf (stderr, "I mean, it's possible you ran out of memory, but then this code path is still an internal error since it should have detected that and propagated the status value up the stack.\n");
728 /* Has a message previously been added with the same ID? */
729 old_filename = notmuch_message_get_filename (message);
730 if (old_filename && strlen (old_filename)) {
731 ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
734 _notmuch_message_set_filename (message, filename);
735 _notmuch_message_add_term (message, "type", "mail");
738 /* Next, find the thread(s) to which this message belongs. */
739 parents = g_ptr_array_new ();
741 refs = notmuch_message_file_get_header (message_file, "references");
742 parse_references (parents, refs);
744 in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
745 parse_references (parents, in_reply_to);
747 for (i = 0; i < parents->len; i++)
748 _notmuch_message_add_term (message, "ref",
749 (char *) g_ptr_array_index (parents, i));
751 thread_ids = find_thread_ids (notmuch, parents, message_id);
/* The parent ids are no longer needed: release each string, then the
 * array. NOTE(review): these strings come from parse_message_id
 * (strndup) but are released with g_free -- confirm the two allocators
 * are interchangeable in this build. */
755 for (i = 0; i < parents->len; i++)
756 g_free (g_ptr_array_index (parents, i));
757 g_ptr_array_free (parents, TRUE);
759 if (thread_ids->len) {
/* Attach every thread id to the message and accumulate the ids in the
 * GString 'thread_id', separated by commas. */
764 for (i = 0; i < thread_ids->len; i++) {
765 id = (char *) thread_ids->pdata[i];
766 _notmuch_message_add_thread_id (message, id);
768 thread_id = g_string_new (id);
770 g_string_append_printf (thread_id, ",%s", id);
774 g_string_free (thread_id, TRUE);
776 _notmuch_message_ensure_thread_id (message);
779 g_ptr_array_free (thread_ids, TRUE);
781 date = notmuch_message_file_get_header (message_file, "date");
782 _notmuch_message_set_date (message, date);
784 from = notmuch_message_file_get_header (message_file, "from");
785 subject = notmuch_message_file_get_header (message_file, "subject");
786 to = notmuch_message_file_get_header (message_file, "to");
792 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
795 _notmuch_message_sync (message);
797 } catch (const Xapian::Error &error) {
798 fprintf (stderr, "A Xapian exception occurred: %s.\n",
799 error.get_msg().c_str());
800 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
806 notmuch_message_destroy (message);
808 notmuch_message_file_close (message_file);