X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=database.cc;h=c5d774763307ab8b9c1b862ef7c0b4371d586511;hp=36b1b5808f31c7786c40b14f381ed817307bf771;hb=c78358fa8adc0e8fa4f889e479df76e8df186264;hpb=10c176ba0e6d71e920b72a3165c0e56f26b5e4b3 diff --git a/database.cc b/database.cc index 36b1b580..c5d77476 100644 --- a/database.cc +++ b/database.cc @@ -18,130 +18,40 @@ * Author: Carl Worth */ -#include "notmuch-private.h" - -#include -#include -#include -#include -#include -#include -#include +#include "database-private.h" #include -#include - #include -using namespace std; - -struct _notmuch_database { - char *path; - Xapian::WritableDatabase *xapian_db; - Xapian::TermGenerator *term_gen; -}; - -#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) - -/* Xapian complains if we provide a term longer than this. */ -#define NOTMUCH_MAX_TERM 245 - -/* These prefix values are specifically chosen to be compatible - * with sup, (http://sup.rubyforge.org), written by - * William Morgan , and released - * under the GNU GPL v2. - */ - -typedef struct { - const char *name; - const char *prefix; -} prefix_t; - -prefix_t NORMAL_PREFIX[] = { - { "subject", "S" }, - { "body", "B" }, - { "from_name", "FN" }, - { "to_name", "TN" }, - { "name", "N" }, - { "attachment", "A" } -}; - -prefix_t BOOLEAN_PREFIX[] = { - { "type", "K" }, - { "from_email", "FE" }, - { "to_email", "TE" }, - { "email", "E" }, - { "date", "D" }, - { "label", "L" }, - { "source_id", "I" }, - { "attachment_extension", "O" }, - { "msgid", "Q" }, - { "thread", "H" }, - { "ref", "R" } -}; - -/* Similarly, these value numbers are also chosen to be sup - * compatible. */ - -typedef enum { - NOTMUCH_VALUE_MESSAGE_ID = 0, - NOTMUCH_VALUE_THREAD = 1, - NOTMUCH_VALUE_DATE = 2 -} notmuch_value_t; - -static const char * -find_prefix (const char *name) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE (NORMAL_PREFIX); i++) - if (strcmp (name, NORMAL_PREFIX[i].name) == 0) - return NORMAL_PREFIX[i].prefix; - - for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++) - if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0) - return BOOLEAN_PREFIX[i].prefix; +#include /* g_strdup_printf, g_free, GPtrArray, GHashTable */ - return ""; -} - -/* "128 bits of thread-id ought to be enough for anybody" */ -#define NOTMUCH_THREAD_ID_BITS 128 -#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4) -typedef struct _thread_id { - char str[NOTMUCH_THREAD_ID_DIGITS + 1]; -} thread_id_t; +using namespace std; -static void -thread_id_generate (thread_id_t *thread_id) +const char * +notmuch_status_to_string (notmuch_status_t status) { - static int seeded = 0; - FILE *dev_random; - uint32_t value; - char *s; - int i; - - if (! seeded) { - dev_random = fopen ("/dev/random", "r"); - if (dev_random == NULL) { - srand (time (NULL)); - } else { - fread ((void *) &value, sizeof (value), 1, dev_random); - srand (value); - fclose (dev_random); - } - seeded = 1; - } - - s = thread_id->str; - for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) { - value = rand (); - sprintf (s, "%08x", value); - s += 8; + switch (status) { + case NOTMUCH_STATUS_SUCCESS: + return "No error occurred"; + case NOTMUCH_STATUS_XAPIAN_EXCEPTION: + return "A Xapian exception occurred"; + case NOTMUCH_STATUS_FILE_ERROR: + return "Something went wrong trying to read or write a file"; + case NOTMUCH_STATUS_FILE_NOT_EMAIL: + return "File is not an email"; + case NOTMUCH_STATUS_NULL_POINTER: + return "Erroneous NULL pointer"; + case NOTMUCH_STATUS_TAG_TOO_LONG: + return "Tag value is too long"; + default: + case NOTMUCH_STATUS_LAST_STATUS: + return "Unknown error status value"; } } +/* XXX: We should drop this function and convert all callers to call + * _notmuch_message_add_term instead. */ static void add_term (Xapian::Document doc, const char *prefix_name, @@ -153,11 +63,11 @@ add_term (Xapian::Document doc, if (value == NULL) return; - prefix = find_prefix (prefix_name); + prefix = _find_prefix (prefix_name); term = g_strdup_printf ("%s%s", prefix, value); - if (strlen (term) <= NOTMUCH_MAX_TERM) + if (strlen (term) <= NOTMUCH_TERM_MAX) doc.add_term (term); g_free (term); @@ -173,7 +83,7 @@ find_messages_by_term (Xapian::Database *db, Xapian::PostingIterator i; char *term; - term = g_strdup_printf ("%s%s", find_prefix (prefix_name), value); + term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value); *begin = db->postlist_begin (term); @@ -189,19 +99,6 @@ find_message_by_docid (Xapian::Database *db, Xapian::docid docid) return db->get_document (docid); } -Xapian::Document -find_message_by_message_id (Xapian::Database *db, const char *message_id) -{ - Xapian::PostingIterator i, end; - - find_messages_by_term (db, "msgid", message_id, &i, &end); - - if (i != end) - return find_message_by_docid (db, *i); - else - return Xapian::Document (); -} - static void insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) { @@ -225,6 +122,21 @@ insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) } } +notmuch_message_t * +notmuch_database_find_message (notmuch_database_t *notmuch, + const char *message_id) +{ + Xapian::PostingIterator i, end; + + find_messages_by_term (notmuch->xapian_db, + "msgid", message_id, &i, &end); + + if (i == end) + return NULL; + + return _notmuch_message_create (notmuch, notmuch, *i); +} + /* Return one or more thread_ids, (as a GPtrArray of strings), for the * given message based on looking into the database for any messages * referenced in parents, and also for any messages in the database @@ -233,10 +145,11 @@ insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) * Caller should free all strings in the array and the array itself, * (g_ptr_array_free) when done. */ static GPtrArray * -find_thread_ids (Xapian::Database *db, +find_thread_ids (notmuch_database_t *notmuch, GPtrArray *parents, const char *message_id) { + Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::PostingIterator child, children_end; Xapian::Document doc; GHashTable *thread_ids; @@ -255,9 +168,25 @@ find_thread_ids (Xapian::Database *db, } for (i = 0; i < parents->len; i++) { + notmuch_message_t *parent; + notmuch_thread_ids_t *ids; + parent_message_id = (char *) g_ptr_array_index (parents, i); - doc = find_message_by_message_id (db, parent_message_id); - insert_thread_id (thread_ids, doc); + parent = notmuch_database_find_message (notmuch, parent_message_id); + if (parent == NULL) + continue; + + for (ids = notmuch_message_get_thread_ids (parent); + notmuch_thread_ids_has_more (ids); + notmuch_thread_ids_advance (ids)) + { + const char *id; + + id = notmuch_thread_ids_get (ids); + g_hash_table_insert (thread_ids, strdup (id), NULL); + } + + notmuch_message_destroy (parent); } result = g_ptr_array_new (); @@ -278,46 +207,161 @@ find_thread_ids (Xapian::Database *db, return result; } -/* Add a term for each message-id in the References header of the - * message. */ +/* Advance 'str' past any whitespace or RFC 822 comments. A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') + nesting++; + else if (*s == ')') + nesting--; + else if (*s == '\\') + if (*(s+1)) + s++; + s++; + } + } + } + + *str = s; +} + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimeters. + * + * If not NULL, then *next will be made to point to the first character + * not parsed, (possibly pointing to the final '\0' terminator. + * + * Returns a newly allocated string which the caller should free() + * when done with it. + * + * Returns NULL if there is any error parsing the message-id. */ +static char * +parse_message_id (const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL) + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = strndup (s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} + +/* Parse a References header value, putting a copy of each referenced + * message-id into 'array'. */ static void parse_references (GPtrArray *array, - const char *refs_str) + const char *refs) { - GMimeReferences *refs, *r; - const char *message_id; + char *ref; - if (refs_str == NULL) + if (refs == NULL) return; - refs = g_mime_references_decode (refs_str); + while (*refs) { + ref = parse_message_id (refs, &refs); - for (r = refs; r; r = r->next) { - message_id = g_mime_references_get_message_id (r); - g_ptr_array_add (array, g_strdup (message_id)); + if (ref) + g_ptr_array_add (array, ref); } +} + +char * +notmuch_database_default_path (void) +{ + if (getenv ("NOTMUCH_BASE")) + return strdup (getenv ("NOTMUCH_BASE")); - g_mime_references_free (refs); + return g_strdup_printf ("%s/mail", getenv ("HOME")); } notmuch_database_t * notmuch_database_create (const char *path) { - char *notmuch_path; + notmuch_database_t *notmuch = NULL; + char *notmuch_path = NULL; struct stat st; int err; + char *local_path = NULL; + + if (path == NULL) + path = local_path = notmuch_database_default_path (); err = stat (path, &st); if (err) { fprintf (stderr, "Error: Cannot create database at %s: %s.\n", path, strerror (errno)); - return NULL; + goto DONE; } if (! S_ISDIR (st.st_mode)) { fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n", path); - return NULL; + goto DONE; } notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); @@ -327,52 +371,64 @@ notmuch_database_create (const char *path) if (err) { fprintf (stderr, "Error: Cannot create directory %s: %s.\n", notmuch_path, strerror (errno)); - free (notmuch_path); - return NULL; + goto DONE; } - free (notmuch_path); + notmuch = notmuch_database_open (path); + + DONE: + if (notmuch_path) + free (notmuch_path); + if (local_path) + free (local_path); - return notmuch_database_open (path); + return notmuch; } notmuch_database_t * notmuch_database_open (const char *path) { - notmuch_database_t *notmuch; - char *notmuch_path, *xapian_path; + notmuch_database_t *notmuch = NULL; + char *notmuch_path = NULL, *xapian_path = NULL; struct stat st; int err; + char *local_path = NULL; - g_mime_init (0); + if (path == NULL) + path = local_path = notmuch_database_default_path (); notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch"); err = stat (notmuch_path, &st); if (err) { - fprintf (stderr, "Error: Cannot stat %s: %s\n", - notmuch_path, strerror (err)); - free (notmuch_path); - return NULL; + fprintf (stderr, "Error opening database at %s: %s\n", + notmuch_path, strerror (errno)); + goto DONE; } xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian"); - free (notmuch_path); - /* C++ is so nasty in requiring these casts. I'm almost tempted to - * write a C wrapper for Xapian... */ - notmuch = (notmuch_database_t *) xmalloc (sizeof (notmuch_database_t)); - notmuch->path = xstrdup (path); + notmuch = talloc (NULL, notmuch_database_t); + notmuch->path = talloc_strdup (notmuch, path); try { notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, Xapian::DB_CREATE_OR_OPEN); + notmuch->query_parser = new Xapian::QueryParser; + notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); + notmuch->query_parser->set_database (*notmuch->xapian_db); } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s\n", error.get_msg().c_str()); } - free (xapian_path); + DONE: + if (local_path) + free (local_path); + if (notmuch_path) + free (notmuch_path); + if (xapian_path) + free (xapian_path); return notmuch; } @@ -380,9 +436,9 @@ notmuch_database_open (const char *path) void notmuch_database_close (notmuch_database_t *notmuch) { + delete notmuch->query_parser; delete notmuch->xapian_db; - free (notmuch->path); - free (notmuch); + talloc_free (notmuch); } const char * @@ -397,59 +453,82 @@ notmuch_database_add_message (notmuch_database_t *notmuch, { Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::Document doc; + notmuch_message_file_t *message_file; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - GMimeStream *stream; - GMimeParser *parser; - GMimeMessage *message; GPtrArray *parents, *thread_ids; - FILE *file; + const char *refs, *in_reply_to, *date, *header; + const char *from, *to, *subject; + char *message_id; - const char *refs, *in_reply_to; - const char *message_id; - - time_t time; + time_t time_value; unsigned int i; - file = fopen (filename, "r"); - if (! file) { - fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno)); - exit (1); + message_file = notmuch_message_file_open (filename); + if (message_file == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; } - stream = g_mime_stream_file_new (file); - - parser = g_mime_parser_new_with_stream (stream); - - message = g_mime_parser_construct_message (parser); + notmuch_message_file_restrict_headers (message_file, + "date", + "from", + "in-reply-to", + "message-id", + "references", + "subject", + "to", + (char *) NULL); try { - doc = Xapian::Document (); - doc.set_data (filename); + add_term (doc, "type", "mail"); + parents = g_ptr_array_new (); - refs = g_mime_object_get_header (GMIME_OBJECT (message), "references"); + refs = notmuch_message_file_get_header (message_file, "references"); parse_references (parents, refs); - in_reply_to = g_mime_object_get_header (GMIME_OBJECT (message), - "in-reply-to"); + in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to"); parse_references (parents, in_reply_to); + for (i = 0; i < parents->len; i++) add_term (doc, "ref", (char *) g_ptr_array_index (parents, i)); - message_id = g_mime_message_get_message_id (message); + header = notmuch_message_file_get_header (message_file, "message-id"); + if (header) { + message_id = parse_message_id (header, NULL); + /* So the header value isn't RFC-compliant, but it's + * better than no message-id at all. */ + if (message_id == NULL) + message_id = xstrdup (header); + } else { + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. */ + char *sha1 = notmuch_sha1_of_file (filename); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } - thread_ids = find_thread_ids (db, parents, message_id); + message_id = g_strdup_printf ("notmuch-sha1-%s", sha1); + free (sha1); + } + + thread_ids = find_thread_ids (notmuch, parents, message_id); for (i = 0; i < parents->len; i++) g_free (g_ptr_array_index (parents, i)); g_ptr_array_free (parents, TRUE); - if (message_id) { - add_term (doc, "msgid", message_id); - doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); - } + + add_term (doc, "msgid", message_id); + doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + + free (message_id); if (thread_ids->len) { unsigned int i; @@ -466,10 +545,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch, free (id); } - g_ptr_array_free (thread_ids, TRUE); doc.add_value (NOTMUCH_VALUE_THREAD, thread_id->str); g_string_free (thread_id, TRUE); - } else if (message_id) { + } else { /* If not part of any existing thread, generate a new thread_id. */ thread_id_t thread_id; @@ -478,19 +556,37 @@ notmuch_database_add_message (notmuch_database_t *notmuch, doc.add_value (NOTMUCH_VALUE_THREAD, thread_id.str); } - g_mime_message_get_date (message, &time, NULL); - doc.add_value (NOTMUCH_VALUE_DATE, Xapian::sortable_serialise (time)); + g_ptr_array_free (thread_ids, TRUE); + + date = notmuch_message_file_get_header (message_file, "date"); + time_value = notmuch_parse_date (date, NULL); + + doc.add_value (NOTMUCH_VALUE_DATE, + Xapian::sortable_serialise (time_value)); - db->add_document (doc); + from = notmuch_message_file_get_header (message_file, "from"); + subject = notmuch_message_file_get_header (message_file, "subject"); + to = notmuch_message_file_get_header (message_file, "to"); + + if (from == NULL && + subject == NULL && + to == NULL) + { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; + } else { + db->add_document (doc); + } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s.\n", error.get_msg().c_str()); - return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + goto DONE; } - g_object_unref (message); - g_object_unref (parser); - g_object_unref (stream); + DONE: + if (message_file) + notmuch_message_file_close (message_file); - return NOTMUCH_STATUS_SUCCESS; + return ret; }