]> git.notmuchmail.org Git - notmuch/blobdiff - lib/message-file.c
util: move strcase_equal and strcase_hash to util
[notmuch] / lib / message-file.c
index 197ab0143f18e029cdd3b8858ce087fde507c3a4..ee305202fffa63f0e1a1d024c738a1d69a39ebb4 100644 (file)
 
 #include <glib.h> /* GHashTable */
 
-typedef struct {
-    char *str;
-    size_t size;
-    size_t len;
-} header_value_closure_t;
-
 struct _notmuch_message_file {
     /* File object */
     FILE *file;
+    char *filename;
 
-    /* Header storage */
-    int restrict_headers;
+    /* Cache for decoded headers */
     GHashTable *headers;
-    int broken_headers;
-    int good_headers;
-    size_t header_size; /* Length of full message header in bytes. */
-
-    /* Parsing state */
-    char *line;
-    size_t line_size;
-    header_value_closure_t value;
 
-    int parsing_started;
-    int parsing_finished;
+    GMimeMessage *message;
 };
 
-static int
-strcase_equal (const void *a, const void *b)
-{
-    return strcasecmp (a, b) == 0;
-}
-
-static unsigned int
-strcase_hash (const void *ptr)
-{
-    const char *s = ptr;
-
-    /* This is the djb2 hash. */
-    unsigned int hash = 5381;
-    while (s && *s) {
-       hash = ((hash << 5) + hash) + tolower (*s);
-       s++;
-    }
-
-    return hash;
-}
-
 static int
 _notmuch_message_file_destructor (notmuch_message_file_t *message)
 {
-    if (message->line)
-       free (message->line);
-
-    if (message->value.size)
-       free (message->value.str);
-
     if (message->headers)
        g_hash_table_destroy (message->headers);
 
+    if (message->message)
+       g_object_unref (message->message);
+
     if (message->file)
        fclose (message->file);
 
@@ -94,7 +55,8 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message)
 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
  * the talloc owner. */
 notmuch_message_file_t *
-_notmuch_message_file_open_ctx (void *ctx, const char *filename)
+_notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
+                               void *ctx, const char *filename)
 {
     notmuch_message_file_t *message;
 
@@ -102,256 +64,243 @@ _notmuch_message_file_open_ctx (void *ctx, const char *filename)
     if (unlikely (message == NULL))
        return NULL;
 
+    /* Only needed for error messages during parsing. */
+    message->filename = talloc_strdup (message, filename);
+    if (message->filename == NULL)
+       goto FAIL;
+
     talloc_set_destructor (message, _notmuch_message_file_destructor);
 
     message->file = fopen (filename, "r");
     if (message->file == NULL)
        goto FAIL;
 
-    message->headers = g_hash_table_new_full (strcase_hash,
-                                             strcase_equal,
-                                             free,
-                                             free);
-
-    message->parsing_started = 0;
-    message->parsing_finished = 0;
-
     return message;
 
   FAIL:
-    fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
-    notmuch_message_file_close (message);
+    _notmuch_database_log (notmuch, "Error opening %s: %s\n",
+                         filename, strerror (errno));
+    _notmuch_message_file_close (message);
 
     return NULL;
 }
 
 notmuch_message_file_t *
-notmuch_message_file_open (const char *filename)
+_notmuch_message_file_open (notmuch_database_t *notmuch,
+                           const char *filename)
 {
-    return _notmuch_message_file_open_ctx (NULL, filename);
+    return _notmuch_message_file_open_ctx (notmuch, NULL, filename);
 }
 
 void
-notmuch_message_file_close (notmuch_message_file_t *message)
+_notmuch_message_file_close (notmuch_message_file_t *message)
 {
     talloc_free (message);
 }
 
-void
-notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
-                                       va_list va_headers)
+static notmuch_bool_t
+_is_mbox (FILE *file)
 {
-    char *header;
+    char from_buf[5];
+    notmuch_bool_t ret = FALSE;
 
-    if (message->parsing_started)
-       INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
+    /* Is this mbox? */
+    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
+       strncmp (from_buf, "From ", 5) == 0)
+       ret = TRUE;
 
-    while (1) {
-       header = va_arg (va_headers, char*);
-       if (header == NULL)
-           break;
-       g_hash_table_insert (message->headers,
-                            xstrdup (header), NULL);
-    }
+    rewind (file);
 
-    message->restrict_headers = 1;
+    return ret;
 }
 
-void
-notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
+notmuch_status_t
+_notmuch_message_file_parse (notmuch_message_file_t *message)
 {
-    va_list va_headers;
+    GMimeStream *stream;
+    GMimeParser *parser;
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    static int initialized = 0;
+    notmuch_bool_t is_mbox;
 
-    va_start (va_headers, message);
+    if (message->message)
+       return NOTMUCH_STATUS_SUCCESS;
 
-    notmuch_message_file_restrict_headersv (message, va_headers);
-}
+    is_mbox = _is_mbox (message->file);
 
-static void
-copy_header_unfolding (header_value_closure_t *value,
-                      const char *chunk)
-{
-    char *last;
+    if (! initialized) {
+       g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
+       initialized = 1;
+    }
 
-    if (chunk == NULL)
-       return;
+    message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
+                                             free, g_free);
+    if (! message->headers)
+       return NOTMUCH_STATUS_OUT_OF_MEMORY;
 
-    while (*chunk == ' ' || *chunk == '\t')
-       chunk++;
+    stream = g_mime_stream_file_new (message->file);
 
-    if (value->len + 1 + strlen (chunk) + 1 > value->size) {
-       unsigned int new_size = value->size;
-       if (value->size == 0)
-           new_size = strlen (chunk) + 1;
-       else
-           while (value->len + 1 + strlen (chunk) + 1 > new_size)
-               new_size *= 2;
-       value->str = xrealloc (value->str, new_size);
-       value->size = new_size;
+    /* We'll own and fclose the FILE* ourselves. */
+    g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), FALSE);
+
+    parser = g_mime_parser_new_with_stream (stream);
+    g_mime_parser_set_scan_from (parser, is_mbox);
+
+    message->message = g_mime_parser_construct_message (parser);
+    if (! message->message) {
+       status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+       goto DONE;
     }
 
-    last = value->str + value->len;
-    if (value->len) {
-       *last = ' ';
-       last++;
-       value->len++;
+    if (is_mbox && ! g_mime_parser_eos (parser)) {
+       /*
+        * This is a multi-message mbox. (For historical reasons, we
+        * do support single-message mboxes.)
+        */
+       status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
     }
 
-    strcpy (last, chunk);
-    value->len += strlen (chunk);
+  DONE:
+    g_object_unref (stream);
+    g_object_unref (parser);
+
+    if (status) {
+       g_hash_table_destroy (message->headers);
+       message->headers = NULL;
+
+       if (message->message) {
+           g_object_unref (message->message);
+           message->message = NULL;
+       }
 
-    last = value->str + value->len - 1;
-    if (*last == '\n') {
-       *last = '\0';
-       value->len--;
+       rewind (message->file);
     }
+
+    return status;
 }
 
-/* As a special-case, a value of NULL for header_desired will force
- * the entire header to be parsed if it is not parsed already. This is
- * used by the _notmuch_message_file_get_headers_end function. */
-const char *
-notmuch_message_file_get_header (notmuch_message_file_t *message,
-                                const char *header_desired)
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+                                       GMimeMessage **mime_message)
 {
-    int contains;
-    char *header, *decoded_value;
-    const char *s, *colon;
-    int match;
-    static int initialized = 0;
+    notmuch_status_t status;
 
-    if (! initialized) {
-       g_mime_init (0);
-       initialized = 1;
-    }
+    status = _notmuch_message_file_parse (message);
+    if (status)
+       return status;
 
-    message->parsing_started = 1;
-
-    if (header_desired == NULL)
-       contains = 0;
-    else
-       contains = g_hash_table_lookup_extended (message->headers,
-                                                header_desired, NULL,
-                                                (gpointer *) &decoded_value);
-
-    if (contains && decoded_value)
-       return decoded_value;
-
-    if (message->parsing_finished)
-       return "";
-
-#define NEXT_HEADER_LINE(closure)                              \
-    while (1) {                                                        \
-       ssize_t bytes_read = getline (&message->line,           \
-                                     &message->line_size,      \
-                                     message->file);           \
-       if (bytes_read == -1) {                                 \
-           message->parsing_finished = 1;                      \
-           break;                                              \
-       }                                                       \
-       if (*message->line == '\n') {                           \
-           message->parsing_finished = 1;                      \
-           break;                                              \
-       }                                                       \
-       if (closure &&                                          \
-           (*message->line == ' ' || *message->line == '\t'))  \
-       {                                                       \
-           copy_header_unfolding ((closure), message->line);   \
-       }                                                       \
-       if (*message->line == ' ' || *message->line == '\t')    \
-           message->header_size += strlen (message->line);     \
-       else                                                    \
-           break;                                              \
-    }
+    *mime_message = message->message;
 
-    if (message->line == NULL)
-       NEXT_HEADER_LINE (NULL);
+    return NOTMUCH_STATUS_SUCCESS;
+}
 
-    while (1) {
+/*
+ * Get all instances of a header decoded and concatenated.
+ *
+ * The result must be freed using g_free().
+ *
+ * Return NULL on errors, empty string for non-existing headers.
+ */
+static char *
+_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
+                                          const char *header)
+{
+    GMimeHeaderList *headers;
+    GMimeHeaderIter *iter;
+    char *combined = NULL;
 
-       if (message->parsing_finished)
-           break;
+    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
+    if (! headers)
+       return NULL;
 
-       colon = strchr (message->line, ':');
+    iter = g_mime_header_iter_new ();
+    if (! iter)
+       return NULL;
 
-       if (colon == NULL) {
-           message->broken_headers++;
-           /* A simple heuristic for giving up on things that just
-            * don't look like mail messages. */
-           if (message->broken_headers >= 10 &&
-               message->good_headers < 5)
-           {
-               message->parsing_finished = 1;
-               continue;
-           }
-           NEXT_HEADER_LINE (NULL);
-           continue;
-       }
+    if (! g_mime_header_list_get_iter (headers, iter))
+       goto DONE;
 
-       message->header_size += strlen (message->line);
+    do {
+       const char *value;
+       char *decoded;
 
-       message->good_headers++;
+       if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
+           continue;
 
-       header = xstrndup (message->line, colon - message->line);
+       /* Note that GMime retains ownership of value... */
+       value = g_mime_header_iter_get_value (iter);
 
-       if (message->restrict_headers &&
-           ! g_hash_table_lookup_extended (message->headers,
-                                           header, NULL, NULL))
-       {
-           free (header);
-           NEXT_HEADER_LINE (NULL);
-           continue;
+       /* ... while decoded needs to be freed with g_free(). */
+       decoded = g_mime_utils_header_decode_text (value);
+       if (! decoded) {
+           if (combined) {
+               g_free (combined);
+               combined = NULL;
+           }
+           goto DONE;
        }
 
-       s = colon + 1;
-       while (*s == ' ' || *s == '\t')
-           s++;
+       if (combined) {
+           char *tmp = g_strdup_printf ("%s %s", combined, decoded);
+           g_free (decoded);
+           g_free (combined);
+           if (! tmp) {
+               combined = NULL;
+               goto DONE;
+           }
 
-       message->value.len = 0;
-       copy_header_unfolding (&message->value, s);
+           combined = tmp;
+       } else {
+           combined = decoded;
+       }
+    } while (g_mime_header_iter_next (iter));
 
-       NEXT_HEADER_LINE (&message->value);
+    /* Return empty string for non-existing headers. */
+    if (! combined)
+       combined = g_strdup ("");
 
-       if (header_desired == 0)
-           match = 0;
-       else
-           match = (strcasecmp (header, header_desired) == 0);
+  DONE:
+    g_mime_header_iter_free (iter);
 
-       decoded_value = g_mime_utils_header_decode_text (message->value.str);
+    return combined;
+}
 
-       g_hash_table_insert (message->headers, header, decoded_value);
+const char *
+_notmuch_message_file_get_header (notmuch_message_file_t *message,
+                                const char *header)
+{
+    const char *value;
+    char *decoded;
 
-       if (match)
-           return decoded_value;
-    }
+    if (_notmuch_message_file_parse (message))
+       return NULL;
 
-    if (message->parsing_finished) {
-        fclose (message->file);
-        message->file = NULL;
+    /* If we have a cached decoded value, use it. */
+    value = g_hash_table_lookup (message->headers, header);
+    if (value)
+       return value;
+
+    if (strcasecmp (header, "received") == 0) {
+       /*
+        * The Received: header is special. We concatenate all
+        * instances of the header as we use this when analyzing the
+        * path the mail has taken from sender to recipient.
+        */
+       decoded = _notmuch_message_file_get_combined_header (message, header);
+    } else {
+       value = g_mime_object_get_header (GMIME_OBJECT (message->message),
+                                         header);
+       if (value)
+           decoded = g_mime_utils_header_decode_text (value);
+       else
+           decoded = g_strdup ("");
     }
 
-    if (message->line)
-       free (message->line);
-    message->line = NULL;
-
-    if (message->value.size) {
-       free (message->value.str);
-       message->value.str = NULL;
-       message->value.size = 0;
-       message->value.len = 0;
-    }
+    if (! decoded)
+       return NULL;
 
-    /* We've parsed all headers and never found the one we're looking
-     * for. It's probably just not there, but let's check that we
-     * didn't make a mistake preventing us from seeing it. */
-    if (message->restrict_headers && header_desired &&
-       ! g_hash_table_lookup_extended (message->headers,
-                                       header_desired, NULL, NULL))
-    {
-       INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
-                       "included in call to notmuch_message_file_restrict_headers\n",
-                       header_desired);
-    }
+    /* Cache the decoded value. We also own the strings. */
+    g_hash_table_insert (message->headers, xstrdup (header), decoded);
 
-    return "";
+    return decoded;
 }