git.notmuchmail.org Git - notmuch/blobdiff - lib/message-file.c
util: move strcase_equal and strcase_hash to util
[notmuch] / lib / message-file.c
index 4d9af89fe44dd5923cee743572149f4cf2c22f46..ee305202fffa63f0e1a1d024c738a1d69a39ebb4 100644 (file)
 
 #include <glib.h> /* GHashTable */
 
-typedef struct {
-    char *str;
-    size_t size;
-    size_t len;
-} header_value_closure_t;
-
 struct _notmuch_message_file {
     /* File object */
     FILE *file;
+    char *filename;
 
-    /* Header storage */
-    int restrict_headers;
+    /* Cache for decoded headers */
     GHashTable *headers;
-    int broken_headers;
-    int good_headers;
-    size_t header_size; /* Length of full message header in bytes. */
-
-    /* Parsing state */
-    char *line;
-    size_t line_size;
-    header_value_closure_t value;
 
-    int parsing_started;
-    int parsing_finished;
+    GMimeMessage *message;
 };
 
-static int
-strcase_equal (const void *a, const void *b)
-{
-    return strcasecmp (a, b) == 0;
-}
-
-static unsigned int
-strcase_hash (const void *ptr)
-{
-    const char *s = ptr;
-
-    /* This is the djb2 hash. */
-    unsigned int hash = 5381;
-    while (s && *s) {
-       hash = ((hash << 5) + hash) + tolower (*s);
-       s++;
-    }
-
-    return hash;
-}
-
 static int
 _notmuch_message_file_destructor (notmuch_message_file_t *message)
 {
-    if (message->line)
-       free (message->line);
-
-    if (message->value.size)
-       free (message->value.str);
-
     if (message->headers)
        g_hash_table_destroy (message->headers);
 
+    if (message->message)
+       g_object_unref (message->message);
+
     if (message->file)
        fclose (message->file);
 
@@ -94,7 +55,8 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message)
 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
  * the talloc owner. */
 notmuch_message_file_t *
-_notmuch_message_file_open_ctx (void *ctx, const char *filename)
+_notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
+                               void *ctx, const char *filename)
 {
     notmuch_message_file_t *message;
 
@@ -102,299 +64,243 @@ _notmuch_message_file_open_ctx (void *ctx, const char *filename)
     if (unlikely (message == NULL))
        return NULL;
 
+    /* Only needed for error messages during parsing. */
+    message->filename = talloc_strdup (message, filename);
+    if (message->filename == NULL)
+       goto FAIL;
+
     talloc_set_destructor (message, _notmuch_message_file_destructor);
 
     message->file = fopen (filename, "r");
     if (message->file == NULL)
        goto FAIL;
 
-    message->headers = g_hash_table_new_full (strcase_hash,
-                                             strcase_equal,
-                                             free,
-                                             g_free);
-
-    message->parsing_started = 0;
-    message->parsing_finished = 0;
-
     return message;
 
   FAIL:
-    fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
-    notmuch_message_file_close (message);
+    _notmuch_database_log (notmuch, "Error opening %s: %s\n",
+                         filename, strerror (errno));
+    _notmuch_message_file_close (message);
 
     return NULL;
 }
 
 notmuch_message_file_t *
-notmuch_message_file_open (const char *filename)
+_notmuch_message_file_open (notmuch_database_t *notmuch,
+                           const char *filename)
 {
-    return _notmuch_message_file_open_ctx (NULL, filename);
+    return _notmuch_message_file_open_ctx (notmuch, NULL, filename);
 }
 
 void
-notmuch_message_file_close (notmuch_message_file_t *message)
+_notmuch_message_file_close (notmuch_message_file_t *message)
 {
     talloc_free (message);
 }
 
-void
-notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
-                                       va_list va_headers)
+static notmuch_bool_t
+_is_mbox (FILE *file)
 {
-    char *header;
+    char from_buf[5];
+    notmuch_bool_t ret = FALSE;
 
-    if (message->parsing_started)
-       INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
+    /* Is this mbox? */
+    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
+       strncmp (from_buf, "From ", 5) == 0)
+       ret = TRUE;
 
-    while (1) {
-       header = va_arg (va_headers, char*);
-       if (header == NULL)
-           break;
-       g_hash_table_insert (message->headers,
-                            xstrdup (header), NULL);
-    }
+    rewind (file);
 
-    message->restrict_headers = 1;
+    return ret;
 }
 
-void
-notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
+notmuch_status_t
+_notmuch_message_file_parse (notmuch_message_file_t *message)
 {
-    va_list va_headers;
+    GMimeStream *stream;
+    GMimeParser *parser;
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    static int initialized = 0;
+    notmuch_bool_t is_mbox;
 
-    va_start (va_headers, message);
+    if (message->message)
+       return NOTMUCH_STATUS_SUCCESS;
 
-    notmuch_message_file_restrict_headersv (message, va_headers);
-}
+    is_mbox = _is_mbox (message->file);
 
-static void
-copy_header_unfolding (header_value_closure_t *value,
-                      const char *chunk)
-{
-    char *last;
+    if (! initialized) {
+       g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
+       initialized = 1;
+    }
 
-    if (chunk == NULL)
-       return;
+    message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
+                                             free, g_free);
+    if (! message->headers)
+       return NOTMUCH_STATUS_OUT_OF_MEMORY;
 
-    while (*chunk == ' ' || *chunk == '\t')
-       chunk++;
+    stream = g_mime_stream_file_new (message->file);
 
-    if (value->len + 1 + strlen (chunk) + 1 > value->size) {
-       unsigned int new_size = value->size;
-       if (value->size == 0)
-           new_size = strlen (chunk) + 1;
-       else
-           while (value->len + 1 + strlen (chunk) + 1 > new_size)
-               new_size *= 2;
-       value->str = xrealloc (value->str, new_size);
-       value->size = new_size;
+    /* We'll own and fclose the FILE* ourselves. */
+    g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), FALSE);
+
+    parser = g_mime_parser_new_with_stream (stream);
+    g_mime_parser_set_scan_from (parser, is_mbox);
+
+    message->message = g_mime_parser_construct_message (parser);
+    if (! message->message) {
+       status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+       goto DONE;
     }
 
-    last = value->str + value->len;
-    if (value->len) {
-       *last = ' ';
-       last++;
-       value->len++;
+    if (is_mbox && ! g_mime_parser_eos (parser)) {
+       /*
+        * This is a multi-message mbox. (For historical reasons, we
+        * do support single-message mboxes.)
+        */
+       status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
     }
 
-    strcpy (last, chunk);
-    value->len += strlen (chunk);
+  DONE:
+    g_object_unref (stream);
+    g_object_unref (parser);
+
+    if (status) {
+       g_hash_table_destroy (message->headers);
+       message->headers = NULL;
 
-    last = value->str + value->len - 1;
-    if (*last == '\n') {
-       *last = '\0';
-       value->len--;
+       if (message->message) {
+           g_object_unref (message->message);
+           message->message = NULL;
+       }
+
+       rewind (message->file);
     }
+
+    return status;
 }
 
-/* As a special-case, a value of NULL for header_desired will force
- * the entire header to be parsed if it is not parsed already. This is
- * used by the _notmuch_message_file_get_headers_end function.
- * Another special case is the Received: header. For this header we
- * want to concatenate all instances of the header instead of just
- * hashing the first instance as we use this when analyzing the path
- * the mail has taken from sender to recipient.
- */
-const char *
-notmuch_message_file_get_header (notmuch_message_file_t *message,
-                                const char *header_desired)
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+                                       GMimeMessage **mime_message)
 {
-    int contains;
-    char *header, *decoded_value, *header_sofar, *combined_header;
-    const char *s, *colon;
-    int match, newhdr, hdrsofar, is_received;
-    static int initialized = 0;
+    notmuch_status_t status;
 
-    is_received = (strcmp(header_desired,"received") == 0);
+    status = _notmuch_message_file_parse (message);
+    if (status)
+       return status;
 
-    if (! initialized) {
-       g_mime_init (0);
-       initialized = 1;
-    }
+    *mime_message = message->message;
 
-    message->parsing_started = 1;
-
-    if (header_desired == NULL)
-       contains = 0;
-    else
-       contains = g_hash_table_lookup_extended (message->headers,
-                                                header_desired, NULL,
-                                                (gpointer *) &decoded_value);
-
-    if (contains && decoded_value)
-       return decoded_value;
-
-    if (message->parsing_finished)
-       return "";
-
-#define NEXT_HEADER_LINE(closure)                              \
-    while (1) {                                                        \
-       ssize_t bytes_read = getline (&message->line,           \
-                                     &message->line_size,      \
-                                     message->file);           \
-       if (bytes_read == -1) {                                 \
-           message->parsing_finished = 1;                      \
-           break;                                              \
-       }                                                       \
-       if (*message->line == '\n') {                           \
-           message->parsing_finished = 1;                      \
-           break;                                              \
-       }                                                       \
-       if (closure &&                                          \
-           (*message->line == ' ' || *message->line == '\t'))  \
-       {                                                       \
-           copy_header_unfolding ((closure), message->line);   \
-       }                                                       \
-       if (*message->line == ' ' || *message->line == '\t')    \
-           message->header_size += strlen (message->line);     \
-       else                                                    \
-           break;                                              \
-    }
+    return NOTMUCH_STATUS_SUCCESS;
+}
 
-    if (message->line == NULL)
-       NEXT_HEADER_LINE (NULL);
+/*
+ * Get all instances of a header decoded and concatenated.
+ *
+ * The result must be freed using g_free().
+ *
+ * Return NULL on errors, empty string for non-existing headers.
+ */
+static char *
+_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
+                                          const char *header)
+{
+    GMimeHeaderList *headers;
+    GMimeHeaderIter *iter;
+    char *combined = NULL;
 
-    while (1) {
+    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
+    if (! headers)
+       return NULL;
 
-       if (message->parsing_finished)
-           break;
+    iter = g_mime_header_iter_new ();
+    if (! iter)
+       return NULL;
 
-       colon = strchr (message->line, ':');
+    if (! g_mime_header_list_get_iter (headers, iter))
+       goto DONE;
 
-       if (colon == NULL) {
-           message->broken_headers++;
-           /* A simple heuristic for giving up on things that just
-            * don't look like mail messages. */
-           if (message->broken_headers >= 10 &&
-               message->good_headers < 5)
-           {
-               message->parsing_finished = 1;
-               break;
-           }
-           NEXT_HEADER_LINE (NULL);
+    do {
+       const char *value;
+       char *decoded;
+
+       if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
            continue;
-       }
 
-       message->header_size += strlen (message->line);
+       /* Note that GMime retains ownership of value... */
+       value = g_mime_header_iter_get_value (iter);
 
-       message->good_headers++;
+       /* ... while decoded needs to be freed with g_free(). */
+       decoded = g_mime_utils_header_decode_text (value);
+       if (! decoded) {
+           if (combined) {
+               g_free (combined);
+               combined = NULL;
+           }
+           goto DONE;
+       }
 
-       header = xstrndup (message->line, colon - message->line);
+       if (combined) {
+           char *tmp = g_strdup_printf ("%s %s", combined, decoded);
+           g_free (decoded);
+           g_free (combined);
+           if (! tmp) {
+               combined = NULL;
+               goto DONE;
+           }
 
-       if (message->restrict_headers &&
-           ! g_hash_table_lookup_extended (message->headers,
-                                           header, NULL, NULL))
-       {
-           free (header);
-           NEXT_HEADER_LINE (NULL);
-           continue;
+           combined = tmp;
+       } else {
+           combined = decoded;
        }
+    } while (g_mime_header_iter_next (iter));
 
-       s = colon + 1;
-       while (*s == ' ' || *s == '\t')
-           s++;
-
-       message->value.len = 0;
-       copy_header_unfolding (&message->value, s);
+    /* Return empty string for non-existing headers. */
+    if (! combined)
+       combined = g_strdup ("");
 
-       NEXT_HEADER_LINE (&message->value);
+  DONE:
+    g_mime_header_iter_free (iter);
 
-       if (header_desired == NULL)
-           match = 0;
-       else
-           match = (strcasecmp (header, header_desired) == 0);
-
-       decoded_value = g_mime_utils_header_decode_text (message->value.str);
-       header_sofar = (char *)g_hash_table_lookup (message->headers, header);
-       /* we treat the Received: header special - we want to concat ALL of 
-        * the Received: headers we encounter.
-        * for everything else we return the first instance of a header */
-       if (strcasecmp(header, "received") == 0) {
-           if (header_sofar == NULL) {
-               /* first Received: header we encountered; just add it */
-               g_hash_table_insert (message->headers, header, decoded_value);
-           } else {
-               /* we need to add the header to those we already collected */
-               newhdr = strlen(decoded_value);
-               hdrsofar = strlen(header_sofar);
-               combined_header = g_malloc(hdrsofar + newhdr + 2);
-               strncpy(combined_header,header_sofar,hdrsofar);
-               *(combined_header+hdrsofar) = ' ';
-               strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1);
-               g_free (decoded_value);
-               g_hash_table_insert (message->headers, header, combined_header);
-           }
-       } else {
-           if (header_sofar == NULL) {
-               /* Only insert if we don't have a value for this header, yet. */
-               g_hash_table_insert (message->headers, header, decoded_value);
-           } else {
-               free (header);
-               g_free (decoded_value);
-               decoded_value = header_sofar;
-           }
-       }
-       /* if we found a match we can bail - unless of course we are
-        * collecting all the Received: headers */
-       if (match && !is_received)
-           return decoded_value;
-    }
+    return combined;
+}
 
-    if (message->parsing_finished) {
-        fclose (message->file);
-        message->file = NULL;
-    }
+const char *
+_notmuch_message_file_get_header (notmuch_message_file_t *message,
+                                const char *header)
+{
+    const char *value;
+    char *decoded;
 
-    if (message->line)
-       free (message->line);
-    message->line = NULL;
+    if (_notmuch_message_file_parse (message))
+       return NULL;
 
-    if (message->value.size) {
-       free (message->value.str);
-       message->value.str = NULL;
-       message->value.size = 0;
-       message->value.len = 0;
+    /* If we have a cached decoded value, use it. */
+    value = g_hash_table_lookup (message->headers, header);
+    if (value)
+       return value;
+
+    if (strcasecmp (header, "received") == 0) {
+       /*
+        * The Received: header is special. We concatenate all
+        * instances of the header as we use this when analyzing the
+        * path the mail has taken from sender to recipient.
+        */
+       decoded = _notmuch_message_file_get_combined_header (message, header);
+    } else {
+       value = g_mime_object_get_header (GMIME_OBJECT (message->message),
+                                         header);
+       if (value)
+           decoded = g_mime_utils_header_decode_text (value);
+       else
+           decoded = g_strdup ("");
     }
 
-    /* For the Received: header we actually might end up here even
-     * though we found the header (as we force continued parsing
-     * in that case). So let's check if that's the header we were
-     * looking for and return the value that we found (if any)
-     */
-    if (is_received)
-       return (char *)g_hash_table_lookup (message->headers, "received");
-
-    /* We've parsed all headers and never found the one we're looking
-     * for. It's probably just not there, but let's check that we
-     * didn't make a mistake preventing us from seeing it. */
-    if (message->restrict_headers && header_desired &&
-       ! g_hash_table_lookup_extended (message->headers,
-                                       header_desired, NULL, NULL))
-    {
-       INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
-                       "included in call to notmuch_message_file_restrict_headers\n",
-                       header_desired);
-    }
+    if (! decoded)
+       return NULL;
+
+    /* Cache the decoded value. We also own the strings. */
+    g_hash_table_insert (message->headers, xstrdup (header), decoded);
 
-    return "";
+    return decoded;
 }