X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=lib%2Fmessage-file.c;h=eda1b748e2022d73d2d8737f04151f342e0787ad;hp=0c152a3973ac719a8dc7bf51f1050c8a52604ceb;hb=f5db7ad7d243785c274a99734c681e69d13313d0;hpb=8cbb5114a20c1217f23977fd5edca99a0b7a2955

diff --git a/lib/message-file.c b/lib/message-file.c
index 0c152a39..eda1b748 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -26,30 +26,15 @@
 
 #include <glib.h> /* GHashTable */
 
-typedef struct {
-    char *str;
-    size_t size;
-    size_t len;
-} header_value_closure_t;
-
 struct _notmuch_message_file {
     /* File object */
     FILE *file;
+    char *filename;
 
-    /* Header storage */
-    int restrict_headers;
+    /* Cache for decoded headers */
     GHashTable *headers;
-    int broken_headers;
-    int good_headers;
-    size_t header_size; /* Length of full message header in bytes. */
-
-    /* Parsing state */
-    char *line;
-    size_t line_size;
-    header_value_closure_t value;
 
-    int parsing_started;
-    int parsing_finished;
+    GMimeMessage *message;
 };
 
 static int
@@ -76,15 +61,12 @@ strcase_hash (const void *ptr)
 static int
 _notmuch_message_file_destructor (notmuch_message_file_t *message)
 {
-    if (message->line)
-	free (message->line);
-
-    if (message->value.size)
-	free (message->value.str);
-
     if (message->headers)
 	g_hash_table_destroy (message->headers);
 
+    if (message->message)
+	g_object_unref (message->message);
+
     if (message->file)
 	fclose (message->file);
 
@@ -102,262 +84,254 @@ _notmuch_message_file_open_ctx (void *ctx, const char *filename)
     if (unlikely (message == NULL))
 	return NULL;
 
+    /* Only needed for error messages during parsing. */
+    message->filename = talloc_strdup (message, filename);
+    if (message->filename == NULL)
+	goto FAIL;
+
     talloc_set_destructor (message, _notmuch_message_file_destructor);
 
     message->file = fopen (filename, "r");
     if (message->file == NULL)
 	goto FAIL;
 
-    message->headers = g_hash_table_new_full (strcase_hash,
-					      strcase_equal,
-					      free,
-					      free);
-
-    message->parsing_started = 0;
-    message->parsing_finished = 0;
-
     return message;
 
   FAIL:
     fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
-    notmuch_message_file_close (message);
+    _notmuch_message_file_close (message);
 
     return NULL;
 }
 
 notmuch_message_file_t *
-notmuch_message_file_open (const char *filename)
+_notmuch_message_file_open (const char *filename)
 {
     return _notmuch_message_file_open_ctx (NULL, filename);
 }
 
 void
-notmuch_message_file_close (notmuch_message_file_t *message)
+_notmuch_message_file_close (notmuch_message_file_t *message)
 {
     talloc_free (message);
 }
 
-void
-notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
-					va_list va_headers)
+static notmuch_bool_t
+_is_mbox (FILE *file)
 {
-    char *header;
+    char from_buf[5];
+    notmuch_bool_t ret = FALSE;
 
-    if (message->parsing_started)
-	INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
+    /* Is this mbox? */
+    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
+	strncmp (from_buf, "From ", 5) == 0)
+	ret = TRUE;
 
-    while (1) {
-	header = va_arg (va_headers, char*);
-	if (header == NULL)
-	    break;
-	g_hash_table_insert (message->headers,
-			     xstrdup (header), NULL);
-    }
+    rewind (file);
 
-    message->restrict_headers = 1;
+    return ret;
 }
 
-void
-notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
+notmuch_status_t
+_notmuch_message_file_parse (notmuch_message_file_t *message)
 {
-    va_list va_headers;
+    GMimeStream *stream;
+    GMimeParser *parser;
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+    static int initialized = 0;
+    notmuch_bool_t is_mbox;
 
-    va_start (va_headers, message);
+    if (message->message)
+	return NOTMUCH_STATUS_SUCCESS;
 
-    notmuch_message_file_restrict_headersv (message, va_headers);
-}
+    is_mbox = _is_mbox (message->file);
 
-static void
-copy_header_unfolding (header_value_closure_t *value,
-		       const char *chunk)
-{
-    char *last;
+    if (! initialized) {
+	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
+	initialized = 1;
+    }
 
-    if (chunk == NULL)
-	return;
+    message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
+					      free, g_free);
+    if (! message->headers)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
 
-    while (*chunk == ' ' || *chunk == '\t')
-	chunk++;
+    stream = g_mime_stream_file_new (message->file);
 
-    if (value->len + 1 + strlen (chunk) + 1 > value->size) {
-	unsigned int new_size = value->size;
-	if (value->size == 0)
-	    new_size = strlen (chunk) + 1;
-	else
-	    while (value->len + 1 + strlen (chunk) + 1 > new_size)
-		new_size *= 2;
-	value->str = xrealloc (value->str, new_size);
-	value->size = new_size;
+    /* We'll own and fclose the FILE* ourselves. */
+    g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), FALSE);
+
+    parser = g_mime_parser_new_with_stream (stream);
+    g_mime_parser_set_scan_from (parser, is_mbox);
+
+    message->message = g_mime_parser_construct_message (parser);
+    if (! message->message) {
+	status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+	goto DONE;
     }
 
-    last = value->str + value->len;
-    if (value->len) {
-	*last = ' ';
-	last++;
-	value->len++;
+    if (is_mbox) {
+	if (! g_mime_parser_eos (parser)) {
+	    /* This is a multi-message mbox. */
+	    status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+	    goto DONE;
+	}
+	/*
+	 * For historical reasons, we support single-message mboxes,
+	 * but this behavior is likely to change in the future, so
+	 * warn.
+	 */
+	static notmuch_bool_t mbox_warning = FALSE;
+	if (! mbox_warning) {
+	    mbox_warning = TRUE;
+	    fprintf (stderr, "\
+Warning: %s is an mbox containing a single message,\n\
+likely caused by misconfigured mail delivery. Support for single-message\n\
+mboxes is deprecated and may be removed in the future.\n", message->filename);
+	}
     }
 
-    strcpy (last, chunk);
-    value->len += strlen (chunk);
+  DONE:
+    g_object_unref (stream);
+    g_object_unref (parser);
 
-    last = value->str + value->len - 1;
-    if (*last == '\n') {
-	*last = '\0';
-	value->len--;
+    if (status) {
+	g_hash_table_destroy (message->headers);
+	message->headers = NULL;
+
+	if (message->message) {
+	    g_object_unref (message->message);
+	    message->message = NULL;
+	}
+
+	rewind (message->file);
     }
+
+    return status;
 }
 
-/* As a special-case, a value of NULL for header_desired will force
- * the entire header to be parsed if it is not parsed already. This is
- * used by the _notmuch_message_file_get_headers_end function. */
-const char *
-notmuch_message_file_get_header (notmuch_message_file_t *message,
-				 const char *header_desired)
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+					GMimeMessage **mime_message)
 {
-    int contains;
-    char *header, *decoded_value;
-    const char *s, *colon;
-    int match;
-    static int initialized = 0;
+    notmuch_status_t status;
 
-    if (! initialized) {
-	g_mime_init (0);
-	initialized = 1;
-    }
+    status = _notmuch_message_file_parse (message);
+    if (status)
+	return status;
 
-    message->parsing_started = 1;
-
-    if (header_desired == NULL)
-	contains = 0;
-    else
-	contains = g_hash_table_lookup_extended (message->headers,
-						 header_desired, NULL,
-						 (gpointer *) &decoded_value);
-
-    if (contains && decoded_value)
-	return decoded_value;
-
-    if (message->parsing_finished)
-	return "";
-
-#define NEXT_HEADER_LINE(closure) \
-    while (1) { \
-	ssize_t bytes_read = getline (&message->line, \
-				      &message->line_size, \
-				      message->file); \
-	if (bytes_read == -1) { \
-	    message->parsing_finished = 1; \
-	    break; \
-	} \
-	if (*message->line == '\n') { \
-	    message->parsing_finished = 1; \
-	    break; \
-	} \
-	if (closure && \
-	    (*message->line == ' ' || *message->line == '\t')) \
-	{ \
-	    copy_header_unfolding ((closure), message->line); \
-	} \
-	if (*message->line == ' ' || *message->line == '\t') \
-	    message->header_size += strlen (message->line); \
-	else \
-	    break; \
-    }
+    *mime_message = message->message;
 
-    if (message->line == NULL)
-	NEXT_HEADER_LINE (NULL);
+    return NOTMUCH_STATUS_SUCCESS;
+}
 
-    while (1) {
+/*
+ * Get all instances of a header decoded and concatenated.
+ *
+ * The result must be freed using g_free().
+ *
+ * Return NULL on errors, empty string for non-existing headers.
+ */
+static char *
+_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
+					   const char *header)
+{
+    GMimeHeaderList *headers;
+    GMimeHeaderIter *iter;
+    char *combined = NULL;
 
-	if (message->parsing_finished)
-	    break;
+    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
+    if (! headers)
+	return NULL;
 
-	colon = strchr (message->line, ':');
+    iter = g_mime_header_iter_new ();
+    if (! iter)
+	return NULL;
 
-	if (colon == NULL) {
-	    message->broken_headers++;
-	    /* A simple heuristic for giving up on things that just
-	     * don't look like mail messages. */
-	    if (message->broken_headers >= 10 &&
-		message->good_headers < 5)
-	    {
-		message->parsing_finished = 1;
-		break;
-	    }
-	    NEXT_HEADER_LINE (NULL);
-	    continue;
-	}
+    if (! g_mime_header_list_get_iter (headers, iter))
+	goto DONE;
 
-	message->header_size += strlen (message->line);
+    do {
+	const char *value;
+	char *decoded;
 
-	message->good_headers++;
+	if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
+	    continue;
 
-	header = xstrndup (message->line, colon - message->line);
+	/* Note that GMime retains ownership of value... */
+	value = g_mime_header_iter_get_value (iter);
 
-	if (message->restrict_headers &&
-	    ! g_hash_table_lookup_extended (message->headers,
-					    header, NULL, NULL))
-	{
-	    free (header);
-	    NEXT_HEADER_LINE (NULL);
-	    continue;
+	/* ... while decoded needs to be freed with g_free(). */
+	decoded = g_mime_utils_header_decode_text (value);
+	if (! decoded) {
+	    if (combined) {
+		g_free (combined);
+		combined = NULL;
+	    }
+	    goto DONE;
 	}
 
-	s = colon + 1;
-	while (*s == ' ' || *s == '\t')
-	    s++;
+	if (combined) {
+	    char *tmp = g_strdup_printf ("%s %s", combined, decoded);
+	    g_free (decoded);
+	    g_free (combined);
+	    if (! tmp) {
+		combined = NULL;
+		goto DONE;
+	    }
 
-	message->value.len = 0;
-	copy_header_unfolding (&message->value, s);
+	    combined = tmp;
+	} else {
+	    combined = decoded;
+	}
+    } while (g_mime_header_iter_next (iter));
 
-	NEXT_HEADER_LINE (&message->value);
+    /* Return empty string for non-existing headers. */
+    if (! combined)
+	combined = g_strdup ("");
 
-	if (header_desired == 0)
-	    match = 0;
-	else
-	    match = (strcasecmp (header, header_desired) == 0);
-
-	decoded_value = g_mime_utils_header_decode_text (message->value.str);
-	if (g_hash_table_lookup (message->headers, header) == NULL) {
-	    /* Only insert if we don't have a value for this header, yet.
-	     * This way we always return the FIRST instance of any header
-	     * we search for
-	     * FIXME: we should be returning ALL instances of a header
-	     * or at least provide a way to iterate over them
-	     */
-	    g_hash_table_insert (message->headers, header, decoded_value);
-	}
-	if (match)
-	    return decoded_value;
-    }
+  DONE:
+    g_mime_header_iter_free (iter);
 
-    if (message->parsing_finished) {
-	fclose (message->file);
-	message->file = NULL;
-    }
+    return combined;
+}
 
-    if (message->line)
-	free (message->line);
-    message->line = NULL;
+const char *
+_notmuch_message_file_get_header (notmuch_message_file_t *message,
+				  const char *header)
+{
+    const char *value;
+    char *decoded;
 
-    if (message->value.size) {
-	free (message->value.str);
-	message->value.str = NULL;
-	message->value.size = 0;
-	message->value.len = 0;
-    }
+    if (_notmuch_message_file_parse (message))
+	return NULL;
 
-    /* We've parsed all headers and never found the one we're looking
-     * for. It's probably just not there, but let's check that we
-     * didn't make a mistake preventing us from seeing it. */
-    if (message->restrict_headers && header_desired &&
-	! g_hash_table_lookup_extended (message->headers,
-					header_desired, NULL, NULL))
-    {
-	INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
-			"included in call to notmuch_message_file_restrict_headers\n",
-			header_desired);
+    /* If we have a cached decoded value, use it. */
+    value = g_hash_table_lookup (message->headers, header);
+    if (value)
+	return value;
+
+    if (strcasecmp (header, "received") == 0) {
+	/*
+	 * The Received: header is special. We concatenate all
+	 * instances of the header as we use this when analyzing the
+	 * path the mail has taken from sender to recipient.
+	 */
+	decoded = _notmuch_message_file_get_combined_header (message, header);
+    } else {
+	value = g_mime_object_get_header (GMIME_OBJECT (message->message),
+					  header);
+	if (value)
+	    decoded = g_mime_utils_header_decode_text (value);
+	else
+	    decoded = g_strdup ("");
     }
 
-    return "";
+    if (! decoded)
+	return NULL;
+
+    /* Cache the decoded value. We also own the strings. */
+    g_hash_table_insert (message->headers, xstrdup (header), decoded);
+
+    return decoded;
 }