X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=lib%2Fmessage-file.c;h=311bd478b0cfff13620ba653795ccb9cd62246bf;hp=4d9af89fe44dd5923cee743572149f4cf2c22f46;hb=HEAD;hpb=27dacc7947309bb8f6f84b2cd83dc7ec280576b2 diff --git a/lib/message-file.c b/lib/message-file.c index 4d9af89f..68f646a4 100644 --- a/lib/message-file.c +++ b/lib/message-file.c @@ -13,7 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see http://www.gnu.org/licenses/ . + * along with this program. If not, see https://www.gnu.org/licenses/ . * * Author: Carl Worth */ @@ -26,67 +26,28 @@ #include /* GHashTable */ -typedef struct { - char *str; - size_t size; - size_t len; -} header_value_closure_t; - struct _notmuch_message_file { - /* File object */ - FILE *file; + /* open stream to (possibly gzipped) file */ + GMimeStream *stream; + char *filename; - /* Header storage */ - int restrict_headers; + /* Cache for decoded headers */ GHashTable *headers; - int broken_headers; - int good_headers; - size_t header_size; /* Length of full message header in bytes. */ - - /* Parsing state */ - char *line; - size_t line_size; - header_value_closure_t value; - int parsing_started; - int parsing_finished; + GMimeMessage *message; }; -static int -strcase_equal (const void *a, const void *b) -{ - return strcasecmp (a, b) == 0; -} - -static unsigned int -strcase_hash (const void *ptr) -{ - const char *s = ptr; - - /* This is the djb2 hash. */ - unsigned int hash = 5381; - while (s && *s) { - hash = ((hash << 5) + hash) + tolower (*s); - s++; - } - - return hash; -} - static int _notmuch_message_file_destructor (notmuch_message_file_t *message) { - if (message->line) - free (message->line); - - if (message->value.size) - free (message->value.str); - if (message->headers) g_hash_table_destroy (message->headers); - if (message->file) - fclose (message->file); + if (message->message) + g_object_unref (message->message); + + if (message->stream) + g_object_unref (message->stream); return 0; } @@ -94,7 +55,8 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message) /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as * the talloc owner. */ notmuch_message_file_t * -_notmuch_message_file_open_ctx (void *ctx, const char *filename) +_notmuch_message_file_open_ctx (notmuch_database_t *notmuch, + void *ctx, const char *filename) { notmuch_message_file_t *message; @@ -102,299 +64,339 @@ _notmuch_message_file_open_ctx (void *ctx, const char *filename) if (unlikely (message == NULL)) return NULL; - talloc_set_destructor (message, _notmuch_message_file_destructor); + const char *prefix = notmuch_config_get (notmuch, NOTMUCH_CONFIG_MAIL_ROOT); + + if (prefix == NULL) + goto FAIL; + + if (*filename == '/') { + if (strncmp (filename, prefix, strlen (prefix)) != 0) { + _notmuch_database_log (notmuch, "Error opening %s: path outside mail root\n", + filename); + errno = 0; + goto FAIL; + } + message->filename = talloc_strdup (message, filename); + } else { + message->filename = talloc_asprintf (message, "%s/%s", prefix, filename); + } - message->file = fopen (filename, "r"); - if (message->file == NULL) + if (message->filename == NULL) goto FAIL; - message->headers = g_hash_table_new_full (strcase_hash, - strcase_equal, - free, - g_free); + talloc_set_destructor (message, _notmuch_message_file_destructor); - message->parsing_started = 0; - message->parsing_finished = 0; + message->stream = g_mime_stream_gzfile_open (message->filename); + if (message->stream == NULL) + goto FAIL; return message; FAIL: - fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno)); - notmuch_message_file_close (message); + if (errno) + _notmuch_database_log (notmuch, "Error opening %s: %s\n", + filename, strerror (errno)); + _notmuch_message_file_close (message); return NULL; } notmuch_message_file_t * -notmuch_message_file_open (const char *filename) +_notmuch_message_file_open (notmuch_database_t *notmuch, + const char *filename) +{ + return _notmuch_message_file_open_ctx (notmuch, NULL, filename); +} + +const char * +_notmuch_message_file_get_filename (notmuch_message_file_t *message_file) { - return _notmuch_message_file_open_ctx (NULL, filename); + return message_file->filename; } void -notmuch_message_file_close (notmuch_message_file_t *message) +_notmuch_message_file_close (notmuch_message_file_t *message) { talloc_free (message); } -void -notmuch_message_file_restrict_headersv (notmuch_message_file_t *message, - va_list va_headers) +static bool +_is_mbox (GMimeStream *stream) { - char *header; + char from_buf[5]; + bool ret = false; - if (message->parsing_started) - INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started"); + /* Is this mbox? */ + if (g_mime_stream_read (stream, from_buf, sizeof (from_buf)) == sizeof (from_buf) && + strncmp (from_buf, "From ", 5) == 0) + ret = true; - while (1) { - header = va_arg (va_headers, char*); - if (header == NULL) - break; - g_hash_table_insert (message->headers, - xstrdup (header), NULL); - } + g_mime_stream_reset (stream); - message->restrict_headers = 1; + return ret; } -void -notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...) +notmuch_status_t +_notmuch_message_file_parse (notmuch_message_file_t *message) { - va_list va_headers; + GMimeParser *parser; + notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; + bool is_mbox; - va_start (va_headers, message); + if (message->message) + return NOTMUCH_STATUS_SUCCESS; - notmuch_message_file_restrict_headersv (message, va_headers); -} + is_mbox = _is_mbox (message->stream); -static void -copy_header_unfolding (header_value_closure_t *value, - const char *chunk) -{ - char *last; + _notmuch_init (); - if (chunk == NULL) - return; + message->headers = g_hash_table_new_full (strcase_hash, strcase_equal, + free, g_free); + if (! message->headers) + return NOTMUCH_STATUS_OUT_OF_MEMORY; - while (*chunk == ' ' || *chunk == '\t') - chunk++; + parser = g_mime_parser_new_with_stream (message->stream); + g_mime_parser_set_scan_from (parser, is_mbox); - if (value->len + 1 + strlen (chunk) + 1 > value->size) { - unsigned int new_size = value->size; - if (value->size == 0) - new_size = strlen (chunk) + 1; - else - while (value->len + 1 + strlen (chunk) + 1 > new_size) - new_size *= 2; - value->str = xrealloc (value->str, new_size); - value->size = new_size; + message->message = g_mime_parser_construct_message (parser, NULL); + if (! message->message) { + status = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; } - last = value->str + value->len; - if (value->len) { - *last = ' '; - last++; - value->len++; + if (is_mbox && ! g_mime_parser_eos (parser)) { + /* + * This is a multi-message mbox. (For historical reasons, we + * do support single-message mboxes.) + */ + status = NOTMUCH_STATUS_FILE_NOT_EMAIL; } - strcpy (last, chunk); - value->len += strlen (chunk); + DONE: + g_mime_stream_reset (message->stream); + g_object_unref (parser); + + if (status) { + g_hash_table_destroy (message->headers); + message->headers = NULL; + + if (message->message) { + g_object_unref (message->message); + message->message = NULL; + } - last = value->str + value->len - 1; - if (*last == '\n') { - *last = '\0'; - value->len--; } + + return status; } -/* As a special-case, a value of NULL for header_desired will force - * the entire header to be parsed if it is not parsed already. This is - * used by the _notmuch_message_file_get_headers_end function. - * Another special case is the Received: header. For this header we - * want to concatenate all instances of the header instead of just - * hashing the first instance as we use this when analyzing the path - * the mail has taken from sender to recipient. - */ -const char * -notmuch_message_file_get_header (notmuch_message_file_t *message, - const char *header_desired) +notmuch_status_t +_notmuch_message_file_get_mime_message (notmuch_message_file_t *message, + GMimeMessage **mime_message) { - int contains; - char *header, *decoded_value, *header_sofar, *combined_header; - const char *s, *colon; - int match, newhdr, hdrsofar, is_received; - static int initialized = 0; + notmuch_status_t status; + + status = _notmuch_message_file_parse (message); + if (status) + return status; + + *mime_message = message->message; + + return NOTMUCH_STATUS_SUCCESS; +} - is_received = (strcmp(header_desired,"received") == 0); +/* + * Get all instances of a header decoded and concatenated. + * + * The result must be freed using g_free(). + * + * Return NULL on errors, empty string for non-existing headers. + */ - if (! initialized) { - g_mime_init (0); - initialized = 1; +static char * +_extend_header (char *combined, const char *value) +{ + char *decoded; + + decoded = g_mime_utils_header_decode_text (NULL, value); + if (! decoded) { + if (combined) { + g_free (combined); + combined = NULL; + } + goto DONE; } - message->parsing_started = 1; - - if (header_desired == NULL) - contains = 0; - else - contains = g_hash_table_lookup_extended (message->headers, - header_desired, NULL, - (gpointer *) &decoded_value); - - if (contains && decoded_value) - return decoded_value; - - if (message->parsing_finished) - return ""; - -#define NEXT_HEADER_LINE(closure) \ - while (1) { \ - ssize_t bytes_read = getline (&message->line, \ - &message->line_size, \ - message->file); \ - if (bytes_read == -1) { \ - message->parsing_finished = 1; \ - break; \ - } \ - if (*message->line == '\n') { \ - message->parsing_finished = 1; \ - break; \ - } \ - if (closure && \ - (*message->line == ' ' || *message->line == '\t')) \ - { \ - copy_header_unfolding ((closure), message->line); \ - } \ - if (*message->line == ' ' || *message->line == '\t') \ - message->header_size += strlen (message->line); \ - else \ - break; \ + if (combined) { + char *tmp = g_strdup_printf ("%s %s", combined, decoded); + g_free (decoded); + g_free (combined); + if (! tmp) { + combined = NULL; + goto DONE; + } + + combined = tmp; + } else { + combined = decoded; } + DONE: + return combined; +} - if (message->line == NULL) - NEXT_HEADER_LINE (NULL); +static char * +_notmuch_message_file_get_combined_header (notmuch_message_file_t *message, + const char *header) +{ + char *combined = NULL; + GMimeHeaderList *headers; - while (1) { + headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message)); + if (! headers) + return NULL; - if (message->parsing_finished) - break; - colon = strchr (message->line, ':'); + for (int i = 0; i < g_mime_header_list_get_count (headers); i++) { + const char *value; + GMimeHeader *g_header = g_mime_header_list_get_header_at (headers, i); - if (colon == NULL) { - message->broken_headers++; - /* A simple heuristic for giving up on things that just - * don't look like mail messages. */ - if (message->broken_headers >= 10 && - message->good_headers < 5) - { - message->parsing_finished = 1; - break; - } - NEXT_HEADER_LINE (NULL); + if (strcasecmp (g_mime_header_get_name (g_header), header) != 0) continue; - } - - message->header_size += strlen (message->line); - message->good_headers++; + /* GMime retains ownership of value, we hope */ + value = g_mime_header_get_value (g_header); - header = xstrndup (message->line, colon - message->line); + combined = _extend_header (combined, value); + } - if (message->restrict_headers && - ! g_hash_table_lookup_extended (message->headers, - header, NULL, NULL)) - { - free (header); - NEXT_HEADER_LINE (NULL); - continue; - } + /* Return empty string for non-existing headers. */ + if (! combined) + combined = g_strdup (""); - s = colon + 1; - while (*s == ' ' || *s == '\t') - s++; + return combined; +} - message->value.len = 0; - copy_header_unfolding (&message->value, s); +const char * +_notmuch_message_file_get_header (notmuch_message_file_t *message, + const char *header) +{ + const char *value; + char *decoded; - NEXT_HEADER_LINE (&message->value); + if (_notmuch_message_file_parse (message)) + return NULL; - if (header_desired == NULL) - match = 0; + /* If we have a cached decoded value, use it. */ + value = g_hash_table_lookup (message->headers, header); + if (value) + return value; + + if (strcasecmp (header, "received") == 0 || + strcasecmp (header, "delivered-to") == 0) { + /* + * The Received: header is special. We concatenate all instances of the + * header as we use this when analyzing the path the mail has taken + * from sender to recipient. + * + * Similarly, multiple instances of Delivered-To may be present. We + * concatenate them so the one with highest priority may be picked (eg. + * primary_email before other_email). + */ + decoded = _notmuch_message_file_get_combined_header (message, header); + } else { + value = g_mime_object_get_header (GMIME_OBJECT (message->message), + header); + if (value) + decoded = g_mime_utils_header_decode_text (NULL, value); else - match = (strcasecmp (header, header_desired) == 0); - - decoded_value = g_mime_utils_header_decode_text (message->value.str); - header_sofar = (char *)g_hash_table_lookup (message->headers, header); - /* we treat the Received: header special - we want to concat ALL of - * the Received: headers we encounter. - * for everything else we return the first instance of a header */ - if (strcasecmp(header, "received") == 0) { - if (header_sofar == NULL) { - /* first Received: header we encountered; just add it */ - g_hash_table_insert (message->headers, header, decoded_value); - } else { - /* we need to add the header to those we already collected */ - newhdr = strlen(decoded_value); - hdrsofar = strlen(header_sofar); - combined_header = g_malloc(hdrsofar + newhdr + 2); - strncpy(combined_header,header_sofar,hdrsofar); - *(combined_header+hdrsofar) = ' '; - strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1); - g_free (decoded_value); - g_hash_table_insert (message->headers, header, combined_header); - } - } else { - if (header_sofar == NULL) { - /* Only insert if we don't have a value for this header, yet. */ - g_hash_table_insert (message->headers, header, decoded_value); - } else { - free (header); - g_free (decoded_value); - decoded_value = header_sofar; - } - } - /* if we found a match we can bail - unless of course we are - * collecting all the Received: headers */ - if (match && !is_received) - return decoded_value; + decoded = g_strdup (""); } - if (message->parsing_finished) { - fclose (message->file); - message->file = NULL; - } + if (! decoded) + return NULL; - if (message->line) - free (message->line); - message->line = NULL; + /* Cache the decoded value. We also own the strings. */ + g_hash_table_insert (message->headers, xstrdup (header), decoded); - if (message->value.size) { - free (message->value.str); - message->value.str = NULL; - message->value.size = 0; - message->value.len = 0; + return decoded; +} + +notmuch_status_t +_notmuch_message_file_get_headers (notmuch_message_file_t *message_file, + const char **from_out, + const char **subject_out, + const char **to_out, + const char **date_out, + char **message_id_out) +{ + notmuch_status_t ret; + const char *header; + const char *from, *to, *subject, *date; + char *message_id = NULL; + + /* Parse message up front to get better error status. */ + ret = _notmuch_message_file_parse (message_file); + if (ret) + goto DONE; + + /* Before we do any real work, (especially before doing a + * potential SHA-1 computation on the entire file's contents), + * let's make sure that what we're looking at looks like an + * actual email message. + */ + from = _notmuch_message_file_get_header (message_file, "from"); + subject = _notmuch_message_file_get_header (message_file, "subject"); + to = _notmuch_message_file_get_header (message_file, "to"); + date = _notmuch_message_file_get_header (message_file, "date"); + + if ((from == NULL || *from == '\0') && + (subject == NULL || *subject == '\0') && + (to == NULL || *to == '\0')) { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; } - /* For the Received: header we actually might end up here even - * though we found the header (as we force continued parsing - * in that case). So let's check if that's the header we were - * looking for and return the value that we found (if any) + /* Now that we're sure it's mail, the first order of business + * is to find a message ID (or else create one ourselves). */ - if (is_received) - return (char *)g_hash_table_lookup (message->headers, "received"); - - /* We've parsed all headers and never found the one we're looking - * for. It's probably just not there, but let's check that we - * didn't make a mistake preventing us from seeing it. */ - if (message->restrict_headers && header_desired && - ! g_hash_table_lookup_extended (message->headers, - header_desired, NULL, NULL)) - { - INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n" - "included in call to notmuch_message_file_restrict_headers\n", - header_desired); + header = _notmuch_message_file_get_header (message_file, "message-id"); + if (header && *header != '\0') { + message_id = _notmuch_message_id_parse (message_file, header, NULL); + + /* So the header value isn't RFC-compliant, but it's + * better than no message-id at all. + */ + if (message_id == NULL) + message_id = talloc_strdup (message_file, header); } - return ""; + if (message_id == NULL ) { + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. + */ + char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file)); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1); + free (sha1); + } + DONE: + if (ret == NOTMUCH_STATUS_SUCCESS) { + if (from_out) + *from_out = from; + if (subject_out) + *subject_out = subject; + if (to_out) + *to_out = to; + if (date_out) + *date_out = date; + if (message_id_out) + *message_id_out = message_id; + } + return ret; }