X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=notmuch-index-message.cc;h=3175d2d829b26b9c8db72d9d962bdf141cdb9851;hp=4585a3b98905d03d44252f7a88d55b2c06f76f23;hb=387a28281c8b8c8025e976d610cf0dae0f196894;hpb=bae1ce09a37071cdf592048938319c72653e96e0 diff --git a/notmuch-index-message.cc b/notmuch-index-message.cc index 4585a3b9..3175d2d8 100644 --- a/notmuch-index-message.cc +++ b/notmuch-index-message.cc @@ -54,6 +54,7 @@ #include #include #include +#include #include @@ -127,7 +128,40 @@ find_prefix (const char *name) return ""; } -int TERM_COMBINED = 0; +/* "128 bits of thread-id ought to be enough for anybody" */ +#define NOTMUCH_THREAD_ID_BITS 128 +#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4) +typedef struct _thread_id { + char str[NOTMUCH_THREAD_ID_DIGITS + 1]; +} thread_id_t; + +static void +thread_id_generate (thread_id_t *thread_id) +{ + FILE *urandom; + uint32_t value; + char *s; + int i; + + urandom = fopen ("/dev/urandom", "r"); + if (urandom == NULL) { + fprintf (stderr, "Error opening /dev/urandom: %s\n", + strerror (errno)); + fprintf (stderr, "Perhaps notmuch needs some portability fixes for your platform?\n"); + exit (1); + } + + s = thread_id->str; + for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) { + fread ((void *) &value, sizeof (value), 1, urandom); + sprintf (s, "%08x", value); + s += 8; + } + + fclose (urandom); + + printf ("Generated thread id: %s\n", thread_id->str); +} static void add_term (Xapian::Document doc, @@ -170,30 +204,44 @@ gen_terms_address_name (Xapian::TermGenerator term_gen, InternetAddress *address, const char *prefix_name) { - const char *name; - int own_name = 0; - - name = internet_address_get_name (address); - - /* In the absence of a name, we'll strip the part before the @ - * from the address. */ - if (! name) { - InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); - const char *addr = internet_address_mailbox_get_addr (mailbox); - const char *at; + if (INTERNET_ADDRESS_IS_MAILBOX(address)) { + const char *name; + int own_name = 0; + + name = internet_address_get_name (address); + + /* In the absence of a name, we'll strip the part before the @ + * from the address. */ + if (! name) { + InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); + const char *addr = internet_address_mailbox_get_addr (mailbox); + const char *at; + + at = strchr (addr, '@'); + if (at) { + name = strndup (addr, at - addr); + own_name = 1; + } + } - at = strchr (addr, '@'); - if (at) { - name = strndup (addr, at - addr); - own_name = 1; + if (name) + gen_terms (term_gen, prefix_name, name); + + if (own_name) + free ((void *) name); + } else if (INTERNET_ADDRESS_IS_GROUP (address)) { + InternetAddressGroup *group = INTERNET_ADDRESS_GROUP (address); + InternetAddressList *list = internet_address_group_get_members(group); + if (list) { + int length = internet_address_list_length(list); + int i; + + for (i = 0; i < length; i++) + gen_terms_address_name(term_gen, + internet_address_list_get_address(list, i), + prefix_name); } } - - if (name) - gen_terms (term_gen, prefix_name, name); - - if (own_name) - free ((void *) name); } static void @@ -204,6 +252,9 @@ gen_terms_address_names (Xapian::TermGenerator term_gen, int i; InternetAddress *address; + if (addresses == NULL) + return; + for (i = 0; i < internet_address_list_length (addresses); i++) { address = internet_address_list_get_address (addresses, i); gen_terms_address_name (term_gen, address, address_type); @@ -217,13 +268,27 @@ add_term_address_addr (Xapian::Document doc, InternetAddress *address, const char *prefix_name) { - InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); - const char *addr; - - addr = internet_address_mailbox_get_addr (mailbox); - - if (addr) - add_term (doc, prefix_name, addr); + if (INTERNET_ADDRESS_IS_MAILBOX(address)) { + InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); + const char *addr; + + addr = internet_address_mailbox_get_addr (mailbox); + + if (addr) + add_term (doc, prefix_name, addr); + } else if (INTERNET_ADDRESS_IS_GROUP (address)) { + InternetAddressGroup *group = INTERNET_ADDRESS_GROUP (address); + InternetAddressList *list = internet_address_group_get_members(group); + if (list) { + int length = internet_address_list_length(list); + int i; + + for (i = 0; i < length; i++) + add_term_address_addr(doc, + internet_address_list_get_address(list, i), + prefix_name); + } + } } static void @@ -234,6 +299,9 @@ add_terms_address_addrs (Xapian::Document doc, int i; InternetAddress *address; + if (addresses == NULL) + return; + for (i = 0; i < internet_address_list_length (addresses); i++) { address = internet_address_list_get_address (addresses, i); add_term_address_addr (doc, address, address_type); @@ -246,6 +314,9 @@ skip_re_in_subject (const char *subject) { const char *s = subject; + if (subject == NULL) + return NULL; + while (*s) { while (*s && isspace (*s)) s++; @@ -465,7 +536,17 @@ gen_terms_part (Xapian::TermGenerator term_gen, return; } - if (! GMIME_IS_PART (part)) { + if (GMIME_IS_MESSAGE_PART (part)) { + GMimeMessage *message; + + message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part)); + + gen_terms_part (term_gen, g_mime_message_get_mime_part (message)); + + return; + } + + if (! (GMIME_IS_PART (part))) { fprintf (stderr, "Warning: Not indexing unknown mime part: %s.\n", g_type_name (G_OBJECT_TYPE (part))); return; @@ -497,7 +578,8 @@ gen_terms_part (Xapian::TermGenerator term_gen, stream = g_mime_stream_mem_new_with_byte_array (byte_array); g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); wrapper = g_mime_part_get_content_object (GMIME_PART (part)); - g_mime_data_wrapper_write_to_stream (wrapper, stream); + if (wrapper) + g_mime_data_wrapper_write_to_stream (wrapper, stream); g_object_unref (stream); @@ -613,8 +695,10 @@ index_file (Xapian::WritableDatabase db, add_term (doc, "type", "mail"); add_term (doc, "source_id", "1"); - add_term (doc, "msgid", message_id); - doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + if (message_id) { + add_term (doc, "msgid", message_id); + doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + } if (thread_ids->len) { unsigned int i; @@ -638,10 +722,14 @@ index_file (Xapian::WritableDatabase db, doc.add_value (NOTMUCH_VALUE_THREAD, thread_id->str); g_string_free (thread_id, TRUE); - } else { - /* If not referenced thread, use the message ID */ - add_term (doc, "thread", message_id); - doc.add_value (NOTMUCH_VALUE_THREAD, message_id); + } else if (message_id) { + /* If not part of any existing thread, generate a new thread_id. */ + thread_id_t thread_id; + + thread_id_generate (&thread_id); + + add_term (doc, "thread", thread_id.str); + doc.add_value (NOTMUCH_VALUE_THREAD, thread_id.str); } doc.add_value (NOTMUCH_VALUE_DATE, Xapian::sortable_serialise (time)); @@ -670,6 +758,9 @@ main (int argc, char **argv) GIOChannel *channel; GIOStatus gio_status; GError *error = NULL; + int count; + struct timeval tv_start, tv_last, tv_now; + double elapsed; if (argc < 2) { usage (argv[0]); @@ -691,6 +782,11 @@ main (int argc, char **argv) channel = g_io_channel_unix_new (fileno (stdin)); + count = 0; + + gettimeofday (&tv_start, NULL); + tv_last = tv_start; + while (1) { gio_status = g_io_channel_read_line (channel, &filename, NULL, NULL, &error); @@ -706,8 +802,23 @@ main (int argc, char **argv) index_file (db, term_gen, filename); g_free (filename); + + count++; + if (count % 1000 == 0) { + gettimeofday (&tv_now, NULL); + printf ("Indexed %d messages (%g messages/second)\n", + count, 1000 / ((tv_now.tv_sec - tv_last.tv_sec) + + (tv_now.tv_usec - tv_last.tv_usec) / 1e6)); + tv_last = tv_now; + } } + gettimeofday (&tv_now, NULL); + elapsed = (tv_now.tv_sec - tv_start.tv_sec + + (tv_now.tv_usec - tv_start.tv_usec) / 1e6); + printf ("Completed indexing of %d messages in %g seconds (%g messages/second)\n", + count, elapsed, count / elapsed); + } catch (const Xapian::Error &error) { cerr << "A Xapian exception occurred: " << error.get_msg () << endl; exit (1);