git.notmuchmail.org Git - notmuch/commitdiff
Convert non-UTF-8 parts to UTF-8 before indexing them
author Michal Sojka <sojkam1@fel.cvut.cz>
Fri, 24 Feb 2012 07:36:22 +0000 (08:36 +0100)
committer David Bremner <bremner@debian.org>
Wed, 29 Feb 2012 11:41:39 +0000 (07:41 -0400)
This fixes a bug that prevented searching for non-ASCII words in such
parts. The code here was copied from show_text_part_content(), because
the show command already does the needed conversion when showing the
message.

lib/index.cc

index d8f8b2bf516247ccc14aec8927dc5ae703a830d3..e377732220effdcb537934bf6036d01528829db3 100644 (file)
@@ -315,6 +315,7 @@ _index_mime_part (notmuch_message_t *message,
     GByteArray *byte_array;
     GMimeContentDisposition *disposition;
     char *body;
+    const char *charset;
 
     if (! part) {
        fprintf (stderr, "Warning: Not indexing empty mime part.\n");
@@ -390,6 +391,20 @@ _index_mime_part (notmuch_message_t *message,
     g_mime_stream_filter_add (GMIME_STREAM_FILTER (filter),
                              discard_uuencode_filter);
 
+    charset = g_mime_object_get_content_type_parameter (part, "charset");
+    if (charset) {
+       GMimeFilter *charset_filter;
+       charset_filter = g_mime_filter_charset_new (charset, "UTF-8");
+       /* This result can be NULL for things like "unknown-8bit".
+        * Don't set a NULL filter as that makes GMime print
+        * annoying assertion-failure messages on stderr. */
+       if (charset_filter) {
+           g_mime_stream_filter_add (GMIME_STREAM_FILTER (filter),
+                                     charset_filter);
+           g_object_unref (charset_filter);
+       }
+    }
+
     wrapper = g_mime_part_get_content_object (GMIME_PART (part));
     if (wrapper)
        g_mime_data_wrapper_write_to_stream (wrapper, filter);