]> git.notmuchmail.org Git - notmuch/commitdiff
lib: detect mislabeled Windows-1252 parts
authorSebastian Poeplau <sebastian.poeplau@eurecom.fr>
Tue, 7 Aug 2018 12:48:22 +0000 (14:48 +0200)
committerDavid Bremner <david@tethera.net>
Wed, 29 Aug 2018 09:34:39 +0000 (06:34 -0300)
Use GMime functionality to detect mislabeled messages and apply the
correct (Windows) encoding instead.

notmuch-show.c
test/T300-encoding.sh

index 1072ea558dfbcc04b91b4aa9041c3f7dd8692adf..c3a3783a4094c952f61b567907b87f658a1381ab 100644 (file)
@@ -272,6 +272,7 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
     GMimeContentType *content_type = g_mime_object_get_content_type (GMIME_OBJECT (part));
     GMimeStream *stream_filter = NULL;
     GMimeFilter *crlf_filter = NULL;
+    GMimeFilter *windows_filter = NULL;
     GMimeDataWrapper *wrapper;
     const char *charset;
 
@@ -282,13 +283,37 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
     if (stream_out == NULL)
        return;
 
+    charset = g_mime_object_get_content_type_parameter (part, "charset");
+    charset = charset ? g_mime_charset_canon_name (charset) : NULL;
+    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+    if (wrapper && charset && !g_ascii_strncasecmp (charset, "iso-8859-", 9)) {
+       GMimeStream *null_stream = NULL;
+       GMimeStream *null_stream_filter = NULL;
+
+       /* Check for mislabeled Windows encoding */
+       null_stream = g_mime_stream_null_new ();
+       null_stream_filter = g_mime_stream_filter_new (null_stream);
+       windows_filter = g_mime_filter_windows_new (charset);
+       g_mime_stream_filter_add(GMIME_STREAM_FILTER (null_stream_filter),
+                                windows_filter);
+       g_mime_data_wrapper_write_to_stream (wrapper, null_stream_filter);
+       charset = g_mime_filter_windows_real_charset(
+           (GMimeFilterWindows *) windows_filter);
+
+       if (null_stream_filter)
+           g_object_unref (null_stream_filter);
+       if (null_stream)
+           g_object_unref (null_stream);
+       /* Keep a reference to windows_filter in order to prevent the
+        * charset string from deallocation. */
+    }
+
     stream_filter = g_mime_stream_filter_new (stream_out);
     crlf_filter = g_mime_filter_crlf_new (false, false);
     g_mime_stream_filter_add(GMIME_STREAM_FILTER (stream_filter),
                             crlf_filter);
     g_object_unref (crlf_filter);
 
-    charset = g_mime_object_get_content_type_parameter (part, "charset");
     if (charset) {
        GMimeFilter *charset_filter;
        charset_filter = g_mime_filter_charset_new (charset, "UTF-8");
@@ -313,11 +338,12 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
        }
     }
 
-    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
     if (wrapper && stream_filter)
        g_mime_data_wrapper_write_to_stream (wrapper, stream_filter);
     if (stream_filter)
        g_object_unref(stream_filter);
+    if (windows_filter)
+       g_object_unref (windows_filter);
 }
 
 static const char*
index 4a6bfd2f3ae94caf2e9e4ba63c81dcee67308bda..1e9d2a3da010a7c5c5d27fad4e3ddbb4993154cc 100755 (executable)
@@ -45,7 +45,6 @@ output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize)
 test_expect_equal "$output" "thread:0000000000000005   2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)"
 
 test_begin_subtest "Mislabeled Windows-1252 encoding"
-test_subtest_known_broken
 add_message '[content-type]="text/plain; charset=iso-8859-1"'                           \
             "[body]=$'This text contains \x93Windows-1252\x94 character codes.'"
 cat <<EOF > EXPECTED