aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorDavid Bremner <david@tethera.net>2023-01-05 20:02:06 -0400
committerDavid Bremner <david@tethera.net>2023-04-02 19:24:43 -0300
commita554690d6af0ac8cb55166a20efd0f449abde389 (patch)
treeb852e9fe4b19ad6dbf3d49b731d82253151b8c5a /test
parent3f5809bf28becbddfed9ff33d6f1242346904c23 (diff)
lib: index attachments with mime types matching index.as_text
Instead of skipping indexing all attachments, we check of a (user configured) mime type that is indexable as text.
Diffstat (limited to 'test')
-rwxr-xr-xtest/T050-new.sh8
-rwxr-xr-xtest/T760-as-text.sh77
-rw-r--r--test/corpora/indexing/fake-pdf:2,S11
3 files changed, 88 insertions, 8 deletions
diff --git a/test/T050-new.sh b/test/T050-new.sh
index 09c2bfc6..52888be2 100755
--- a/test/T050-new.sh
+++ b/test/T050-new.sh
@@ -470,12 +470,4 @@ Date: Fri, 17 Jun 2016 22:14:41 -0400
EOF
test_expect_equal_file EXPECTED OUTPUT
-add_email_corpus indexing
-
-test_begin_subtest "index text/* attachments"
-test_subtest_known_broken
-notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
-notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
-test_expect_equal_file_nonempty EXPECTED OUTPUT
-
test_done
diff --git a/test/T760-as-text.sh b/test/T760-as-text.sh
new file mode 100755
index 00000000..744567f2
--- /dev/null
+++ b/test/T760-as-text.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+test_description='index attachments as text'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+add_email_corpus indexing
+test_begin_subtest "empty as_text; skip text/x-diff"
+messages=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain)
+count=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz)
+test_expect_equal "$messages,$count" "1,0"
+
+notmuch config set index.as_text "^text/"
+add_email_corpus indexing
+
+test_begin_subtest "as_index is text/; find text/x-diff"
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "reindex with empty as_text, skips text/x-diff"
+notmuch config set index.as_text
+notmuch reindex '*'
+messages=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain)
+count=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "reindex with empty as_text; skips application/pdf"
+notmuch config set index.as_text
+notmuch reindex '*'
+gmessages=$(notmuch count id:871qo9p4tf.fsf@tethera.net)
+count=$(notmuch count id:871qo9p4tf.fsf@tethera.net and body:not-really-PDF)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "reindex with as_text as text/; finds text/x-diff"
+notmuch config set index.as_text "^text/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "reindex with as_text as text/; skips application/pdf"
+notmuch config set index.as_text "^text/"
+notmuch config set index.as_text
+notmuch reindex '*'
+messages=$(notmuch count id:871qo9p4tf.fsf@tethera.net)
+count=$(notmuch count id:871qo9p4tf.fsf@tethera.net and body:not-really-PDF)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "as_text has multiple regexes"
+notmuch config set index.as_text "blahblah;^text/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is non-anchored regex"
+notmuch config set index.as_text "e.t/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is 'application/pdf'"
+notmuch config set index.as_text "^application/pdf$"
+notmuch reindex '*'
+notmuch search id:871qo9p4tf.fsf@tethera.net > EXPECTED
+notmuch search id:871qo9p4tf.fsf@tethera.net and '"not really PDF"' > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is bad regex"
+notmuch config set index.as_text '['
+notmuch reindex '*' >& OUTPUT
+cat<<EOF > EXPECTED
+Error in index.as_text: Invalid regular expression: [
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
diff --git a/test/corpora/indexing/fake-pdf:2,S b/test/corpora/indexing/fake-pdf:2,S
new file mode 100644
index 00000000..60a7a47f
--- /dev/null
+++ b/test/corpora/indexing/fake-pdf:2,S
@@ -0,0 +1,11 @@
+From: David Bremner <david@tethera.net>
+To: example@example.com
+Subject: attachment content type
+Date: Thu, 05 Jan 2023 08:02:36 -0400
+Message-ID: <871qo9p4tf.fsf@tethera.net>
+MIME-Version: 1.0
+Content-Type: application/pdf
+Content-Disposition: attachment; filename=fake.pdf
+Content-Transfer-Encoding: base64
+
+dGhpcyBpcyBub3QgcmVhbGx5IFBERgo= \ No newline at end of file