diff options
| author | David Bremner <david@tethera.net> | 2023-01-05 20:02:06 -0400 |
|---|---|---|
| committer | David Bremner <david@tethera.net> | 2023-04-02 19:24:43 -0300 |
| commit | a554690d6af0ac8cb55166a20efd0f449abde389 (patch) | |
| tree | b852e9fe4b19ad6dbf3d49b731d82253151b8c5a /test | |
| parent | 3f5809bf28becbddfed9ff33d6f1242346904c23 (diff) | |
lib: index attachments with mime types matching index.as_text
Instead of skipping the indexing of all attachments, we check for a
(user-configured) MIME type that is indexable as text.
Diffstat (limited to 'test')
| -rwxr-xr-x | test/T050-new.sh | 8 | ||||
| -rwxr-xr-x | test/T760-as-text.sh | 77 | ||||
| -rw-r--r-- | test/corpora/indexing/fake-pdf:2,S | 11 |
3 files changed, 88 insertions, 8 deletions
diff --git a/test/T050-new.sh b/test/T050-new.sh
index 09c2bfc6..52888be2 100755
--- a/test/T050-new.sh
+++ b/test/T050-new.sh
@@ -470,12 +470,4 @@ Date: Fri, 17 Jun 2016 22:14:41 -0400
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
-add_email_corpus indexing
-
-test_begin_subtest "index text/* attachments"
-test_subtest_known_broken
-notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
-notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
-test_expect_equal_file_nonempty EXPECTED OUTPUT
-
 test_done
diff --git a/test/T760-as-text.sh b/test/T760-as-text.sh
new file mode 100755
index 00000000..744567f2
--- /dev/null
+++ b/test/T760-as-text.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+test_description='index attachments as text'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+add_email_corpus indexing
+test_begin_subtest "empty as_text; skip text/x-diff"
+messages=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain)
+count=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz)
+test_expect_equal "$messages,$count" "1,0"
+
+notmuch config set index.as_text "^text/"
+add_email_corpus indexing
+
+test_begin_subtest "as_text is text/; find text/x-diff"
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "reindex with empty as_text, skips text/x-diff"
+notmuch config set index.as_text
+notmuch reindex '*'
+messages=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain)
+count=$(notmuch count id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "reindex with empty as_text; skips application/pdf"
+notmuch config set index.as_text
+notmuch reindex '*'
+messages=$(notmuch count id:871qo9p4tf.fsf@tethera.net)
+count=$(notmuch count id:871qo9p4tf.fsf@tethera.net and body:not-really-PDF)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "reindex with as_text as text/; finds text/x-diff"
+notmuch config set index.as_text "^text/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "reindex with as_text as text/; skips application/pdf"
+notmuch config set index.as_text "^text/"
+notmuch config set index.as_text
+notmuch reindex '*'
+messages=$(notmuch count id:871qo9p4tf.fsf@tethera.net)
+count=$(notmuch count id:871qo9p4tf.fsf@tethera.net and body:not-really-PDF)
+test_expect_equal "$messages,$count" "1,0"
+
+test_begin_subtest "as_text has multiple regexes"
+notmuch config set index.as_text "blahblah;^text/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is non-anchored regex"
+notmuch config set index.as_text "e.t/"
+notmuch reindex '*'
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain > EXPECTED
+notmuch search id:20200930101213.2m2pt3jrspvcrxfx@localhost.localdomain and ersatz > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is 'application/pdf'"
+notmuch config set index.as_text "^application/pdf$"
+notmuch reindex '*'
+notmuch search id:871qo9p4tf.fsf@tethera.net > EXPECTED
+notmuch search id:871qo9p4tf.fsf@tethera.net and '"not really PDF"' > OUTPUT
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+test_begin_subtest "as_text is bad regex"
+notmuch config set index.as_text '['
+notmuch reindex '*' >& OUTPUT
+cat<<EOF > EXPECTED
+Error in index.as_text: Invalid regular expression: [
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
diff --git a/test/corpora/indexing/fake-pdf:2,S b/test/corpora/indexing/fake-pdf:2,S
new file mode 100644
index 00000000..60a7a47f
--- /dev/null
+++ b/test/corpora/indexing/fake-pdf:2,S
@@ -0,0 +1,11 @@
+From: David Bremner <david@tethera.net>
+To: example@example.com
+Subject: attachment content type
+Date: Thu, 05 Jan 2023 08:02:36 -0400
+Message-ID: <871qo9p4tf.fsf@tethera.net>
+MIME-Version: 1.0
+Content-Type: application/pdf
+Content-Disposition: attachment; filename=fake.pdf
+Content-Transfer-Encoding: base64
+
+dGhpcyBpcyBub3QgcmVhbGx5IFBERgo=
\ No newline at end of file |
