From 200e164dc7acc3f19c6fe39164c472985e1ba384 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:20 -0700 Subject: [PATCH] lib/parse-sexp: support subject field The broken tests are because we do not yet handle phrase searches. --- doc/man7/notmuch-sexp-queries.rst | 62 +++++++++++++++++++++++++++++-- lib/parse-sexp.cc | 19 +++++++++- test/T081-sexpr-search.sh | 57 ++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 5 deletions(-) diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst index 0304759e..08e97cc3 100644 --- a/doc/man7/notmuch-sexp-queries.rst +++ b/doc/man7/notmuch-sexp-queries.rst @@ -36,9 +36,8 @@ An s-expression query is either an atom, the empty list, or a a *field*, *logical operation*, or *modifier*, and 0 or more subqueries. -``*`` -``()`` - The empty list matches all messages +``*`` ``()`` + Match all messages. *term* Match all messages containing *term*, possibly after @@ -64,6 +63,59 @@ subqueries. FIELDS `````` +*Fields* (also called *prefixes* in notmuch documentation) +correspond to attributes of mail messages. Some are inherent (and +immutable) like ``subject``, while others ``tag`` and ``property`` are +settable by the user. Each concrete field in +:any:`the table below ` +is discussed further under "Search prefixes" in +:any:`notmuch-search-terms(7)`. The row *user* refers to user defined +fields, described in :any:`notmuch-config(1)`. + +.. _field-table: + +.. table:: Fields with supported modifiers + + +------------+-----------+-----------+-----------+-----------+----------+ + | field | combine | type | expand | wildcard | regex | + +============+===========+===========+===========+===========+==========+ + | *none* | and | | no | yes | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | *user* | and | phrase | no | yes | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | attachment | and | phrase | yes | yes | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | body | and | phrase | no | no | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | date | | range | no | no | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | folder | or | phrase | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | from | and | phrase | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | id | or | term | no | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | is | and | term | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | lastmod | | range | no | no | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | mid | or | term | no | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | mimetype | or | phrase | yes | yes | no | + +------------+-----------+-----------+-----------+-----------+----------+ + | path | or | term | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | property | and | term | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | subject | and | phrase | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | tag | and | term | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | thread | or | term | yes | yes | yes | + +------------+-----------+-----------+-----------+-----------+----------+ + | to | and | phrase | yes | yes | no | + +------------+-----------+-----------+-----------+-----------+----------+ + .. _modifiers: MODIFIERS @@ -86,6 +138,10 @@ EXAMPLES ``(not Bob Marley)`` Match messages containing neither "Bob" nor "Marley", nor their stems, +``(subject quick "brown fox")`` + Match messages whose subject contains "quick" (anywhere, stemmed) and + the phrase "brown fox". + .. |q1| replace:: :math:`q_1` .. |q2| replace:: :math:`q_2` .. |qn| replace:: :math:`q_n` diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 0d2c0ba8..25556058 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -8,7 +8,8 @@ * definitions from sexp.h */ typedef enum { - SEXP_FLAG_NONE = 0, + SEXP_FLAG_NONE = 0, + SEXP_FLAG_FIELD = 1 << 0, } _sexp_flag_t; typedef struct { @@ -26,6 +27,8 @@ static _sexp_prefix_t prefixes[] = SEXP_FLAG_NONE }, { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, SEXP_FLAG_NONE }, + { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, { } }; @@ -76,8 +79,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent if (sx->ty == SEXP_VALUE) { std::string term = Xapian::Unicode::tolower (sx->val); Xapian::Stem stem = *(notmuch->stemmer); + std::string term_prefix = parent ? _find_prefix (parent->name) : ""; if (sx->aty == SEXP_BASIC) - term = "Z" + stem (term); + term = "Z" + term_prefix + stem (term); + else + term = term_prefix + term; output = Xapian::Query (term); return NOTMUCH_STATUS_SUCCESS; @@ -97,6 +103,15 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) { if (strcmp (prefix->name, sx->list->val) == 0) { + if (prefix->flags & SEXP_FLAG_FIELD) { + if (parent) { + _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n", + prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + parent = prefix; + } + return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, sx->list->next, output); } diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh index 5e1bb18d..90cef50c 100755 --- a/test/T081-sexpr-search.sh +++ b/test/T081-sexpr-search.sh @@ -62,6 +62,55 @@ test_begin_subtest "single term in body, unstemmed version" notmuch search --query=sexp '"arriv"' > OUTPUT test_expect_equal_file /dev/null OUTPUT +test_begin_subtest "Search by 'subject'" +add_message [subject]=subjectsearchtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp '(subject subjectsearchtest)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)" + +test_begin_subtest "Search by 'subject' (case insensitive)" +notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(subject "Maildir")' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'subject' (utf-8):" +add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" + +test_begin_subtest "Search by 'subject' (utf-8, and):" +output=$(notmuch search --query=sexp '(subject (and utf8 sübjéct))' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" + +test_begin_subtest "Search by 'subject' (utf-8, and outside):" +output=$(notmuch search --query=sexp '(and (subject utf8) (subject sübjéct))' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" + +test_begin_subtest "Search by 'subject' (utf-8, or):" +notmuch search --query=sexp '(subject (or utf8 subjectsearchtest))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread) +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'subject' (utf-8, or outside):" +notmuch search --query=sexp '(or (subject utf8) (subject subjectsearchtest))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread) +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'subject' (utf-8, phrase-token):" +test_subtest_known_broken +output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" + +test_begin_subtest "Search by 'subject' (utf-8, quoted string):" +test_subtest_known_broken +output=$(notmuch search --query=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" + test_begin_subtest "Unbalanced parens" # A code 1 indicates the error was handled (a crash will return e.g. 139). test_expect_code 1 "notmuch search --query=sexp '('" @@ -90,4 +139,12 @@ unexpected list in field/operation position EOF test_expect_equal_file EXPECTED OUTPUT +test_begin_subtest "illegal nesting" +notmuch search --query=sexp '(subject (subject foo))' >OUTPUT 2>&1 +cat < EXPECTED +notmuch search: Syntax error in query +nested field: 'subject' inside 'subject' +EOF +test_expect_equal_file EXPECTED OUTPUT + test_done -- 2.43.0