From 11d47950c18f2d19718e35b7264dabf2ff2fd621 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 7 Mar 2017 22:32:26 -0400 Subject: [PATCH] lib: Add regexp expansion for tags and paths From a UI perspective this looks similar to what was already provided for from, subject, and mid, but the implementation is quite different. It uses the database's list of terms to construct a term-based query equivalent to the passed regular expression. --- lib/database.cc | 12 +++++--- lib/regexp-fields.cc | 32 +++++++++++++++++---- test/T650-regexp-query.sh | 58 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 89 insertions(+), 13 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 49b3849c..5b13f541 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -259,12 +259,15 @@ prefix_t prefix_table[] = { { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "thread", "G", NOTMUCH_FIELD_EXTERNAL }, - { "tag", "K", NOTMUCH_FIELD_EXTERNAL }, - { "is", "K", NOTMUCH_FIELD_EXTERNAL }, + { "tag", "K", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, + { "is", "K", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, { "id", "Q", NOTMUCH_FIELD_EXTERNAL }, { "mid", "Q", NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, - { "path", "P", NOTMUCH_FIELD_EXTERNAL }, + { "path", "P", NOTMUCH_FIELD_EXTERNAL| + NOTMUCH_FIELD_PROCESSOR }, { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL }, /* * Unconditionally add ':' to reduce potential ambiguity with @@ -272,7 +275,8 @@ prefix_t prefix_table[] = { * letters. See Xapian document termprefixes.html for related * discussion. 
*/ - { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL }, + { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, #if HAVE_XAPIAN_FIELD_PROCESSOR { "date", NULL, NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 7ae55e70..084bc8c0 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix) else if (prefix == "mid") return NOTMUCH_VALUE_MESSAGE_ID; else - throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'"); + return Xapian::BAD_VALUENO; } RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, @@ -156,15 +156,35 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { - if (str.size () == 0) - return Xapian::Query(Xapian::Query::OP_AND_NOT, + if (str.empty ()) { + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + return Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + } else { + return Xapian::Query (term_prefix); + } + } if (str.at (0) == '/') { - if (str.at (str.size () - 1) == '/'){ - RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2)); - return Xapian::Query (postings->release ()); + if (str.length() > 1 && str.at (str.size () - 1) == '/'){ + std::string regexp_str = str.substr(1,str.size () - 2); + if (slot != Xapian::BAD_VALUENO) { + RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); + return Xapian::Query (postings->release ()); + } else { + std::vector<std::string> terms; + regex_t regexp; + + compile_regex(regexp, regexp_str.c_str ()); + for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); + it != notmuch->xapian_db->allterms_end (); ++it) { + if (regexec (&regexp, (*it).c_str () + term_prefix.size(), + 0, NULL, 0) == 0) + 
terms.push_back(*it); + } + return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end()); + } } else { throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh index 27fc9ab9..b7bdda11 100755 --- a/test/T650-regexp-query.sh +++ b/test/T650-regexp-query.sh @@ -2,13 +2,54 @@ test_description='regular expression searches' . ./test-lib.sh || exit 1 -add_email_corpus - - if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 0 ]; then test_done fi +add_message '[dir]=bad' '[subject]="To the bone"' +add_message '[dir]=.' '[subject]="Top level"' +add_message '[dir]=bad/news' '[subject]="Bears"' +mkdir -p "${MAIL_DIR}/duplicate/bad/news" +cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news" + +add_message '[dir]=things' '[subject]="These are a few"' +add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"' +add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"' + +test_begin_subtest "empty path:// search" +notmuch search 'path:""' > EXPECTED +notmuch search 'path:/^$/' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "empty folder:// search" +notmuch search 'folder:""' > EXPECTED +notmuch search 'folder:/^$/' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "unanchored folder:// specification" +output=$(notmuch search folder:/bad/ | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" + +test_begin_subtest "anchored folder:// search" +output=$(notmuch search 'folder:/^bad$/' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)" + +test_begin_subtest "unanchored path:// specification" 
+output=$(notmuch search path:/bad/ | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" + +test_begin_subtest "anchored path:// search" +output=$(notmuch search 'path:/^bad$/' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)" + +# Use "standard" corpus from here on. +rm -rf $MAIL_DIR +add_email_corpus + notmuch search --output=messages from:cworth > cworth.msg-ids # these headers will generate no document terms @@ -120,4 +161,15 @@ thread:XXX 2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older ver EOF test_expect_equal_file EXPECTED OUTPUT +test_begin_subtest "unanchored tag search" +notmuch search tag:signed or tag:inbox > EXPECTED +notmuch search tag:/i/ > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +notmuch tag +testsi '*' +test_begin_subtest "anchored tag search" +notmuch search tag:signed > EXPECTED +notmuch search tag:/^si/ > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + test_done -- 2.43.0