diff options
| author | David Bremner <david@tethera.net> | 2021-08-24 08:17:22 -0700 |
|---|---|---|
| committer | David Bremner <david@tethera.net> | 2021-09-04 17:07:19 -0700 |
| commit | 90d9c2ad5c459624d17f92d0844e7a7fbb87d7a2 (patch) | |
| tree | 3c511cbadac8d876127f15102d73f4c2d12a598e /lib | |
| parent | 48ad0e1ff350a35dd0af6a1892edf27aa5115927 (diff) | |
lib/parse-sexp: support phrase queries.
Any term that is quoted, or that does not consist purely of word
characters, is treated as a phrase. Phrases are not stemmed, because
stemmed terms have no positional information in the database. Scanning
the term twice (once to check whether it is a single word, once to
split it into words) is less efficient, but it avoids a second pass to
add prefixes, so the cost probably balances out. In any case, query
parsing is unlikely to be a bottleneck very often.
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/parse-sexp.cc | 45 |
1 file changed, 37 insertions, 8 deletions
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 25556058..0917f505 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -2,7 +2,7 @@ #if HAVE_SFSEXP #include "sexp.h" - +#include "unicode-util.h" /* _sexp is used for file scope symbols to avoid clashing with * definitions from sexp.h */ @@ -67,6 +67,36 @@ _sexp_combine_query (notmuch_database_t *notmuch, sx->next, output); } +static notmuch_status_t +_sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output) +{ + Xapian::Utf8Iterator p (phrase); + Xapian::Utf8Iterator end; + std::vector<std::string> terms; + + while (p != end) { + Xapian::Utf8Iterator start; + while (p != end && ! Xapian::Unicode::is_wordchar (*p)) + p++; + + if (p == end) + break; + + start = p; + + while (p != end && Xapian::Unicode::is_wordchar (*p)) + p++; + + if (p != start) { + std::string word (start, p); + word = Xapian::Unicode::tolower (word); + terms.push_back (term_prefix + word); + } + } + output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ()); + return NOTMUCH_STATUS_SUCCESS; +} + /* Here we expect the s-expression to be a proper list, with first * element defining and operation, or as a special case the empty * list */ @@ -80,13 +110,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent std::string term = Xapian::Unicode::tolower (sx->val); Xapian::Stem stem = *(notmuch->stemmer); std::string term_prefix = parent ? _find_prefix (parent->name) : ""; - if (sx->aty == SEXP_BASIC) - term = "Z" + term_prefix + stem (term); - else - term = term_prefix + term; - - output = Xapian::Query (term); - return NOTMUCH_STATUS_SUCCESS; + if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) { + output = Xapian::Query ("Z" + term_prefix + stem (term)); + return NOTMUCH_STATUS_SUCCESS; + } else { + return _sexp_parse_phrase (term_prefix, sx->val, output); + } } /* Empty list */ |
