aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author David Bremner <david@tethera.net> 2021-08-24 08:17:22 -0700
committer David Bremner <david@tethera.net> 2021-09-04 17:07:19 -0700
commit 90d9c2ad5c459624d17f92d0844e7a7fbb87d7a2 (patch)
tree 3c511cbadac8d876127f15102d73f4c2d12a598e /lib
parent 48ad0e1ff350a35dd0af6a1892edf27aa5115927 (diff)
lib/parse-sexp: support phrase queries.
Anything that is quoted or not purely word characters is considered a phrase. Phrases are not stemmed, because the stems do not have positional information in the database. It is less efficient to scan the term twice, but it avoids a second pass to add prefixes, so maybe it balances out. In any case, it seems unlikely query parsing is very often a bottleneck.
Diffstat (limited to 'lib')
-rw-r--r-- lib/parse-sexp.cc 45
1 files changed, 37 insertions, 8 deletions
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 25556058..0917f505 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -2,7 +2,7 @@
#if HAVE_SFSEXP
#include "sexp.h"
-
+#include "unicode-util.h"
/* _sexp is used for file scope symbols to avoid clashing with
* definitions from sexp.h */
@@ -67,6 +67,36 @@ _sexp_combine_query (notmuch_database_t *notmuch,
sx->next, output);
}
+static notmuch_status_t
+_sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output)
+{ /* Split phrase into runs of word characters, lowercase and prefix each run, and store an OP_PHRASE query over them in output. */
+ Xapian::Utf8Iterator p (phrase);
+ Xapian::Utf8Iterator end; /* default-constructed; compared against p as the end-of-input sentinel */
+ std::vector<std::string> terms; /* prefixed words, in the order they appear in phrase */
+
+ while (p != end) {
+ Xapian::Utf8Iterator start;
+ while (p != end && ! Xapian::Unicode::is_wordchar (*p)) /* skip any non-word characters before the next word */
+ p++;
+
+ if (p == end) /* nothing but non-word characters remained */
+ break;
+
+ start = p; /* remember where this word begins */
+
+ while (p != end && Xapian::Unicode::is_wordchar (*p)) /* advance to one past the last character of the word */
+ p++;
+
+ if (p != start) {
+ std::string word (start, p);
+ word = Xapian::Unicode::tolower (word); /* words are lowercased only; no stemmer is applied here */
+ terms.push_back (term_prefix + word);
+ }
+ }
+ output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ()); /* phrase query over the collected terms */
+ return NOTMUCH_STATUS_SUCCESS; /* this function has no failure path */
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining an operation, or as a special case the empty
* list */
@@ -80,13 +110,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
- if (sx->aty == SEXP_BASIC)
- term = "Z" + term_prefix + stem (term);
- else
- term = term_prefix + term;
-
- output = Xapian::Query (term);
- return NOTMUCH_STATUS_SUCCESS;
+ if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
+ output = Xapian::Query ("Z" + term_prefix + stem (term));
+ return NOTMUCH_STATUS_SUCCESS;
+ } else {
+ return _sexp_parse_phrase (term_prefix, sx->val, output);
+ }
}
/* Empty list */