From 4083fd8bec7a34cf9c6a722b7dd511e0d31712f6 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:32 -0700 Subject: [PATCH] lib/thread-fp: factor out query expansion, rewrite in Xapian It will be convenient not to have to construct a notmuch query object when parsing subqueries, so the commit rewrites the query expansion (currently only used for thread:{} queries) using only Xapian. As a bonus it seems about 15% faster in initial experiments. --- lib/database-private.h | 16 +++++++++++++- lib/parse-sexp.cc | 2 -- lib/query.cc | 48 ++++++++++++++++++++++++++++++++++++++++++ lib/thread-fp.cc | 26 ++++++++--------------- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 7ee8e62d..9ee3b933 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -40,6 +40,10 @@ #include +#if HAVE_SFSEXP +#include +#endif + /* Bit masks for _notmuch_database::features. Features are named, * independent aspects of the database schema. * @@ -313,11 +317,21 @@ notmuch_status_t _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, Xapian::Query &output); +notmuch_status_t +_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery, + Xapian::Query &output, std::string &msg); + /* regexp-fields.cc */ notmuch_status_t _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, std::string regexp_str, Xapian::Query &output, std::string &msg); -#endif +#if HAVE_SFSEXP +/* parse-sexp.cc */ +notmuch_status_t +_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, + Xapian::Query &output); +#endif +#endif #endif diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 84914296..17401f47 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent Xapian::Query &output) { if (sx->ty == SEXP_VALUE) { - std::string term = Xapian::Unicode::tolower (sx->val); - Xapian::Stem stem = *(notmuch->stemmer); std::string term_prefix = parent ? _find_prefix (parent->name) : ""; if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) { diff --git a/lib/query.cc b/lib/query.cc index 87ee18fc..b0937fcc 100644 --- a/lib/query.cc +++ b/lib/query.cc @@ -821,3 +821,51 @@ notmuch_query_get_database (const notmuch_query_t *query) { return query->notmuch; } + +notmuch_status_t +_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery, + Xapian::Query &output, std::string &msg) +{ + std::set terms; + const std::string term_prefix = _find_prefix (field); + + if (_debug_query ()) { + fprintf (stderr, "Expanding subquery:\n%s\n", + subquery.get_description ().c_str ()); + } + + try { + Xapian::Enquire enquire (*notmuch->xapian_db); + Xapian::MSet mset; + + enquire.set_weighting_scheme (Xapian::BoolWeight ()); + enquire.set_query (subquery); + + mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ()); + + for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); iterator++) { + Xapian::docid doc_id = *iterator; + Xapian::Document doc = notmuch->xapian_db->get_document (doc_id); + Xapian::TermIterator i = doc.termlist_begin (); + + for (i.skip_to (term_prefix); + i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); i++) { + terms.insert (*i); + } + } + output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + if (_debug_query ()) { + fprintf (stderr, "Expanded query:\n%s\n", + subquery.get_description ().c_str ()); + } + + } catch (const Xapian::Error &error) { + _notmuch_database_log (notmuch, + "A Xapian exception occurred expanding query: %s\n", + error.get_msg ().c_str ()); + msg = error.get_msg (); + return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + return NOTMUCH_STATUS_SUCCESS; +} diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc index 06708ef2..3aa9c423 100644 --- a/lib/thread-fp.cc +++ b/lib/thread-fp.cc @@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str) if (str.size () <= 1 || str.at (str.size () - 1) != '}') { throw Xapian::QueryParserError ("missing } in '" + str + "'"); } else { + Xapian::Query subquery; + Xapian::Query query; + std::string msg; std::string subquery_str = str.substr (1, str.size () - 2); - notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ()); - notmuch_messages_t *messages; - std::set terms; - if (! subquery) - throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str + - "'"); + status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg); + if (status) + throw Xapian::QueryParserError (msg); - status = notmuch_query_search_messages (subquery, &messages); + status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg); if (status) - throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str + - "'"); + throw Xapian::QueryParserError (msg); - for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) { - std::string term = thread_prefix; - notmuch_message_t *message; - message = notmuch_messages_get (messages); - term += _notmuch_message_get_thread_id_only (message); - terms.insert (term); - } - return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + return query; } } else { /* literal thread id */ -- 2.43.0