X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=lib%2Fregexp-fields.cc;h=0feb50e586ba84f1c4ef84cb62e78ea679b472c3;hp=1651677cd8638b759a9f96838237a98aa2ef5af2;hb=HEAD;hpb=a34f30888e7874fba2032a066a7babce1dd3f69f diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 1651677c..3a775261 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -25,29 +25,34 @@ #include "regexp-fields.h" #include "notmuch-private.h" #include "database-private.h" +#include "xapian-extra.h" -#if HAVE_XAPIAN_FIELD_PROCESSOR -static void -compile_regex (regex_t &regexp, const char *str) +notmuch_status_t +compile_regex (regex_t &regexp, const char *str, std::string &msg) { int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB); if (err != 0) { size_t len = regerror (err, &regexp, NULL, 0); char *buffer = new char[len]; - std::string msg; + msg = "Regexp error: "; (void) regerror (err, &regexp, buffer, len); - msg.assign (buffer, len); + msg.append (buffer, len); delete[] buffer; - throw Xapian::QueryParserError (msg); + return NOTMUCH_STATUS_ILLEGAL_ARGUMENT; } + return NOTMUCH_STATUS_SUCCESS; } RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp) : slot_ (slot) { - compile_regex (regexp_, regexp.c_str ()); + std::string msg; + notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg); + + if (status) + throw Xapian::QueryParserError (msg); } RegexpPostingSource::~RegexpPostingSource () @@ -124,53 +129,124 @@ bool RegexpPostingSource::check (Xapian::docid did, unused (double min_wt)) { started_ = true; - if (!it_.check (did) || at_end ()) + if (! 
it_.check (did) || at_end ()) return false; return (regexec (&regexp_, (*it_).c_str (), 0, NULL, 0) == 0); } -static inline Xapian::valueno _find_slot (std::string prefix) +static inline Xapian::valueno +_find_slot (std::string prefix) { if (prefix == "from") return NOTMUCH_VALUE_FROM; else if (prefix == "subject") return NOTMUCH_VALUE_SUBJECT; + else if (prefix == "mid") + return NOTMUCH_VALUE_MESSAGE_ID; else - throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'"); + return Xapian::BAD_VALUENO; } -RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_) - : slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())), - parser (parser_), notmuch (notmuch_) +RegexpFieldProcessor::RegexpFieldProcessor (std::string field_, + notmuch_field_flag_t options_, + Xapian::QueryParser &parser_, + notmuch_database_t *notmuch_) + : slot (_find_slot (field_)), + field (field_), + term_prefix (_find_prefix (field_.c_str ())), + options (options_), + parser (parser_), + notmuch (notmuch_) { }; +notmuch_status_t +_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, + std::string regexp_str, + Xapian::Query &output, std::string &msg) +{ + regex_t regexp; + notmuch_status_t status; + + status = compile_regex (regexp, regexp_str.c_str (), msg); + if (status) { + _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ()); + return status; + } + + if (slot == Xapian::BAD_VALUENO) + slot = _find_slot (field); + + if (slot == Xapian::BAD_VALUENO) { + std::string term_prefix = _find_prefix (field.c_str ()); + std::vector<std::string> terms; + + for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); + it != notmuch->xapian_db->allterms_end (); ++it) { + if (regexec (&regexp, (*it).c_str () + term_prefix.size (), + 0, NULL, 0) == 0) + terms.push_back (*it); + } + output = Xapian::Query (Xapian::Query::OP_OR, 
terms.begin (), terms.end ()); + } else { + RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); + output = Xapian::Query (postings->release ()); + } + return NOTMUCH_STATUS_SUCCESS; +} + Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { - if (str.size () == 0) - return Xapian::Query(Xapian::Query::OP_AND_NOT, - Xapian::Query::MatchAll, - Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + if (str.empty ()) { + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + return Xapian::Query (Xapian::Query::OP_AND_NOT, + xapian_query_match_all (), + Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + } else { + return Xapian::Query (term_prefix); + } + } if (str.at (0) == '/') { - if (str.at (str.size () - 1) == '/'){ - RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2)); - return Xapian::Query (postings->release ()); + if (str.length () > 1 && str.at (str.size () - 1) == '/') { + Xapian::Query query; + std::string regexp_str = str.substr (1, str.size () - 2); + std::string msg; + notmuch_status_t status; + + status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg); + if (status) + throw Xapian::QueryParserError (msg); + return query; } else { throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } } else { - /* TODO replace this with a nicer API level triggering of - * phrase parsing, when possible */ - std::string query_str; + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + /* TODO replace this with a nicer API level triggering of + * phrase parsing, when possible */ + std::string query_str; + + if ((str.at (0) != '(' || *str.rbegin () != ')') && + (*str.rbegin () != '*' || str.find (' ') != std::string::npos)) + query_str = '"' + str + '"'; + else + query_str = str; + + return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix); + } else { + /* Boolean prefix */ + std::string query_str; + std::string 
term; - if (str.find (' ') != std::string::npos) - query_str = '"' + str + '"'; - else - query_str = str; + if (str.length () > 1 && str.at (str.size () - 1) == '/') + query_str = str.substr (0, str.size () - 1); + else + query_str = str; - return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix); + term = term_prefix + query_str; + return Xapian::Query (term); + } } } -#endif