1 /* regexp-fields.cc - field processor glue for regex supporting fields
3 * This file is part of notmuch.
5 * Copyright © 2015 Austin Clements
6 * Copyright © 2016 David Bremner
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see https://www.gnu.org/licenses/ .
21 * Author: Austin Clements <aclements@csail.mit.edu>
22 * David Bremner <david@tethera.net>
25 #include "regexp-fields.h"
26 #include "notmuch-private.h"
27 #include "database-private.h"
29 #if HAVE_XAPIAN_FIELD_PROCESSOR
31 compile_regex (regex_t ®exp, const char *str)
33 int err = regcomp (®exp, str, REG_EXTENDED | REG_NOSUB);
36 size_t len = regerror (err, ®exp, NULL, 0);
37 char *buffer = new char[len];
39 (void) regerror (err, ®exp, buffer, len);
40 msg.assign (buffer, len);
43 throw Xapian::QueryParserError (msg);
47 RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string ®exp)
50 compile_regex (regexp_, regexp.c_str ());
53 RegexpPostingSource::~RegexpPostingSource ()
59 RegexpPostingSource::init (const Xapian::Database &db)
62 it_ = db_.valuestream_begin (slot_);
63 end_ = db.valuestream_end (slot_);
68 RegexpPostingSource::get_termfreq_min () const
74 RegexpPostingSource::get_termfreq_est () const
76 return get_termfreq_max () / 2;
80 RegexpPostingSource::get_termfreq_max () const
82 return db_.get_value_freq (slot_);
86 RegexpPostingSource::get_docid () const
88 return it_.get_docid ();
92 RegexpPostingSource::at_end () const
98 RegexpPostingSource::next (unused (double min_wt))
100 if (started_ && ! at_end ())
104 for (; ! at_end (); ++it_) {
105 std::string value = *it_;
106 if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
112 RegexpPostingSource::skip_to (Xapian::docid did, unused (double min_wt))
116 for (; ! at_end (); ++it_) {
117 std::string value = *it_;
118 if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
124 RegexpPostingSource::check (Xapian::docid did, unused (double min_wt))
127 if (!it_.check (did) || at_end ())
129 return (regexec (®exp_, (*it_).c_str (), 0, NULL, 0) == 0);
132 static inline Xapian::valueno _find_slot (std::string prefix)
134 if (prefix == "from")
135 return NOTMUCH_VALUE_FROM;
136 else if (prefix == "subject")
137 return NOTMUCH_VALUE_SUBJECT;
139 throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
142 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
143 : slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())),
144 parser (parser_), notmuch (notmuch_)
149 RegexpFieldProcessor::operator() (const std::string & str)
151 if (str.at (0) == '/') {
152 if (str.at (str.size () - 1) == '/'){
153 RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
154 return Xapian::Query (postings->release ());
156 throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
159 /* TODO replace this with a nicer API level triggering of
160 * phrase parsing, when possible */
161 std::string quoted='"' + str + '"';
162 return parser.parse_query (quoted, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);