- from:<name-or-address>
+- from:/<regex>/
+
- to:<name-or-address>
- subject:<word-or-quoted-phrase>
+- subject:/<regex>/
+
- attachment:<word>
- mimetype:<word>
by including quotation marks around the phrase, immediately following
**subject:**.
+If notmuch is built with **Xapian Field Processors** (see below) the
+**from:** and **subject:** prefixes can also be used to restrict the
+results to those whose from/subject value matches a regular expression
+(see **regex(7)**) delimited with //.
+
+::
+
+ notmuch search 'from:/bob@.*[.]example[.]com/'
+
The **attachment:** prefix can be used to search for specific filenames
(or extensions) of attachments to email messages.
----------------------------------
Xapian (and hence notmuch) prefixes are either **boolean**, supporting
-exact matches like "tag:inbox" or **probabilistic**, supporting a more flexible **term** based searching. The prefixes currently supported by notmuch are as follows.
-
+exact matches like "tag:inbox" or **probabilistic**, supporting a more
+flexible **term** based searching. Certain **special** prefixes are
+processed by notmuch in a way not strictly fitting either of Xapian's
+built-in styles. The prefixes currently supported by notmuch are as
+follows.
Boolean
**tag:**, **id:**, **thread:**, **folder:**, **path:**, **property:**
Probabilistic
- **from:**, **to:**, **subject:**, **attachment:**, **mimetype:**
+ **to:**, **attachment:**, **mimetype:**
+Special
+ **from:**, **query:**, **subject:**
Terms and phrases
-----------------
- non-range date queries, e.g. "date:today"
- named queries e.g. "query:my_special_query"
+- regular expression searches, e.g. "subject:/^\\[SPAM\\]/"
SEE ALSO
========
$(dir)/query.cc \
$(dir)/query-fp.cc \
$(dir)/config.cc \
+ $(dir)/regexp-fields.cc \
$(dir)/thread.cc
libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
#include "database-private.h"
#include "parse-time-vrp.h"
#include "query-fp.h"
+#include "regexp-fields.h"
#include "string-util.h"
#include <iostream>
NOTMUCH_FIELD_PROCESSOR },
#endif
{ "from", "XFROM", NOTMUCH_FIELD_EXTERNAL |
- NOTMUCH_FIELD_PROBABILISTIC },
+ NOTMUCH_FIELD_PROBABILISTIC |
+ NOTMUCH_FIELD_PROCESSOR },
{ "to", "XTO", NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROBABILISTIC },
{ "attachment", "XATTACHMENT", NOTMUCH_FIELD_EXTERNAL |
{ "mimetype", "XMIMETYPE", NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROBABILISTIC },
{ "subject", "XSUBJECT", NOTMUCH_FIELD_EXTERNAL |
- NOTMUCH_FIELD_PROBABILISTIC },
+ NOTMUCH_FIELD_PROBABILISTIC |
+ NOTMUCH_FIELD_PROCESSOR},
};
static void
else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
else
- INTERNAL_ERROR("unsupported field processor prefix: %s\n", prefix->name);
+ fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release ();
/* we treat all field-processor fields as boolean in order to get the raw input */
notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
--- /dev/null
+/* regexp-fields.cc - field processor glue for regex supporting fields
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2015 Austin Clements
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Austin Clements <aclements@csail.mit.edu>
+ * David Bremner <david@tethera.net>
+ */
+
+#include "regexp-fields.h"
+#include "notmuch-private.h"
+#include "database-private.h"
+
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+/* Compile STR as a POSIX extended regular expression into REGEXP.
+ *
+ * REG_NOSUB is used because callers only need a match/no-match answer.
+ * On failure the human readable regerror() text is thrown as a
+ * Xapian::QueryParserError; REGEXP must not be used (or regfree'd by
+ * the caller on the strength of this call) in that case.
+ */
+static void
+compile_regex (regex_t &regexp, const char *str)
+{
+    int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);
+
+    if (err != 0) {
+        /* A NULL/0 buffer makes regerror report the size it needs,
+         * terminating NUL included. */
+        size_t len = regerror (err, &regexp, NULL, 0);
+        std::string msg (len, '\0');
+
+        if (len > 0) {
+            (void) regerror (err, &regexp, &msg[0], len);
+            /* drop the terminating NUL so it does not end up inside
+             * the exception message */
+            msg.resize (len - 1);
+        }
+
+        throw Xapian::QueryParserError (msg);
+    }
+}
+
+/* Build a posting source over value slot SLOT that accepts only values
+ * matching REGEXP.  The pattern is compiled once here via
+ * compile_regex, which throws Xapian::QueryParserError if it is
+ * invalid.
+ */
+RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
+    : slot_ (slot), started_ (false)
+{
+    compile_regex (regexp_, regexp.c_str ());
+}
+
+/* Release the storage held by the compiled pattern. */
+RegexpPostingSource::~RegexpPostingSource ()
+{
+    regfree (&regexp_);
+}
+
+/* Bind this source to DB and position the value iterator on the first
+ * value in slot_ that matches the regexp, or at the end of the stream
+ * if no value matches.  Also resets started_ so that the next call to
+ * next() is treated as the first one.
+ */
+void
+RegexpPostingSource::init (const Xapian::Database &db)
+{
+    db_ = db;
+    /* take both iterators from the same (stored) database handle */
+    it_ = db_.valuestream_begin (slot_);
+    end_ = db_.valuestream_end (slot_);
+    started_ = false;
+
+    /* make sure we start on a matching value */
+    while (! at_end () && regexec (&regexp_, (*it_).c_str (), 0, NULL, 0) != 0) {
+        ++it_;
+    }
+}
+
+/* Lower bound on the number of documents this source can return:
+ * always reported as 0.
+ */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_min () const
+{
+ return 0;
+}
+
+/* Estimated number of matching documents: half of the upper bound
+ * reported by get_termfreq_max ().
+ */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_est () const
+{
+    const Xapian::doccount upper_bound = get_termfreq_max ();
+
+    return upper_bound / 2;
+}
+
+/* Upper bound on the number of matching documents: the value
+ * frequency the database reports for slot_.
+ */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_max () const
+{
+ return db_.get_value_freq (slot_);
+}
+
+/* Document id of the value the iterator is currently positioned on. */
+Xapian::docid
+RegexpPostingSource::get_docid () const
+{
+ return it_.get_docid ();
+}
+
+/* True once the value iterator has reached the end of the slot's
+ * value stream.
+ */
+bool
+RegexpPostingSource::at_end () const
+{
+ return it_ == end_;
+}
+
+/* Advance to the next document whose value matches the regexp.
+ *
+ * init () already leaves the iterator on the first matching value, so
+ * the first call after init () must not step past it; started_ records
+ * whether that first call has happened yet.  The min_wt argument is
+ * not used.
+ */
+void
+RegexpPostingSource::next (unused (double min_wt))
+{
+    if (started_ && ! at_end ())
+        ++it_;
+    started_ = true;
+
+    /* skip forward over values that do not match */
+    for (; ! at_end (); ++it_) {
+        std::string value = *it_;
+        if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0)
+            break;
+    }
+}
+
+/* Map a prefix name to the NOTMUCH_VALUE_* slot used for regexp
+ * matching.  Only "from" and "subject" are known; any other name is
+ * rejected with a Xapian::QueryParserError.
+ */
+static inline Xapian::valueno
+_find_slot (std::string prefix)
+{
+    if (prefix == "subject")
+        return NOTMUCH_VALUE_SUBJECT;
+
+    if (prefix == "from")
+        return NOTMUCH_VALUE_FROM;
+
+    throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+}
+
+/* Set up a field processor for PREFIX: look up the value slot and the
+ * term prefix belonging to it, and keep hold of the query parser and
+ * database handle supplied by the caller.
+ */
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
+    : slot (_find_slot (prefix)),
+      term_prefix (_find_prefix (prefix.c_str ())),
+      parser (parser_),
+      notmuch (notmuch_)
+{
+}
+
+/* Turn the text following the prefix into a query.
+ *
+ * Text of the form /.../ is treated as a regular expression and
+ * matched against the value slot through a RegexpPostingSource.  Text
+ * that starts with '/' but has no closing '/' is rejected with a
+ * Xapian::QueryParserError.  Anything else (including the empty
+ * string) is wrapped in double quotes and handed back to the query
+ * parser under term_prefix.
+ */
+Xapian::Query
+RegexpFieldProcessor::operator() (const std::string & str)
+{
+    /* check for emptiness first: indexing an empty string with at ()
+     * would throw std::out_of_range, which is not a Xapian::Error */
+    if (! str.empty () && str[0] == '/') {
+        /* both delimiters are required; a lone "/" must not be taken
+         * for an empty regexp (size () - 2 would wrap around) */
+        if (str.size () < 2 || str[str.size () - 1] != '/')
+            throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
+
+        RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr (1, str.size () - 2));
+        return Xapian::Query (postings->release ());
+    }
+
+    /* TODO replace this with a nicer API level triggering of
+     * phrase parsing, when possible */
+    std::string quoted = '"' + str + '"';
+    return parser.parse_query (quoted, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+}
+#endif
--- /dev/null
+/* regexp-fields.h - xapian glue for semi-bruteforce regexp search
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2015 Austin Clements
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Austin Clements <aclements@csail.mit.edu>
+ * David Bremner <david@tethera.net>
+ */
+
+#ifndef NOTMUCH_REGEXP_FIELDS_H
+#define NOTMUCH_REGEXP_FIELDS_H
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+#include <sys/types.h>
+#include <regex.h>
+#include "database-private.h"
+#include "notmuch-private.h"
+
+/* A posting source that returns documents where a value matches a
+ * regexp.
+ *
+ * The regexp is compiled once at construction time; matching walks
+ * the value stream of a single slot and tests every value in turn.
+ */
+class RegexpPostingSource : public Xapian::PostingSource
+{
+ protected:
+    /* value slot whose contents are tested against the regexp */
+    const Xapian::valueno slot_;
+    /* compiled pattern; released in the destructor */
+    regex_t regexp_;
+    /* database handle stored by init () */
+    Xapian::Database db_;
+    /* false until next () has been called for the first time after init () */
+    bool started_;
+    /* current position in, and end of, the slot's value stream */
+    Xapian::ValueIterator it_, end_;
+
+/* No copying */
+    RegexpPostingSource (const RegexpPostingSource &);
+    RegexpPostingSource &operator= (const RegexpPostingSource &);
+
+ public:
+    RegexpPostingSource (Xapian::valueno slot, const std::string &regexp);
+    ~RegexpPostingSource ();
+    void init (const Xapian::Database &db);
+    Xapian::doccount get_termfreq_min () const;
+    Xapian::doccount get_termfreq_est () const;
+    Xapian::doccount get_termfreq_max () const;
+    Xapian::docid get_docid () const;
+    bool at_end () const;
+    void next (unused (double min_wt));
+};
+
+
+/* Field processor for prefixes that accept either ordinary search
+ * text or a /regexp/ argument.
+ */
+class RegexpFieldProcessor : public Xapian::FieldProcessor
+{
+ protected:
+    /* value slot consulted for regexp matches */
+    Xapian::valueno slot;
+    /* term prefix used when the argument is not a regexp */
+    std::string term_prefix;
+    /* parser used to re-parse non-regexp arguments */
+    Xapian::QueryParser &parser;
+    /* database handle supplied at construction */
+    notmuch_database_t *notmuch;
+
+ public:
+    RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
+
+    ~RegexpFieldProcessor () { }
+
+    Xapian::Query operator() (const std::string & str);
+};
+#endif
+#endif /* NOTMUCH_REGEXP_FIELDS_H */
--- /dev/null
+#!/usr/bin/env bash
+test_description='regular expression searches'
+. ./test-lib.sh || exit 1
+
+add_email_corpus
+
+
+# Regexp searches only exist when notmuch was built with Xapian field
+# processors; without them there is nothing to test.  The variable is
+# quoted and defaulted to 0 so that an unset value skips the tests
+# instead of making `[` fail with a syntax error and running them anyway.
+if [ "${NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR:-0}" -eq 0 ]; then
+    test_done
+fi
+
+# Reference result: message ids returned by the ordinary (non-regexp)
+# from:cworth search; several subtests below compare against it.
+notmuch search --output=messages from:cworth > cworth.msg-ids
+
+# Lower-case /carl/ is expected to match nothing (compared with /dev/null).
+test_begin_subtest "regexp from search, case sensitive"
+notmuch search --output=messages from:/carl/ > OUTPUT
+test_expect_equal_file /dev/null OUTPUT
+
+# A regexp with no matches OR'ed with one that has matches.
+test_begin_subtest "empty regexp or query"
+notmuch search --output=messages from:/carl/ or from:/cworth/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+# A regexp on from: AND'ed with an ordinary subject: term.
+test_begin_subtest "non-empty regexp and query"
+notmuch search from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [1/2] Carl Worth| Alex Botero-Lowry; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX 2009-11-18 [1/2] Carl Worth| Ingmar Vanhassel; [notmuch] [PATCH] Typsos (inbox unread)
+thread:XXX 2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+thread:XXX 2009-11-18 [1/2] Carl Worth| Keith Packard; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+thread:XXX 2009-11-18 [2/5] Carl Worth| Mikhail Gusarov, Keith Packard; [notmuch] [PATCH 1/2] Close message file after parsing message headers (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+# The regexp form must return the same messages as the plain term search.
+test_begin_subtest "regexp from search, duplicate term search"
+notmuch search --output=messages from:/cworth/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+# The double quotes are passed through to notmuch by the single quotes;
+# presumably they are needed because the regexp contains a space.
+test_begin_subtest "long enough regexp matches only desired senders"
+notmuch search --output=messages 'from:"/C.* Wo/"' > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+# Dropping the trailing "o" lets one additional message match.
+test_begin_subtest "shorter regexp matches one more sender"
+notmuch search --output=messages 'from:"/C.* W/"' > OUTPUT
+{ echo id:1258544095-16616-1-git-send-email-chris@chris-wilson.co.uk; cat cworth.msg-ids; } > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp subject search, non-ASCII"
+notmuch search --output=messages subject:/accentué/ > OUTPUT
+echo id:877h1wv7mg.fsf@inf-8657.int-evry.fr > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+# The backslashes keep the shell from consuming the single quotes, so
+# the regexp notmuch sees is 'X' including the quote characters.
+test_begin_subtest "regexp subject search, punctuation"
+notmuch search subject:/\'X\'/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [2/2] Keith Packard, Carl Worth; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+# Without the quote characters a second thread matches as well.
+test_begin_subtest "regexp subject search, no punctuation"
+notmuch search subject:/X/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [2/2] Keith Packard, Carl Worth; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+# Two regexp searches on different fields combined with "and".
+test_begin_subtest "combine regexp from and subject"
+notmuch search subject:/-C/ and from:/.an.k/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-17 [1/2] Jan Janak| Carl Worth; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+# An invalid regexp must be reported as a query error; stdout and
+# stderr are both captured so the message can be compared.
+test_begin_subtest "regexp error reporting"
+notmuch search 'from:/unbalanced[/' 1>OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: A Xapian exception occurred
+A Xapian exception occurred performing query: Invalid regular expression
+Query string was: from:/unbalanced[/
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done