]> git.notmuchmail.org Git - notmuch/blob - lib/parse-sexp.cc
e562e8f5f0f9fa8f002fbc10389ce3fc49b7ba97
[notmuch] / lib / parse-sexp.cc
1 #include "database-private.h"
2
3 #if HAVE_SFSEXP
4 #include "sexp.h"
5 #include "unicode-util.h"
6
7 /* _sexp is used for file scope symbols to avoid clashing with
8  * definitions from sexp.h */
9
/* Capability bits stored in the 'flags' member of each prefix table
 * entry.  Each value is a distinct bit so that they can be combined. */
typedef enum {
    SEXP_FLAG_NONE      = 0x00, /* no special handling */
    SEXP_FLAG_FIELD     = 0x01, /* entry names a field; it becomes the parent of its arguments */
    SEXP_FLAG_BOOLEAN   = 0x02, /* atoms inside this field are used verbatim as terms */
    SEXP_FLAG_SINGLE    = 0x04, /* operator takes exactly one atom as argument */
    SEXP_FLAG_WILDCARD  = 0x08, /* field accepts wildcard queries */
    SEXP_FLAG_REGEX     = 0x10, /* field accepts regex queries */
    SEXP_FLAG_DO_REGEX  = 0x20, /* operator requests a regex query */
    SEXP_FLAG_EXPAND    = 0x40, /* field accepts query expansion */
    SEXP_FLAG_DO_EXPAND = 0x80, /* operator requests query expansion */
} _sexp_flag_t;
21
22 /*
23  * define bitwise operators to hide casts */
24
25 inline _sexp_flag_t
26 operator| (_sexp_flag_t a, _sexp_flag_t b)
27 {
28     return static_cast<_sexp_flag_t>(
29         static_cast<unsigned>(a) | static_cast<unsigned>(b));
30 }
31
32 inline _sexp_flag_t
33 operator& (_sexp_flag_t a, _sexp_flag_t b)
34 {
35     return static_cast<_sexp_flag_t>(
36         static_cast<unsigned>(a) & static_cast<unsigned>(b));
37 }
38
39 typedef struct  {
40     const char *name;
41     Xapian::Query::op xapian_op;
42     Xapian::Query initial;
43     _sexp_flag_t flags;
44 } _sexp_prefix_t;
45
46 static _sexp_prefix_t prefixes[] =
47 {
48     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
49       SEXP_FLAG_NONE },
50     { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
51       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
52     { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
53       SEXP_FLAG_FIELD },
54     { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
55       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
56     { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
57       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
58     { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
59       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
60     { "infix",          Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
61       SEXP_FLAG_SINGLE },
62     { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
63       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
64     { "matching",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
65       SEXP_FLAG_DO_EXPAND },
66     { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
67       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
68     { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
69       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
70     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
71       SEXP_FLAG_NONE },
72     { "of",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
73       SEXP_FLAG_DO_EXPAND },
74     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
75       SEXP_FLAG_NONE },
76     { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
77       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
78     { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
79       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
80     { "regex",          Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
81       SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
82     { "rx",             Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
83       SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
84     { "starts-with",    Xapian::Query::OP_WILDCARD,     Xapian::Query::MatchAll,
85       SEXP_FLAG_SINGLE },
86     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
87       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
88     { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
89       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
90     { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
91       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
92     { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
93       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
94     { }
95 };
96
97 static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
98                                                const _sexp_prefix_t *parent,
99                                                const sexp_t *sx,
100                                                Xapian::Query &output);
101
102 static notmuch_status_t
103 _sexp_combine_query (notmuch_database_t *notmuch,
104                      const _sexp_prefix_t *parent,
105                      Xapian::Query::op operation,
106                      Xapian::Query left,
107                      const sexp_t *sx,
108                      Xapian::Query &output)
109 {
110     Xapian::Query subquery;
111
112     notmuch_status_t status;
113
114     /* if we run out elements, return accumulator */
115
116     if (! sx) {
117         output = left;
118         return NOTMUCH_STATUS_SUCCESS;
119     }
120
121     status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
122     if (status)
123         return status;
124
125     return _sexp_combine_query (notmuch,
126                                 parent,
127                                 operation,
128                                 Xapian::Query (operation, left, subquery),
129                                 sx->next, output);
130 }
131
132 static notmuch_status_t
133 _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output)
134 {
135     Xapian::Utf8Iterator p (phrase);
136     Xapian::Utf8Iterator end;
137     std::vector<std::string> terms;
138
139     while (p != end) {
140         Xapian::Utf8Iterator start;
141         while (p != end && ! Xapian::Unicode::is_wordchar (*p))
142             p++;
143
144         if (p == end)
145             break;
146
147         start = p;
148
149         while (p != end && Xapian::Unicode::is_wordchar (*p))
150             p++;
151
152         if (p != start) {
153             std::string word (start, p);
154             word = Xapian::Unicode::tolower (word);
155             terms.push_back (term_prefix + word);
156         }
157     }
158     output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ());
159     return NOTMUCH_STATUS_SUCCESS;
160 }
161
162 static notmuch_status_t
163 _sexp_parse_wildcard (notmuch_database_t *notmuch,
164                       const _sexp_prefix_t *parent,
165                       std::string match,
166                       Xapian::Query &output)
167 {
168
169     std::string term_prefix = parent ? _find_prefix (parent->name) : "";
170
171     if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
172         _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
173         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
174     }
175
176     output = Xapian::Query (Xapian::Query::OP_WILDCARD,
177                             term_prefix + Xapian::Unicode::tolower (match));
178     return NOTMUCH_STATUS_SUCCESS;
179 }
180
181 static notmuch_status_t
182 _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, const sexp_t *sx,
183                       Xapian::Query &output)
184 {
185     Xapian::Stem stem = *(notmuch->stemmer);
186
187     if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
188         std::string term = Xapian::Unicode::tolower (sx->val);
189
190         output = Xapian::Query ("Z" + term_prefix + stem (term));
191         return NOTMUCH_STATUS_SUCCESS;
192     } else {
193         return _sexp_parse_phrase (term_prefix, sx->val, output);
194     }
195
196 }
197
198 notmuch_status_t
199 _sexp_parse_regex (notmuch_database_t *notmuch,
200                    const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
201                    std::string val, Xapian::Query &output)
202 {
203     if (! parent) {
204         _notmuch_database_log (notmuch, "illegal '%s' outside field\n",
205                                prefix->name);
206         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
207     }
208
209     if (! (parent->flags & SEXP_FLAG_REGEX)) {
210         _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n",
211                                prefix->name, parent->name);
212         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
213     }
214
215     std::string msg; /* ignored */
216
217     return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name,
218                                      val, output, msg);
219 }
220
221
222 static notmuch_status_t
223 _sexp_expand_query (notmuch_database_t *notmuch,
224                     const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
225                     const sexp_t *sx, Xapian::Query &output)
226 {
227     Xapian::Query subquery;
228     notmuch_status_t status;
229     std::string msg;
230
231     if (! (parent->flags & SEXP_FLAG_EXPAND)) {
232         _notmuch_database_log (notmuch, "'%s' unsupported inside '%s'\n", prefix->name, parent->name);
233         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
234     }
235
236     status = _sexp_combine_query (notmuch, NULL, prefix->xapian_op, prefix->initial, sx, subquery);
237     if (status)
238         return status;
239
240     status = _notmuch_query_expand (notmuch, parent->name, subquery, output, msg);
241     if (status) {
242         _notmuch_database_log (notmuch, "error expanding query %s\n", msg.c_str ());
243     }
244     return status;
245 }
246
247 static notmuch_status_t
248 _sexp_parse_infix (notmuch_database_t *notmuch,  const _sexp_prefix_t *parent,
249                    const sexp_t *sx, Xapian::Query &output)
250 {
251     if (parent) {
252         _notmuch_database_log (notmuch, "'infix' not supported inside '%s'\n", parent->name);
253         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
254     }
255     try {
256         output = notmuch->query_parser->parse_query (sx->val, NOTMUCH_QUERY_PARSER_FLAGS);
257     } catch (const Xapian::QueryParserError &error) {
258         _notmuch_database_log (notmuch, "Syntax error in infix query: %s\n", sx->val);
259         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
260     } catch (const Xapian::Error &error) {
261         if (! notmuch->exception_reported) {
262             _notmuch_database_log (notmuch,
263                                    "A Xapian exception occurred parsing query: %s\n",
264                                    error.get_msg ().c_str ());
265             _notmuch_database_log_append (notmuch,
266                                           "Query string was: %s\n",
267                                           sx->val);
268             notmuch->exception_reported = true;
269             return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
270         }
271     }
272     return NOTMUCH_STATUS_SUCCESS;
273 }
274
275 /* Here we expect the s-expression to be a proper list, with first
276  * element defining and operation, or as a special case the empty
277  * list */
278
279 static notmuch_status_t
280 _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
281                        Xapian::Query &output)
282 {
283     if (sx->ty == SEXP_VALUE) {
284         std::string term_prefix = parent ? _find_prefix (parent->name) : "";
285
286         if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
287             return _sexp_parse_wildcard (notmuch, parent, "", output);
288         }
289
290         if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
291             output = Xapian::Query (term_prefix + sx->val);
292             return NOTMUCH_STATUS_SUCCESS;
293         }
294         if (parent) {
295             return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
296         } else {
297             Xapian::Query accumulator;
298             for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) {
299                 if (prefix->flags & SEXP_FLAG_FIELD) {
300                     notmuch_status_t status;
301                     Xapian::Query subquery;
302                     term_prefix = _find_prefix (prefix->name);
303                     status = _sexp_parse_one_term (notmuch, term_prefix, sx, subquery);
304                     if (status)
305                         return status;
306                     accumulator = Xapian::Query (Xapian::Query::OP_OR, accumulator, subquery);
307                 }
308             }
309             output = accumulator;
310             return NOTMUCH_STATUS_SUCCESS;
311         }
312     }
313
314     /* Empty list */
315     if (! sx->list) {
316         output = Xapian::Query::MatchAll;
317         return NOTMUCH_STATUS_SUCCESS;
318     }
319
320     if (sx->list->ty == SEXP_LIST) {
321         _notmuch_database_log (notmuch, "unexpected list in field/operation position\n",
322                                sx->list->val);
323         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
324     }
325
326     for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
327         if (strcmp (prefix->name, sx->list->val) == 0) {
328             if (prefix->flags & SEXP_FLAG_FIELD) {
329                 if (parent) {
330                     _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
331                                            prefix->name, parent->name);
332                     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
333                 }
334                 parent = prefix;
335             }
336
337             if ((prefix->flags & SEXP_FLAG_SINGLE) &&
338                 (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
339                 _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
340                                        prefix->name);
341                 return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
342             }
343
344             if (strcmp (prefix->name, "infix") == 0) {
345                 return _sexp_parse_infix (notmuch, parent, sx->list->next, output);
346             }
347
348             if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
349                 return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
350
351             if (prefix->flags & SEXP_FLAG_DO_REGEX) {
352                 return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
353             }
354
355             if (prefix->flags & SEXP_FLAG_DO_EXPAND) {
356                 return _sexp_expand_query (notmuch, prefix, parent, sx->list->next, output);
357             }
358
359             return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
360                                         sx->list->next, output);
361         }
362     }
363
364     _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
365
366     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
367 }
368
369 notmuch_status_t
370 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
371                                       Xapian::Query &output)
372 {
373     const sexp_t *sx = NULL;
374     char *buf = talloc_strdup (notmuch, querystr);
375
376     sx = parse_sexp (buf, strlen (querystr));
377     if (! sx) {
378         _notmuch_database_log (notmuch, "invalid s-expression: '%s'\n", querystr);
379         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
380     }
381
382     return _sexp_to_xapian_query (notmuch, NULL, sx, output);
383 }
384 #endif