]> git.notmuchmail.org Git - notmuch/blob - lib/parse-sexp.cc
lib/thread-fp: factor out query expansion, rewrite in Xapian
[notmuch] / lib / parse-sexp.cc
1 #include "database-private.h"
2
3 #if HAVE_SFSEXP
4 #include "sexp.h"
5 #include "unicode-util.h"
6
7 /* _sexp is used for file scope symbols to avoid clashing with
8  * definitions from sexp.h */
9
/* Properties attached to each keyword in the prefix table. Each flag
 * occupies its own bit so that several can be combined in one value. */
enum _sexp_flag_t {
    SEXP_FLAG_NONE      = 0x00,
    /* keyword names a field; fields may not be nested */
    SEXP_FLAG_FIELD     = 0x01,
    /* atoms inside the field are used verbatim as terms */
    SEXP_FLAG_BOOLEAN   = 0x02,
    /* keyword takes exactly one atom as its argument */
    SEXP_FLAG_SINGLE    = 0x04,
    /* field accepts wildcard queries */
    SEXP_FLAG_WILDCARD  = 0x08,
    /* field accepts regular expression queries */
    SEXP_FLAG_REGEX     = 0x10,
    /* keyword itself requests a regular expression match */
    SEXP_FLAG_DO_REGEX  = 0x20,
};
19
20 /*
21  * define bitwise operators to hide casts */
22
23 inline _sexp_flag_t
24 operator| (_sexp_flag_t a, _sexp_flag_t b)
25 {
26     return static_cast<_sexp_flag_t>(
27         static_cast<unsigned>(a) | static_cast<unsigned>(b));
28 }
29
30 inline _sexp_flag_t
31 operator& (_sexp_flag_t a, _sexp_flag_t b)
32 {
33     return static_cast<_sexp_flag_t>(
34         static_cast<unsigned>(a) & static_cast<unsigned>(b));
35 }
36
37 typedef struct  {
38     const char *name;
39     Xapian::Query::op xapian_op;
40     Xapian::Query initial;
41     _sexp_flag_t flags;
42 } _sexp_prefix_t;
43
44 static _sexp_prefix_t prefixes[] =
45 {
46     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
47       SEXP_FLAG_NONE },
48     { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
49       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
50     { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
51       SEXP_FLAG_FIELD },
52     { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
53       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
54     { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
55       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
56     { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
57       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
58     { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
59       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
60     { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
61       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
62     { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
63       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
64     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
65       SEXP_FLAG_NONE },
66     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
67       SEXP_FLAG_NONE },
68     { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
69       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
70     { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
71       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
72     { "regex",          Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
73       SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
74     { "rx",             Xapian::Query::OP_INVALID,      Xapian::Query::MatchAll,
75       SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
76     { "starts-with",    Xapian::Query::OP_WILDCARD,     Xapian::Query::MatchAll,
77       SEXP_FLAG_SINGLE },
78     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
79       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
80     { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
81       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
82     { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
83       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
84     { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
85       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
86     { }
87 };
88
89 static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
90                                                const _sexp_prefix_t *parent,
91                                                const sexp_t *sx,
92                                                Xapian::Query &output);
93
94 static notmuch_status_t
95 _sexp_combine_query (notmuch_database_t *notmuch,
96                      const _sexp_prefix_t *parent,
97                      Xapian::Query::op operation,
98                      Xapian::Query left,
99                      const sexp_t *sx,
100                      Xapian::Query &output)
101 {
102     Xapian::Query subquery;
103
104     notmuch_status_t status;
105
106     /* if we run out elements, return accumulator */
107
108     if (! sx) {
109         output = left;
110         return NOTMUCH_STATUS_SUCCESS;
111     }
112
113     status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
114     if (status)
115         return status;
116
117     return _sexp_combine_query (notmuch,
118                                 parent,
119                                 operation,
120                                 Xapian::Query (operation, left, subquery),
121                                 sx->next, output);
122 }
123
124 static notmuch_status_t
125 _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output)
126 {
127     Xapian::Utf8Iterator p (phrase);
128     Xapian::Utf8Iterator end;
129     std::vector<std::string> terms;
130
131     while (p != end) {
132         Xapian::Utf8Iterator start;
133         while (p != end && ! Xapian::Unicode::is_wordchar (*p))
134             p++;
135
136         if (p == end)
137             break;
138
139         start = p;
140
141         while (p != end && Xapian::Unicode::is_wordchar (*p))
142             p++;
143
144         if (p != start) {
145             std::string word (start, p);
146             word = Xapian::Unicode::tolower (word);
147             terms.push_back (term_prefix + word);
148         }
149     }
150     output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ());
151     return NOTMUCH_STATUS_SUCCESS;
152 }
153
154 static notmuch_status_t
155 _sexp_parse_wildcard (notmuch_database_t *notmuch,
156                       const _sexp_prefix_t *parent,
157                       std::string match,
158                       Xapian::Query &output)
159 {
160
161     std::string term_prefix = parent ? _find_prefix (parent->name) : "";
162
163     if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
164         _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
165         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
166     }
167
168     output = Xapian::Query (Xapian::Query::OP_WILDCARD,
169                             term_prefix + Xapian::Unicode::tolower (match));
170     return NOTMUCH_STATUS_SUCCESS;
171 }
172
173 static notmuch_status_t
174 _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, const sexp_t *sx,
175                       Xapian::Query &output)
176 {
177     Xapian::Stem stem = *(notmuch->stemmer);
178
179     if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
180         std::string term = Xapian::Unicode::tolower (sx->val);
181
182         output = Xapian::Query ("Z" + term_prefix + stem (term));
183         return NOTMUCH_STATUS_SUCCESS;
184     } else {
185         return _sexp_parse_phrase (term_prefix, sx->val, output);
186     }
187
188 }
189
190 notmuch_status_t
191 _sexp_parse_regex (notmuch_database_t *notmuch,
192                    const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
193                    std::string val, Xapian::Query &output)
194 {
195     if (! parent) {
196         _notmuch_database_log (notmuch, "illegal '%s' outside field\n",
197                                prefix->name);
198         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
199     }
200
201     if (! (parent->flags & SEXP_FLAG_REGEX)) {
202         _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n",
203                                prefix->name, parent->name);
204         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
205     }
206
207     std::string msg; /* ignored */
208
209     return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name,
210                                      val, output, msg);
211 }
212
213 /* Here we expect the s-expression to be a proper list, with first
214  * element defining and operation, or as a special case the empty
215  * list */
216
217 static notmuch_status_t
218 _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
219                        Xapian::Query &output)
220 {
221     if (sx->ty == SEXP_VALUE) {
222         std::string term_prefix = parent ? _find_prefix (parent->name) : "";
223
224         if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
225             return _sexp_parse_wildcard (notmuch, parent, "", output);
226         }
227
228         if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
229             output = Xapian::Query (term_prefix + sx->val);
230             return NOTMUCH_STATUS_SUCCESS;
231         }
232         if (parent) {
233             return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
234         } else {
235             Xapian::Query accumulator;
236             for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) {
237                 if (prefix->flags & SEXP_FLAG_FIELD) {
238                     notmuch_status_t status;
239                     Xapian::Query subquery;
240                     term_prefix = _find_prefix (prefix->name);
241                     status = _sexp_parse_one_term (notmuch, term_prefix, sx, subquery);
242                     if (status)
243                         return status;
244                     accumulator = Xapian::Query (Xapian::Query::OP_OR, accumulator, subquery);
245                 }
246             }
247             output = accumulator;
248             return NOTMUCH_STATUS_SUCCESS;
249         }
250     }
251
252     /* Empty list */
253     if (! sx->list) {
254         output = Xapian::Query::MatchAll;
255         return NOTMUCH_STATUS_SUCCESS;
256     }
257
258     if (sx->list->ty == SEXP_LIST) {
259         _notmuch_database_log (notmuch, "unexpected list in field/operation position\n",
260                                sx->list->val);
261         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
262     }
263
264     for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
265         if (strcmp (prefix->name, sx->list->val) == 0) {
266             if (prefix->flags & SEXP_FLAG_FIELD) {
267                 if (parent) {
268                     _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
269                                            prefix->name, parent->name);
270                     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
271                 }
272                 parent = prefix;
273             }
274
275             if ((prefix->flags & SEXP_FLAG_SINGLE) &&
276                 (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
277                 _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
278                                        prefix->name);
279                 return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
280             }
281
282             if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
283                 return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
284
285             if (prefix->flags & SEXP_FLAG_DO_REGEX) {
286                 return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
287             }
288
289             return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
290                                         sx->list->next, output);
291         }
292     }
293
294     _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
295
296     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
297 }
298
299 notmuch_status_t
300 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
301                                       Xapian::Query &output)
302 {
303     const sexp_t *sx = NULL;
304     char *buf = talloc_strdup (notmuch, querystr);
305
306     sx = parse_sexp (buf, strlen (querystr));
307     if (! sx) {
308         _notmuch_database_log (notmuch, "invalid s-expression: '%s'\n", querystr);
309         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
310     }
311
312     return _sexp_to_xapian_query (notmuch, NULL, sx, output);
313 }
314 #endif