1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_regex.cpp
28 #include "vogl_core.h"
29 #include "vogl_regex.h"
30 #include "vogl_strutils.h"
// Member initializer: the object starts out flagged as not initialized.
35 : m_is_initialized(false)
39 regexp::regexp(const char *pPattern, uint flags)
40 : m_is_initialized(false)
42 init(pPattern, flags);
// Hand m_regex back to vogl_regfree() (presumably a wrapper over POSIX regfree)
// and flag the object as no longer initialized.
54 vogl_regfree(&m_regex);
55 m_is_initialized = false;
// Drop the match strings recorded by an earlier find().
59 m_match_strings.clear();
// Compiles pPattern into m_regex via vogl_regcomp().
//   pPattern - pattern text passed straight to vogl_regcomp().
//   flags    - compile flags; REG_EXTENDED is always OR'ed in.
// When an error message is produced it is stored in m_error.
// NOTE(review): presumably returns true on a successful compile and false
// otherwise - confirm against the return statements.
63 bool regexp::init(const char *pPattern, uint flags)
// Extended syntax is forced on regardless of what the caller passed.
67 flags |= REG_EXTENDED;
69 int errcode = vogl_regcomp(&m_regex, pPattern, flags);
// First vogl_regerror() call passes a NULL buffer of size 0; its return value
// is then used as the size of the message buffer (POSIX regerror semantics assumed).
72 size_t errbuf_size = vogl_regerror(errcode, &m_regex, NULL, 0);
// Only allocate when the size is nonzero and survives the cast to uint below.
73 if ((errbuf_size) && (errbuf_size < cUINT32_MAX))
75 vogl::vector<char> errbuf(static_cast<uint>(errbuf_size));
// Second call writes the actual message text into the buffer.
77 vogl_regerror(errcode, &m_regex, errbuf.get_ptr(), errbuf_size);
79 m_error.set(errbuf.get_ptr());
// Fallback text used when no usable message size was reported.
83 m_error = "unknown regex error";
// m_regex now holds a compiled pattern; the query methods check this flag.
89 m_is_initialized = true;
// Reports whether vogl_regexec() finds the compiled pattern in pString.
// No match offsets are requested (nmatch 0, NULL pmatch); the result is true
// exactly when vogl_regexec() returns 0.
94 bool regexp::find_any(const char *pString)
// Guard against use without a compiled pattern.
96 if (!m_is_initialized)
102 return vogl_regexec(&m_regex, pString, 0, NULL, 0) == 0;
// Runs the compiled pattern against pString and reports the match it finds.
//   begin - receives the match's start offset (rm_so), narrowed to int.
//   end   - receives the match's end offset (rm_eo), narrowed to int.
// On a match, returns a pointer to the start of the match inside pString.
105 const char *regexp::find_first(const char *pString, int &begin, int &end)
// Guard against use without a compiled pattern.
110 if (!m_is_initialized)
// Ask for a single regmatch_t; a nonzero result means vogl_regexec() reported no match.
117 if ((vogl_regexec(&m_regex, pString, 1, &match, 0)) != 0)
// Offsets are relative to pString.
123 begin = static_cast<int>(match.rm_so);
124 end = static_cast<int>(match.rm_eo);
126 return pString + match.rm_so;
// Reports whether the match returned by find_first() spans all of pString.
129 bool regexp::full_match(const char *pString)
132 const char *p = find_first(pString, begin, end);
// Whole-string match: it must start at offset 0 and stop exactly at the terminating NUL.
136 return (!begin) && (pString[end] == '\0');
// Records matches of the compiled pattern in pString.
// The matched text goes into m_match_strings and the (start, length) pair,
// relative to pString, goes into m_match_locs.
// Returns the number of entries in m_match_strings.
139 uint regexp::find(const char *pString)
// Guard against use without a compiled pattern.
141 if (!m_is_initialized)
// Discard results left over from a previous call.
144 m_match_strings.resize(0);
145 m_match_locs.resize(0);
// pCur is the position handed to vogl_regexec() for the search.
148 const char *pCur = pString;
// NOTE(review): eflags is 0 here. If pCur has been advanced past pString, a '^'
// anchor can match at the restart position as though it were the start of the
// line; REG_NOTBOL would prevent that - confirm intended behavior.
152 if ((vogl_regexec(&m_regex, pCur, 1, &match, 0)) != 0)
// Range check on the raw offsets (presumably the dynamic_string length limit).
158 if ((match.rm_eo >= cMaxDynamicStringLen) || (match.rm_so >= cMaxDynamicStringLen))
164 int len = static_cast<int>(match.rm_eo) - static_cast<int>(match.rm_so);
// Same range check on the computed length, plus a guard against a negative length.
165 if ((len < 0) || (len >= cMaxDynamicStringLen))
// Copy the matched characters out of the subject string.
173 str.set_from_buf(pCur + match.rm_so, static_cast<int>(len));
// Append a new slot and swap the copy into it, then record where the match
// sits relative to the start of pString.
175 m_match_strings.enlarge(1)->swap(str);
176 m_match_locs.enlarge(1)->set(static_cast<int>(pCur - pString) + static_cast<int>(match.rm_so), len);
// Zero-length match.
179 if (match.rm_so == match.rm_eo)
186 return m_match_strings.size();
// Builds in 'result' a version of pString where each match recorded by find()
// is replaced with pReplacement; unmatched text is copied through unchanged.
//   result       - string the output is appended to.
//   pString      - subject string to search.
//   pReplacement - text appended in place of each match.
// NOTE(review): presumably returns the number of replacements made - confirm.
189 uint regexp::replace(dynamic_string &result, const char *pString, const char *pReplacement)
// find() fills m_match_locs, which drives the loop below.
191 uint n = find(pString);
200 uint str_len = vogl_strlen(pString);
// Walk the matches in the order find() recorded them.
203 for (uint i = 0; i < n; i++)
205 const regexp_match_loc &cur_match = m_match_locs[i];
// A match must not begin before the text already consumed.
207 VOGL_ASSERT(cur_match.m_start >= cur_ofs);
// Copy the unmatched text between the previous match and this one.
209 if (cur_match.m_start > cur_ofs)
210 result.append(pString + cur_ofs, cur_match.m_start - cur_ofs);
213 result.append(pReplacement);
// Continue just past the end of this match.
215 cur_ofs = cur_match.m_start + cur_match.m_len;
// Copy whatever follows the final match.
218 if (cur_ofs < str_len)
220 result.append(pString + cur_ofs, str_len - cur_ofs);
226 static uint regexp_test(const char *pPat, const char *pStr)
228 dynamic_string_array matches(regexp_find(pStr, pPat));
229 printf("Pattern: \"%s\" String: \"%s\" Find: %u\n", pPat, pStr, matches.size());
230 for (uint i = 0; i < matches.size(); i++)
231 printf("\"%s\"\n", matches[i].get_ptr());
232 return matches.size();
// Test helper: evaluates regexp_find_any() for pattern pPat against pStr and
// prints the pattern, the subject string and the result.
235 static bool regexp_test2(const char *pPat, const char *pStr)
237 bool result = regexp_find_any(pStr, pPat);
// The bool is printed through %u, so it shows up as 0 or 1.
238 printf("Pattern: \"%s\" String: \"%s\" FindAny: %u\n", pPat, pStr, result);
// Test helper: evaluates regexp_full_match() for pattern pPat against pStr and
// prints the pattern, the subject string and the result.
242 static bool regexp_test3(const char *pPat, const char *pStr)
244 bool result = regexp_full_match(pStr, pPat);
// The bool is printed through %u, so it shows up as 0 or 1.
245 printf("Pattern: \"%s\" String: \"%s\" Match: %u\n", pPat, pStr, result);
// Failure counter shared by the self-test code in this file.
249 static uint g_regexp_test_fail_count;
// Records the outcome of a single test expression.
250 static void regexp_test_check(bool success)
// Bump the failure counter.
// NOTE(review): presumably only reached when 'success' is false - confirm.
254 g_regexp_test_fail_count++;
// Start the run with a clean failure count.
261 g_regexp_test_fail_count = 0;
263 // Some of these tests were borrowed from stb.h.
// Match-count checks through the regexp_test(pattern, string) helper.
265 regexp_test_check(regexp_test("ab+c", "ab") == 0);
266 regexp_test_check(regexp_test("ab+c", "abc blah") == 1);
267 regexp_test_check(regexp_test("ab+c$", "abc blah") == 0);
268 regexp_test_check(regexp_test("ab+c$", "abc") == 1);
269 regexp_test_check(regexp_test("ab+c", "abc") == 1);
270 regexp_test_check(regexp_test("ab+c", "abbc") == 1);
271 regexp_test_check(regexp_test("get|set", "getter") == 1);
272 regexp_test_check(regexp_test("get|set", "setter") == 1);
273 regexp_test_check(regexp_test("[a.c]", "a") == 1);
274 regexp_test_check(regexp_test("[a.c]", "z") == 0);
275 regexp_test_check(regexp_test("[a.c]", ".") == 1);
276 regexp_test_check(regexp_test("[a.c]", "c") == 1);
277 regexp_test_check(regexp_test("gr(a|e)y", "grey") == 1);
278 regexp_test_check(regexp_test("gr(a|e)y", "gray") == 1);
279 regexp_test_check(regexp_test("gr(a|e)y", "gry") == 0);
280 regexp_test_check(regexp_test("g{1}", "g") == 1);
281 regexp_test_check(regexp_test("g{3,5}", "gg") == 0);
282 regexp_test_check(regexp_test("g{3,5}", "gggg") == 1);
283 regexp_test_check(regexp_test("g{3,5}", "ggggg") == 1);
284 regexp_test_check(regexp_test("g{3,5}", "gggggg") == 1);
285 regexp_test_check(regexp_test("g{3,5}", "ggggggXggg") == 2);
286 regexp_test_check(regexp_test("(g{3,5}|x{3,7})", "ggggggXxxx!gggg") == 3);
// regexp_test2 exercises regexp_find_any(); regexp_test3 exercises regexp_full_match().
287 regexp_test_check(regexp_test2("Uniform", "glUniform") == 1);
288 regexp_test_check(regexp_test3("Uniform", "Uniform") == 1);
289 regexp_test_check(regexp_test3("Uniform$", "Uniform") == 1);
290 regexp_test_check(regexp_test3("Uniform$", "UniformBlah") == 0);
291 regexp_test_check(regexp_test3("Uniform", "UniformBlah") == 0);
292 regexp_test_check(regexp_test3("Uniform", "glUniformBlah") == 0);
294 regexp_test_check(regexp_test("<h(.)>([^<]+)", "<h2>Egg prices</h2>") == 1);
// Member replace(): the output lands in x and is compared against the expected text.
300 r.replace(x, "AaThis is a test zz BlAh", "!!!!!!");
301 printf("%s\n", x.get_ptr());
302 regexp_test_check(x.compare("A!!!!!!This is !!!!!! test !!!!!!!!!!!! BlAh", true) == 0);
309 r.replace(x, "aThis is a test zz BlAh", "!!!!!!");
310 printf("%s\n", x.get_ptr());
311 regexp_test_check(x.compare("!!!!!!This is !!!!!! test !!!!!!!!!!!! BlAh", true) == 0);
318 r.replace(x, "aThis is a test zz BlAha", "!!!!!!");
319 printf("%s\n", x.get_ptr());
320 regexp_test_check(x.compare("!!!!!!This is !!!!!! test !!!!!!!!!!!! BlAh!!!!!!", true) == 0);
// Free-function regexp_replace(subject, pattern, replacement); the last two cases
// cover an empty and a NULL replacement.
323 regexp_test_check(regexp_replace("Blah", "a", "!").compare("Bl!h", true) == 0);
324 regexp_test_check(regexp_replace("Blah", "Blah", "!").compare("!", true) == 0);
325 regexp_test_check(regexp_replace("BlahX", "Blah", "!").compare("!X", true) == 0);
326 regexp_test_check(regexp_replace("BlahX", "Blah", "X").compare("XX", true) == 0);
327 regexp_test_check(regexp_replace("Blah", "Blah", "") == "");
328 regexp_test_check(regexp_replace("XBlahX", "Blah", "").compare("XX", true) == 0);
329 regexp_test_check(regexp_replace("XBlahX", "Blah", NULL).compare("XX", true) == 0);
// Checks driven by the pattern held in z.
331 const char *z = "foo.*[bd]ak?r";
332 regexp_test_check(regexp_test(z, "muggle man food is barfy") == 1);
333 regexp_test_check(regexp_test("foo.*bar", "muggle man food is farfy") == 0);
334 regexp_test_check(regexp_test("[^a-zA-Z]foo[^a-zA-Z]", "dfoobar xfood") == 0);
335 regexp_test_check(regexp_test(z, "muman foob is bakrfy") == 1);
337 regexp_test_check(regexp_test(z, "muman foob is bakrfy") == 0);
338 regexp_test_check(regexp_test(z, "muman foob is bbkrfy") == 1);
// '+' quantifier: one or more 'b'.
340 regexp_test_check(regexp_test("ab+c", "abc") == 1);
341 regexp_test_check(regexp_test("ab+c", "abbc") == 1);
342 regexp_test_check(regexp_test("ab+c", "abbbc") == 1);
343 regexp_test_check(regexp_test("ab+c", "ac") == 0);
// '?' quantifier: zero or one 'b'.
345 regexp_test_check(regexp_test("ab?c", "ac") == 1);
346 regexp_test_check(regexp_test("ab?c", "abc") == 1);
347 regexp_test_check(regexp_test("ab?c", "abbc") == 0);
348 regexp_test_check(regexp_test("ab?c", "abbbc") == 0);
349 regexp_test_check(regexp_test("ab?c", "babbbc") == 0);
// Alternation inside a group.
351 regexp_test_check(regexp_test("gr(a|e)y", "grey") == 1);
352 regexp_test_check(regexp_test("gr(a|e)y", "graey") == 0);
353 regexp_test_check(regexp_test("gr(a|e)y", "gray") == 1);
354 regexp_test_check(regexp_test("gr(a?|e)y", "gry") == 1);
355 regexp_test_check(regexp_test("gr(a?|e)y", "grey") == 1);
356 regexp_test_check(regexp_test("gr(a?|e)y", "gray") == 1);
// Starred group, which can also match the empty string.
358 regexp_test_check(regexp_test("(a|b)*", "") == 1);
359 regexp_test_check(regexp_test("(a|b)*", "a") == 1);
360 regexp_test_check(regexp_test("(a|b)*", "ab") == 1);
361 regexp_test_check(regexp_test("(a|b)*", "aa") == 1);
362 regexp_test_check(regexp_test("(a|b)*", "ba") == 1);
363 regexp_test_check(regexp_test("(a|b)*", "bb") == 1);
364 regexp_test_check(regexp_test("(a|b)*", "aaa") == 1);
365 regexp_test_check(regexp_test("(a|b)", "z") == 0);
366 regexp_test_check(regexp_test("*", "z") == 0); // purposely bogus
368 regexp_test_check(regexp_test("", "abc") == 0); // purposely bogus
369 regexp_test_check(regexp_test("(a|b)*", "abc") == 2);
370 regexp_test_check(regexp_test("(a|b)*", "c") == 1);
371 regexp_test_check(regexp_test("(a|b)*c", "abcab") == 1);
372 regexp_test_check(regexp_test("(a|b)*c", "abcbac") == 2);
// find_any() checks on a regexp object re-initialized with each pattern below.
376 r.init(".*foo.*bar.*");
377 regexp_test_check(r.find_any("foobarx") == 1);
378 regexp_test_check(r.find_any("foobar") == 1);
379 regexp_test_check(r.find_any("foo bar") == 1);
380 regexp_test_check(r.find_any("fo foo ba ba bar ba") == 1);
381 regexp_test_check(r.find_any("fo oo oo ba ba bar foo") == 0);
386 r.init(".*foo.?bar.*");
387 regexp_test_check(r.find_any("abfoobarx") == 1);
388 regexp_test_check(r.find_any("abfoobar") == 1);
389 regexp_test_check(r.find_any("abfoo bar") == 1);
390 regexp_test_check(r.find_any("abfoo bar") == 0);
391 regexp_test_check(r.find_any("abfo foo ba ba bar ba") == 0);
392 regexp_test_check(r.find_any("abfo oo oo ba ba bar foo") == 0);
397 r.init(".*m((foo|bar)*baz)m.*");
398 regexp_test_check(r.find_any("abfoobarx") == 0);
399 regexp_test_check(r.find_any("a mfoofoofoobazm d") == 1);
400 regexp_test_check(r.find_any("a mfoobarbazfoom d") == 0);
401 regexp_test_check(r.find_any("a mbarbarfoobarbazm d") == 1);
402 regexp_test_check(r.find_any("a mfoobarfoo bazm d") == 0);
403 regexp_test_check(r.find_any("a mm foobarfoobarfoobar ") == 0);
409 regexp_test_check(r.find_any("fz") == 1);
410 regexp_test_check(r.find("fz") == 2);
411 regexp_test_check(r.find_any("ff") == 1);
412 regexp_test_check(r.find_any("z") == 1);
418 regexp_test_check(r.find_any("mfzn") == 0);
419 regexp_test_check(r.find_any("mffn") == 0);
420 regexp_test_check(r.find_any("mzn") == 1);
421 regexp_test_check(r.find_any("mn") == 1);
422 regexp_test_check(r.find_any("mzfn") == 0);
424 regexp_test_check(r.find_any("manmanmannnnnnnmmmmmmmmm ") == 0);
425 regexp_test_check(r.find_any("manmanmannnnnnnmmmmmmmmm ") == 0);
426 regexp_test_check(r.find_any("manmanmannnnnnnmmmmmmmmmffzzz ") == 0);
427 regexp_test_check(r.find_any("manmanmannnnnnnmmmmmmmmmnfzzz ") == 1);
428 regexp_test_check(r.find_any("mmmfn aanmannnnnnnmmmmmm fzzz ") == 1);
429 regexp_test_check(r.find_any("mmmzzn anmannnnnnnmmmmmm fzzz ") == 1);
430 regexp_test_check(r.find_any("mm anmannnnnnnmmmmmm fzmzznzz ") == 1);
431 regexp_test_check(r.find_any("mm anmannnnnnnmmmmmm fzmzzfnzz ") == 0);
432 regexp_test_check(r.find_any("manmfnmannnnnnnmmmmmmmmmffzzz ") == 1);
437 r.init(".*m((foo|bar)*|baz)m.*");
438 regexp_test_check(r.find_any("abfoobarx") == 0);
439 regexp_test_check(r.find_any("a mfoofoofoobazm d") == 0);
440 regexp_test_check(r.find_any("a mfoobarbazfoom d") == 0);
441 regexp_test_check(r.find_any("a mbazm d") == 1);
442 regexp_test_check(r.find_any("a mfoobarfoom d") == 1);
443 regexp_test_check(r.find_any("a mm foobarfoobarfoobar ") == 1);
// Bracket expressions, including a negated set that lists ']' first.
448 r.init("[a-fA-F]..[^]a-zA-Z]");
449 regexp_test_check(r.find_any("Axx1") == 1);
450 regexp_test_check(r.find_any("Fxx1") == 1);
451 regexp_test_check(r.find_any("Bxx]") == 0);
452 regexp_test_check(r.find_any("Cxxz") == 0);
453 regexp_test_check(r.find_any("gxx[") == 0);
454 regexp_test_check(r.find_any("-xx0") == 0);
// Overall verdict: true only when the failure counter is still zero.
459 return !g_regexp_test_fail_count;