X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=util%2Funicode-util.c;fp=util%2Funicode-util.c;h=312e900f76c526b6dc25dd0329be68ab0207a977;hp=0000000000000000000000000000000000000000;hb=781125c9e92a2b9a2b9fbe54adec28ddb60f35b1;hpb=46ab6013a29233b32dba49cf9c50e70fd02db1c3 diff --git a/util/unicode-util.c b/util/unicode-util.c new file mode 100644 index 00000000..312e900f --- /dev/null +++ b/util/unicode-util.c @@ -0,0 +1,43 @@ +#include "unicode-util.h" + +/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to + link directly to libxapian. +*/ + +static bool +unicode_is_wordchar (notmuch_unichar ch) +{ + switch (g_unichar_type (ch)) { + case G_UNICODE_UPPERCASE_LETTER: + case G_UNICODE_LOWERCASE_LETTER: + case G_UNICODE_TITLECASE_LETTER: + case G_UNICODE_MODIFIER_LETTER: + case G_UNICODE_OTHER_LETTER: + case G_UNICODE_NON_SPACING_MARK: + case G_UNICODE_ENCLOSING_MARK: + case G_UNICODE_SPACING_MARK: + case G_UNICODE_DECIMAL_NUMBER: + case G_UNICODE_LETTER_NUMBER: + case G_UNICODE_OTHER_NUMBER: + case G_UNICODE_CONNECT_PUNCTUATION: + return true; + default: + return false; + } +} + +bool +unicode_word_utf8 (const char *utf8_str) +{ + gunichar *decoded = g_utf8_to_ucs4_fast (utf8_str, -1, NULL); + const gunichar *p = decoded; + bool ret; + + while (*p && unicode_is_wordchar (*p)) + p++; + + ret = (*p == '\0'); + + g_free (decoded); + return ret; +}