aboutsummaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorDavid Bremner <bremner@debian.org>2023-12-01 07:51:09 -0400
committerDavid Bremner <bremner@debian.org>2023-12-01 07:51:09 -0400
commit126347b6942dd4b0291beb67b119431ebd750a2a (patch)
tree532c5163cb0972c8b9e6c8b4577b86afb9c6a6a2 /util
Import notmuch_0.38.2.orig.tar.xz
[dgit import orig notmuch_0.38.2.orig.tar.xz]
Diffstat (limited to 'util')
-rw-r--r--util/Makefile5
-rw-r--r--util/Makefile.local18
-rw-r--r--util/crypto.c245
-rw-r--r--util/crypto.h106
-rw-r--r--util/error_util.c40
-rw-r--r--util/error_util.h54
-rw-r--r--util/gmime-extra.c221
-rw-r--r--util/gmime-extra.h81
-rw-r--r--util/hex-escape.c159
-rw-r--r--util/hex-escape.h50
-rw-r--r--util/path-util.c27
-rw-r--r--util/path-util.h19
-rw-r--r--util/repair.c158
-rw-r--r--util/repair.h44
-rw-r--r--util/string-util.c298
-rw-r--r--util/string-util.h100
-rw-r--r--util/talloc-extra.c14
-rw-r--r--util/talloc-extra.h26
-rw-r--r--util/unicode-util.c43
-rw-r--r--util/unicode-util.h19
-rw-r--r--util/util.c24
-rw-r--r--util/util.h29
-rw-r--r--util/xapian-extra.h15
-rw-r--r--util/xutil.c139
-rw-r--r--util/xutil.h60
-rw-r--r--util/zlib-extra.c95
-rw-r--r--util/zlib-extra.h39
27 files changed, 2128 insertions, 0 deletions
diff --git a/util/Makefile b/util/Makefile
new file mode 100644
index 00000000..fa25832e
--- /dev/null
+++ b/util/Makefile
@@ -0,0 +1,5 @@
+# This directory has no build rules of its own: the default target simply
+# re-runs make in the parent directory.
+all:
+ $(MAKE) -C .. all
+
+# Any other requested target that has no rule here is forwarded to the
+# parent directory as well.
+.DEFAULT:
+ $(MAKE) -C .. $@
diff --git a/util/Makefile.local b/util/Makefile.local
new file mode 100644
index 00000000..8a0b9bc3
--- /dev/null
+++ b/util/Makefile.local
@@ -0,0 +1,18 @@
+# -*- makefile-gmake -*-
+
+# Directory described by this fragment; used as a prefix for the paths below.
+dir := util
+# Add this directory to the include search path passed via extra_cflags.
+extra_cflags += -I$(srcdir)/$(dir)
+
+# C sources that are archived into libnotmuch_util.a.
+libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+ $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
+ $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \
+ $(dir)/repair.c $(dir)/path-util.c \
+ $(dir)/unicode-util.c
+
+# One object file per source file listed above.
+libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
+
+# Build the static archive from the objects ("quiet" is defined outside this
+# fragment).
+$(dir)/libnotmuch_util.a: $(libnotmuch_util_modules)
+ $(call quiet,AR) rcs $@ $^
+
+# Append this directory's sources and build products to the SRCS and CLEAN
+# lists.
+SRCS := $(SRCS) $(libnotmuch_util_c_srcs)
+CLEAN := $(CLEAN) $(libnotmuch_util_modules) $(dir)/libnotmuch_util.a
diff --git a/util/crypto.c b/util/crypto.c
new file mode 100644
index 00000000..156a6550
--- /dev/null
+++ b/util/crypto.c
@@ -0,0 +1,245 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2012 Jameson Rollins
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Authors: Jameson Rollins <jrollins@finestructure.net>
+ */
+
+#include "crypto.h"
+#include <strings.h>
+#include "error_util.h"
+#define unused(x) x __attribute__ ((unused))
+
+#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
+
+/* Cleanup hook for a _notmuch_crypto_t.
+ *
+ * The body is empty: nothing is released here. The parameter is wrapped
+ * in unused() so that the compiler does not warn about it. */
+void
+_notmuch_crypto_cleanup (unused(_notmuch_crypto_t *crypto))
+{
+}
+
+/* Decrypt PART if it is one of the encrypted part types handled here.
+ *
+ * multipart/encrypted parts, and application/pkcs7-mime parts whose
+ * S/MIME type is enveloped-data, are handed to the matching GMime
+ * decryption routine together with FLAGS. If KEY_ITER is non-NULL the
+ * session key is read from its current property value; otherwise no
+ * session key is supplied. Any other kind of part yields NULL. */
+static GMimeObject *
+_crypto_decrypt_part (GMimeObject *part,
+                      GMimeDecryptFlags flags,
+                      notmuch_message_properties_t *key_iter,
+                      GMimeDecryptResult **decrypt_result,
+                      GError **err)
+{
+    if (GMIME_IS_MULTIPART_ENCRYPTED (part)) {
+        const char *session_key =
+            key_iter ? notmuch_message_properties_value (key_iter) : NULL;
+
+        return g_mime_multipart_encrypted_decrypt (GMIME_MULTIPART_ENCRYPTED (part),
+                                                   flags, session_key,
+                                                   decrypt_result, err);
+    }
+
+    if (GMIME_IS_APPLICATION_PKCS7_MIME (part)) {
+        GMimeApplicationPkcs7Mime *pkcs7 = GMIME_APPLICATION_PKCS7_MIME (part);
+
+        if (g_mime_application_pkcs7_mime_get_smime_type (pkcs7) ==
+            GMIME_SECURE_MIME_TYPE_ENVELOPED_DATA) {
+            const char *session_key =
+                key_iter ? notmuch_message_properties_value (key_iter) : NULL;
+
+            return g_mime_application_pkcs7_mime_decrypt (pkcs7, flags, session_key,
+                                                          decrypt_result, err);
+        }
+    }
+
+    return NULL;
+}
+
+/* Try to decrypt PART according to the policy DECRYPT.
+ *
+ * - NOTMUCH_DECRYPT_FALSE: nothing is attempted, NULL is returned.
+ * - If MESSAGE is non-NULL, every "session-key" property stored on it
+ *   is tried in turn; the first successful result is returned.
+ * - NOTMUCH_DECRYPT_AUTO: stop after the stashed session keys.
+ * - Otherwise a decryption without a session key is attempted; for
+ *   NOTMUCH_DECRYPT_TRUE with a non-NULL DECRYPT_RESULT the session key
+ *   export flag is requested as well.
+ *
+ * *ATTEMPTED (if ATTEMPTED is non-NULL) is set to true whenever a
+ * decryption is tried. Any error left in *ERR by an earlier attempt is
+ * freed and cleared before the next attempt. */
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+                         notmuch_decryption_policy_t decrypt,
+                         notmuch_message_t *message,
+                         GMimeObject *part,
+                         GMimeDecryptResult **decrypt_result,
+                         GError **err)
+{
+    GMimeObject *decrypted = NULL;
+
+    if (decrypt == NOTMUCH_DECRYPT_FALSE)
+        return NULL;
+
+    /* First pass: session keys stashed on the message, if any. */
+    if (message) {
+        notmuch_message_properties_t *keys =
+            notmuch_message_get_properties (message, "session-key", TRUE);
+
+        while (notmuch_message_properties_valid (keys)) {
+            if (err && *err) {
+                g_error_free (*err);
+                *err = NULL;
+            }
+            if (attempted)
+                *attempted = true;
+
+            decrypted = _crypto_decrypt_part (part, GMIME_DECRYPT_NONE, keys,
+                                              decrypt_result, err);
+            if (decrypted)
+                break;
+
+            notmuch_message_properties_move_to_next (keys);
+        }
+
+        if (keys)
+            notmuch_message_properties_destroy (keys);
+        if (decrypted)
+            return decrypted;
+    }
+
+    /* Drop whatever error the session-key attempts left behind. */
+    if (err && *err) {
+        g_error_free (*err);
+        *err = NULL;
+    }
+
+    if (decrypt == NOTMUCH_DECRYPT_AUTO)
+        return decrypted;
+
+    /* Second pass: decryption without a stashed session key. */
+    if (attempted)
+        *attempted = true;
+
+    GMimeDecryptFlags flags = GMIME_DECRYPT_NONE;
+
+    if (decrypt == NOTMUCH_DECRYPT_TRUE && decrypt_result)
+        flags |= GMIME_DECRYPT_EXPORT_SESSION_KEY;
+
+    return _crypto_decrypt_part (part, flags, NULL, decrypt_result, err);
+}
+
+/* talloc destructor for _notmuch_message_crypto_t.
+ *
+ * Drops the reference held on the signature list and frees the stored
+ * payload subject, when present. Always returns 0. */
+static int
+_notmuch_message_crypto_destructor (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (msg_crypto != NULL) {
+        if (msg_crypto->sig_list != NULL)
+            g_object_unref (msg_crypto->sig_list);
+
+        if (msg_crypto->payload_subject != NULL)
+            talloc_free (msg_crypto->payload_subject);
+    }
+
+    return 0;
+}
+
+/* Allocate a zero-initialised _notmuch_message_crypto_t as a talloc
+ * child of CTX and register its destructor.
+ *
+ * Returns NULL if the allocation fails. */
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx)
+{
+    _notmuch_message_crypto_t *ret = talloc_zero (ctx, _notmuch_message_crypto_t);
+
+    /* talloc_set_destructor() must only be given a real talloc
+     * allocation, so skip it when talloc_zero() failed. */
+    if (ret)
+        talloc_set_destructor (ret, _notmuch_message_crypto_destructor);
+
+    return ret;
+}
+
+/* Record SIGS as the signature list covering the message, unless a
+ * payload part has already been encountered.
+ *
+ * Returns NOTMUCH_STATUS_NULL_POINTER if MSG_CRYPTO is NULL and
+ * NOTMUCH_STATUS_SUCCESS otherwise. SIGS may be NULL, which clears any
+ * previously stored list. */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+                                            GMimeSignatureList *sigs)
+{
+    if (! msg_crypto)
+        return NOTMUCH_STATUS_NULL_POINTER;
+
+    /* Signatures that arrive after a payload part during DFS are not
+     * part of the cryptographic envelope: */
+    if (msg_crypto->payload_encountered)
+        return NOTMUCH_STATUS_SUCCESS;
+
+    /* This signature list needs to persist as long as the msg_crypto
+     * object survives, so hold a reference on it; the reference is
+     * dropped again by the talloc destructor.
+     *
+     * Take the new reference before releasing the old one, so that
+     * passing in the list that is already stored cannot free it. */
+    if (sigs)
+        g_object_ref (sigs);
+    if (msg_crypto->sig_list)
+        g_object_unref (msg_crypto->sig_list);
+    msg_crypto->sig_list = sigs;
+
+    if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_FULL)
+        msg_crypto->signature_encrypted = true;
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
+
+/* Consider PART (child number CHILDNUM of PARENT; PARENT may be NULL)
+ * as the cryptographic payload of the message.
+ *
+ * Returns false when a payload was already encountered, when PART is
+ * the control part of a multipart/encrypted PARENT, or when there is
+ * neither a full decryption nor a signature list recorded. Otherwise
+ * the protected Subject (if the part carries one) is copied into
+ * msg_crypto->payload_subject and true is returned.
+ *
+ * A NULL MSG_CRYPTO or PART is treated as an internal error. */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+ GMimeObject *parent, int childnum)
+{
+ const char *protected_headers = NULL;
+ const char *forwarded = NULL;
+ const char *subject = NULL;
+
+ if ((! msg_crypto) || (! part))
+ INTERNAL_ERROR ("_notmuch_message_crypto_potential_payload() got NULL for %s\n",
+ msg_crypto? "part" : "msg_crypto");
+
+ /* only fire on the first payload part encountered */
+ if (msg_crypto->payload_encountered)
+ return false;
+
+ /* the first child of multipart/encrypted that matches the
+ * encryption protocol should be "control information" metadata,
+ * not payload. So we skip it. (see
+ * https://tools.ietf.org/html/rfc1847#page-8) */
+ if (parent && GMIME_IS_MULTIPART_ENCRYPTED (parent) && childnum ==
+ GMIME_MULTIPART_ENCRYPTED_VERSION) {
+ const char *enc_type = g_mime_object_get_content_type_parameter (parent, "protocol");
+ GMimeContentType *ct = g_mime_object_get_content_type (part);
+ if (ct && enc_type) {
+ const char *part_type = g_mime_content_type_get_mime_type (ct);
+ if (part_type && strcmp (part_type, enc_type) == 0)
+ return false;
+ }
+ }
+
+ /* from here on, every later call returns false at the check above */
+ msg_crypto->payload_encountered = true;
+
+ /* don't bother recording anything if there is no cryptographic
+ * envelope: */
+ if ((msg_crypto->decryption_status != NOTMUCH_MESSAGE_DECRYPTED_FULL) &&
+ (msg_crypto->sig_list == NULL))
+ return false;
+
+ /* Verify that this payload has headers that are intended to be
+ * exported to the larger message: */
+
+ /* Consider a payload that uses Alexey Melnikov's forwarded="no" for
+ * message/global or message/rfc822:
+ * https://tools.ietf.org/html/draft-melnikov-smime-header-signing-05#section-4 */
+ forwarded = g_mime_object_get_content_type_parameter (part, "forwarded");
+ if (GMIME_IS_MESSAGE_PART (part) && forwarded && strcmp (forwarded, "no") == 0) {
+ GMimeMessage *message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+ subject = g_mime_message_get_subject (message);
+ /* FIXME: handle more than just Subject: at some point */
+ } else {
+ /* Consider "memoryhole"-style protected headers as practiced by Enigmail and K-9 */
+ protected_headers = g_mime_object_get_content_type_parameter (part, "protected-headers");
+ if (protected_headers && strcasecmp ("v1", protected_headers) == 0)
+ subject = g_mime_object_get_header (part, "Subject");
+ /* FIXME: handle more than just Subject: at some point */
+ }
+
+ /* keep a private copy, replacing any subject stored earlier */
+ if (subject) {
+ if (msg_crypto->payload_subject)
+ talloc_free (msg_crypto->payload_subject);
+ msg_crypto->payload_subject = talloc_strdup (msg_crypto, subject);
+ }
+
+ return true;
+}
+
+
+/* Note that a part of the message was decrypted successfully.
+ *
+ * A decryption seen before any payload part marks the message as fully
+ * decrypted; one seen afterwards upgrades "none" to "partial" and
+ * leaves any other status untouched. See util/crypto.h for the
+ * rationale behind the status values.
+ *
+ * Returns NOTMUCH_STATUS_NULL_POINTER if MSG_CRYPTO is NULL. */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (msg_crypto == NULL)
+        return NOTMUCH_STATUS_NULL_POINTER;
+
+    if (msg_crypto->payload_encountered) {
+        if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_NONE)
+            msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_PARTIAL;
+    } else {
+        msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_FULL;
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/util/crypto.h b/util/crypto.h
new file mode 100644
index 00000000..3c5d384b
--- /dev/null
+++ b/util/crypto.h
@@ -0,0 +1,106 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include <stdbool.h>
+#include "gmime-extra.h"
+#include "notmuch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Cryptographic options bundled into one structure. */
+typedef struct _notmuch_crypto {
+ /* NOTE(review): presumably whether signatures should be verified --
+ * confirm against the callers, which are outside this directory. */
+ bool verify;
+ /* decryption policy; the same type as the 'decrypt' argument of
+ * _notmuch_crypto_decrypt () */
+ notmuch_decryption_policy_t decrypt;
+} _notmuch_crypto_t;
+
+/* Try to decrypt 'part' according to the policy 'decrypt'.
+ *
+ * With NOTMUCH_DECRYPT_FALSE nothing is attempted. Otherwise, if
+ * 'message' is non-NULL, each "session-key" property stored on it is
+ * tried first. With NOTMUCH_DECRYPT_AUTO that is all; with any other
+ * policy a decryption without a session key is attempted afterwards.
+ *
+ * Returns the decrypted object, or NULL. If 'attempted' is non-NULL,
+ * *attempted is set to true when a decryption was tried. 'decrypt_result'
+ * and 'err' are passed on to the GMime decryption routines.
+ */
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+ notmuch_decryption_policy_t decrypt,
+ notmuch_message_t *message,
+ GMimeObject *part,
+ GMimeDecryptResult **decrypt_result,
+ GError **err);
+
+void
+_notmuch_crypto_cleanup (_notmuch_crypto_t *crypto);
+
+/* The user probably wants to know if the entire message was in the
+ * clear. When replying, the MUA probably wants to know whether there
+ * was any part decrypted in the message. And when displaying to the
+ * user, we probably only want to display "encrypted message" if the
+ * entire message was covered by encryption. */
+typedef enum {
+ /* initial state of a zero-initialised _notmuch_message_crypto_t */
+ NOTMUCH_MESSAGE_DECRYPTED_NONE = 0,
+ /* a part was decrypted only after a payload part had been seen */
+ NOTMUCH_MESSAGE_DECRYPTED_PARTIAL,
+ /* a part was decrypted before any payload part was seen */
+ NOTMUCH_MESSAGE_DECRYPTED_FULL,
+} _notmuch_message_decryption_status_t;
+
+/* description of the cryptographic state of a given message overall;
+ * for use by simple user agents.
+ */
+typedef struct _notmuch_message_crypto {
+ /* encryption status: partial, full, none */
+ _notmuch_message_decryption_status_t decryption_status;
+ /* FIXME: can we show what key(s) a fully-encrypted message was
+ * encrypted to? This data is not necessarily cryptographically
+ * reliable; even when we decrypt, we might not know which public
+ * key was used (e.g. if we're using a session key). */
+
+ /* signature status of the whole message (either the whole message
+ * is signed, or it is not) -- this means that partially-signed
+ * messages will get no signature status. */
+ GMimeSignatureList *sig_list;
+ /* if part of the message was signed, and the MUA is clever, it
+ * can determine on its own exactly which part and try to make
+ * more sense of it. */
+
+ /* mark this flag once we encounter a payload (i.e. something that
+ * is not part of the cryptographic envelope) */
+ bool payload_encountered;
+
+ /* the value of any "Subject:" header in the cryptographic payload
+ * (the top level part within the crypto envelope), converted to
+ * UTF-8 */
+ char *payload_subject;
+
+ /* if both signed and encrypted, was the signature encrypted? */
+ bool signature_encrypted;
+} _notmuch_message_crypto_t;
+
+
+/* _notmuch_message_crypto_t objects should be released with
+ * talloc_free (), or they will be released along with their parent
+ * context.
+ */
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx);
+
+/* call potential_sig_list during a depth-first-search on a message to
+ * consider a particular signature as relevant for the message.
+ */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+ GMimeSignatureList *sigs);
+
+/* call successful_decryption during a depth-first-search on a message
+ * to indicate that a part was successfully decrypted.
+ */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto);
+
+/* call potential_payload during a depth-first-search on a message
+ * when encountering a message part that is not part of the envelope.
+ *
+ * Returns true if part is the root of the cryptographic payload of
+ * this message.
+ */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+ GMimeObject *parent, int childnum);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/error_util.c b/util/error_util.c
new file mode 100644
index 00000000..e64162c7
--- /dev/null
+++ b/util/error_util.c
@@ -0,0 +1,40 @@
+/* error_util.c - internal error utilities for notmuch.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "error_util.h"
+
+void
+_internal_error (const char *format, ...)
+{
+ va_list va_args;
+
+ va_start (va_args, format);
+
+ fprintf (stderr, "Internal error: ");
+ vfprintf (stderr, format, va_args);
+
+ va_end (va_args);
+ exit (1);
+}
+
diff --git a/util/error_util.h b/util/error_util.h
new file mode 100644
index 00000000..a51f001f
--- /dev/null
+++ b/util/error_util.h
@@ -0,0 +1,54 @@
+/* error_util.h - Provide the INTERNAL_ERROR macro
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#ifndef ERROR_UTIL_H
+#define ERROR_UTIL_H
+
+#include <talloc.h>
+
+#include "function-attributes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* There's no point in continuing when we've detected that we've done
+ * something wrong internally (as opposed to the user passing in a
+ * bogus value).
+ *
+ * Note that PRINTF_ATTRIBUTE comes from talloc.h
+ */
+void
+_internal_error (const char *format, ...) PRINTF_ATTRIBUTE (1, 2) NORETURN_ATTRIBUTE;
+
+/* There's no point in continuing when we've detected that we've done
+ * something wrong internally (as opposed to the user passing in a
+ * bogus value).
+ *
+ * Note that __location__ comes from talloc.h.
+ */
+#define INTERNAL_ERROR(format, ...) \
+ _internal_error (format " (%s).\n", \
+ ##__VA_ARGS__, __location__)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/gmime-extra.c b/util/gmime-extra.c
new file mode 100644
index 00000000..192cb078
--- /dev/null
+++ b/util/gmime-extra.c
@@ -0,0 +1,221 @@
+#include "gmime-extra.h"
+#include <string.h>
+
+static
+/* Peek at the start of FILE_STREAM and, if it begins with the gzip magic
+ * bytes (0x1f 0x8b), wrap it in a filter stream that unzips on the fly.
+ *
+ * For gzipped input the caller's reference on FILE_STREAM is released
+ * here (the filter stream is expected to hold its own) and the filter
+ * stream is returned. For any other input FILE_STREAM itself is
+ * returned. On failure NULL is returned and FILE_STREAM is not
+ * unreferenced by this function. */
+GMimeStream *
+_gzfile_maybe_filter (GMimeStream *file_stream)
+{
+    char buf[4];
+    int bytes_read;
+
+    if ((bytes_read = g_mime_stream_read (file_stream, buf, sizeof (buf))) < 0)
+        return NULL;
+
+    /* rewind so that the consumer sees the stream from its start */
+    if (g_mime_stream_reset (file_stream))
+        return NULL;
+
+    /* check for gzipped input */
+    if (bytes_read >= 2 && buf[0] == 0x1f && (unsigned char) buf[1] == 0x8b) {
+        GMimeStream *gzstream;
+        GMimeFilter *gzfilter;
+
+        gzfilter = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_UNZIP, 0);
+        if (! gzfilter)
+            return NULL;
+
+        gzstream = g_mime_stream_filter_new (file_stream);
+        if (! gzstream) {
+            /* do not leak the filter created above */
+            g_object_unref (gzfilter);
+            return NULL;
+        }
+
+        /* ignore filter id */
+        (void) g_mime_stream_filter_add ((GMimeStreamFilter *) gzstream, gzfilter);
+        g_object_unref (gzfilter);
+        g_object_unref (file_stream);
+        return gzstream;
+    } else {
+        return file_stream;
+    }
+}
+
+/* Create a GMime stream on the open file descriptor FD and pass it
+ * through _gzfile_maybe_filter (). Returns NULL if the underlying
+ * stream cannot be created. */
+GMimeStream *
+g_mime_stream_gzfile_new (int fd)
+{
+    GMimeStream *raw = g_mime_stream_fs_new (fd);
+
+    return raw ? _gzfile_maybe_filter (raw) : NULL;
+}
+
+/* Open FILENAME as a GMime stream and pass it through
+ * _gzfile_maybe_filter (). Returns NULL if the file cannot be
+ * opened. */
+GMimeStream *
+g_mime_stream_gzfile_open (const char *filename)
+{
+    GMimeStream *raw = g_mime_stream_fs_open (filename, 0, 0, NULL);
+
+    return raw ? _gzfile_maybe_filter (raw) : NULL;
+}
+
+/* Create a block-write buffered GMime stream on top of STDOUT_FILENO.
+ *
+ * The underlying pipe stream is marked as not owning the descriptor.
+ * Returns the result of g_mime_stream_buffer_new (), or NULL if the
+ * pipe stream cannot be created.
+ *
+ * The parameter list is spelled (void) to match the prototype in
+ * gmime-extra.h. */
+GMimeStream *
+g_mime_stream_stdout_new (void)
+{
+    GMimeStream *stream_stdout = NULL;
+    GMimeStream *stream_buffered = NULL;
+
+    stream_stdout = g_mime_stream_pipe_new (STDOUT_FILENO);
+    if (! stream_stdout)
+        return NULL;
+
+    g_mime_stream_pipe_set_owner (GMIME_STREAM_PIPE (stream_stdout), FALSE);
+
+    stream_buffered = g_mime_stream_buffer_new (stream_stdout, GMIME_STREAM_BUFFER_BLOCK_WRITE);
+
+    /* drop our own reference on the pipe stream */
+    g_object_unref (stream_stdout);
+
+    return stream_buffered;
+}
+
+/**
+ * Duplicate a glib-allocated string into the talloc context CTX and
+ * release the glib original with g_free ().
+ */
+static char *
+g_string_talloc_strdup (void *ctx, char *g_string)
+{
+    char *copy;
+
+    copy = talloc_strdup (ctx, g_string);
+    g_free (g_string);
+
+    return copy;
+}
+
+/* Return the user id of CERT, but only when the certificate's id
+ * validity is FULL or ULTIMATE; otherwise (or when there is no user
+ * id) return NULL. */
+const char *
+g_mime_certificate_get_valid_userid (GMimeCertificate *cert)
+{
+    const char *uid = g_mime_certificate_get_user_id (cert);
+
+    if (uid != NULL) {
+        switch (g_mime_certificate_get_id_validity (cert)) {
+        case GMIME_VALIDITY_FULL:
+        case GMIME_VALIDITY_ULTIMATE:
+            return uid;
+        default:
+            break;
+        }
+    }
+
+    return NULL;
+}
+
+/* Return the e-mail address of CERT, but only when the certificate's
+ * id validity is FULL or ULTIMATE; otherwise (or when there is no
+ * address) return NULL. */
+const char *
+g_mime_certificate_get_valid_email (GMimeCertificate *cert)
+{
+    const char *email = g_mime_certificate_get_email (cert);
+
+    if (email != NULL) {
+        switch (g_mime_certificate_get_id_validity (cert)) {
+        case GMIME_VALIDITY_FULL:
+        case GMIME_VALIDITY_ULTIMATE:
+            return email;
+        default:
+            break;
+        }
+    }
+
+    return NULL;
+}
+
+/* Return a pointer to the last 16 characters of CERT's fingerprint.
+ *
+ * A fingerprint shorter than 16 characters is returned whole, and a
+ * missing fingerprint yields NULL. The result points into the string
+ * returned by g_mime_certificate_get_fingerprint (). */
+const char *
+g_mime_certificate_get_fpr16 (GMimeCertificate *cert)
+{
+    const char *fpr = g_mime_certificate_get_fingerprint (cert);
+    size_t len;
+
+    if (fpr == NULL)
+        return NULL;
+
+    len = strlen (fpr);
+
+    return (len < 16) ? fpr : fpr + (len - 16);
+}
+
+/* Format the address list of kind TYPE from MESSAGE as a string using
+ * internet_address_list_to_string () and return that string. */
+char *
+g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type)
+{
+    return internet_address_list_to_string (g_mime_message_get_addresses (message, type),
+                                            NULL, 0);
+}
+
+/* Return the date of MESSAGE formatted as a header date string,
+ * allocated in the talloc context CTX.
+ *
+ * When the message has no parsed date, a copy of the fixed string
+ * "Thu, 01 Jan 1970 00:00:00 +0000" is returned instead. */
+char *
+g_mime_message_get_date_string (void *ctx, GMimeMessage *message)
+{
+    GDateTime *when = g_mime_message_get_date (message);
+
+    if (when == NULL)
+        return talloc_strdup (ctx, "Thu, 01 Jan 1970 00:00:00 +0000");
+
+    return g_string_talloc_strdup (ctx, g_mime_utils_header_format_date (when));
+}
+
+/* Thin wrapper: return whatever g_mime_message_get_reply_to () returns
+ * for MESSAGE. */
+InternetAddressList *
+g_mime_message_get_reply_to_list (GMimeMessage *message)
+{
+ return g_mime_message_get_reply_to (message);
+}
+
+/* Return the value of the "From" header of MESSAGE as reported by
+ * g_mime_object_get_header (); the string is not copied here. */
+const char *
+g_mime_message_get_from_string (GMimeMessage *message)
+{
+ return g_mime_object_get_header (GMIME_OBJECT (message), "From");
+}
+
+/* Return the Reply-To addresses of MESSAGE as a string allocated in
+ * the talloc context CTX; the intermediate glib string is freed. */
+char *
+g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message)
+{
+    char *glib_str;
+
+    glib_str = internet_address_list_to_string (g_mime_message_get_reply_to (message),
+                                                NULL, 0);
+
+    return g_string_talloc_strdup (ctx, glib_str);
+}
+
+/* Select the parser format from a boolean: mbox format when FLAG is
+ * true, plain message format otherwise. */
+void
+g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag)
+{
+    if (flag)
+        g_mime_parser_set_format (parser, GMIME_FORMAT_MBOX);
+    else
+        g_mime_parser_set_format (parser, GMIME_FORMAT_MESSAGE);
+}
+
+/* In GMime 3.0, status GOOD and VALID both imply something about the
+ * validity of the UIDs attached to the signing key. To preserve the
+ * existing notmuch semantics, a signature therefore counts as "good"
+ * under this somewhat relaxed definition: neither the RED bit nor any
+ * bit of the error mask is set in its status.
+ */
+
+gboolean
+g_mime_signature_status_good (GMimeSignatureStatus status)
+{
+    return ! (status & (GMIME_SIGNATURE_STATUS_RED | GMIME_SIGNATURE_STATUS_ERROR_MASK));
+}
+
+/* Return TRUE if the RED bit is set in STATUS, FALSE otherwise.
+ *
+ * The result is normalised to TRUE/FALSE rather than returning the raw
+ * masked bits, so that comparisons against TRUE behave as expected. */
+gboolean
+g_mime_signature_status_bad (GMimeSignatureStatus status)
+{
+    return (status & GMIME_SIGNATURE_STATUS_RED) != 0;
+}
+
+gboolean
+g_mime_signature_status_error (GMimeSignatureStatus status)
+{
+ return (status & GMIME_SIGNATURE_STATUS_ERROR_MASK);
+}
+
+/* Parse the header date string DATE and return it as seconds since the
+ * Unix epoch, or 0 if the string cannot be parsed.
+ *
+ * The value is kept in a gint64 throughout; routing it through time_t
+ * would truncate it on platforms where time_t is 32 bits wide. */
+gint64
+g_mime_utils_header_decode_date_unix (const char *date)
+{
+    GDateTime *parsed_date = g_mime_utils_header_decode_date (date);
+    gint64 ret = 0;
+
+    if (parsed_date) {
+        ret = g_date_time_to_unix (parsed_date);
+        g_date_time_unref (parsed_date);
+    }
+
+    return ret;
+}
diff --git a/util/gmime-extra.h b/util/gmime-extra.h
new file mode 100644
index 00000000..889e91f3
--- /dev/null
+++ b/util/gmime-extra.h
@@ -0,0 +1,81 @@
+#ifndef _GMIME_EXTRA_H
+#define _GMIME_EXTRA_H
+#include <gmime/gmime.h>
+#include <talloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+GMimeStream *g_mime_stream_stdout_new (void);
+
+/* Return a GMime stream for this open file descriptor, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_new (int fd);
+
+/* Return a GMime stream for this path, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_open (const char *filename);
+
+/**
+ * Get last 16 hex digits of fingerprint ("keyid")
+ */
+const char *g_mime_certificate_get_fpr16 (GMimeCertificate *cert);
+/**
+ * Return the contents of the appropriate address header as a string
+ * Should be freed using g_free
+ */
+char *g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type);
+
+InternetAddressList *g_mime_message_get_addresses (GMimeMessage *message, GMimeAddressType type);
+
+/**
+ * return talloc allocated date string
+ */
+
+char *g_mime_message_get_date_string (void *ctx, GMimeMessage *message);
+
+/**
+ * glib allocated list of From: addresses
+ */
+
+InternetAddressList *g_mime_message_get_from (GMimeMessage *message);
+
+
+/**
+ * return string for From: address
+ * (owned by gmime)
+ */
+const char *g_mime_message_get_from_string (GMimeMessage *message);
+
+InternetAddressList *g_mime_message_get_reply_to_list (GMimeMessage *message);
+
+/**
+ * return talloc allocated reply-to string
+ */
+char *g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message);
+
+void g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag);
+
+gboolean g_mime_signature_status_good (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_bad (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_error (GMimeSignatureStatus status);
+
+gint64 g_mime_utils_header_decode_date_unix (const char *date);
+
+/**
+ * Return string for valid User ID (or NULL if no valid User ID exists)
+ */
+const char *g_mime_certificate_get_valid_userid (GMimeCertificate *cert);
+/**
+ * Return string for valid e-mail address (or NULL if no valid e-mail address exists)
+ */
+const char *g_mime_certificate_get_valid_email (GMimeCertificate *cert);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 00000000..81534a8c
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,159 @@
+/* hex-escape.c - Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include <ctype.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+/* Characters that hex_encode () copies through unchanged; every other
+ * byte is written as a three-character escape sequence. */
+static const char *output_charset =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.,";
+
+/* Character that introduces a two-hex-digit escape when decoding. */
+static const char escape_char = '%';
+
+/* Return non-zero if C can be emitted unescaped, i.e. if strchr ()
+ * finds it in output_charset. */
+static int
+is_output (char c)
+{
+    const char *hit = strchr (output_charset, c);
+
+    return hit != NULL;
+}
+
+/* Make sure the buffer *OUT (of size *OUT_SIZE) can hold NEEDED bytes.
+ *
+ * If it is already large enough nothing happens. Otherwise a buffer is
+ * allocated (when *OUT is NULL) or the existing one is resized, using
+ * the talloc context CTX, and *OUT / *OUT_SIZE are updated.
+ *
+ * Returns 1 on success and 0 if the allocation fails. On failure *OUT
+ * and *OUT_SIZE are left untouched, so the caller keeps a valid handle
+ * on its original buffer. */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *resized;
+
+    if (*out_size >= needed)
+        return 1;
+
+    if (*out == NULL)
+        resized = talloc_size (ctx, needed);
+    else
+        resized = talloc_realloc (ctx, *out, char, needed);
+
+    if (resized == NULL)
+        return 0;
+
+    *out = resized;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode the NUL-terminated string IN into *OUT.
+ *
+ * Bytes accepted by is_output () are copied as they are; every other
+ * byte becomes '%' followed by two lower-case hex digits. The output
+ * buffer is allocated or grown through maybe_realloc () as needed; a
+ * NULL *OUT is treated as a buffer of size 0.
+ *
+ * Returns HEX_SUCCESS, or HEX_OUT_OF_MEMORY if the buffer cannot be
+ * obtained. None of the arguments may be NULL. */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    const char *src;
+    char *dst;
+    size_t needed = 1;  /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* first pass: work out how much room the encoded form takes */
+    for (src = in; *src != '\0'; src++) {
+        if (is_output (*src))
+            needed += 1;
+        else
+            needed += 3;
+    }
+
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (! maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    /* second pass: emit the encoded bytes */
+    dst = *out;
+    for (src = in; *src != '\0'; src++) {
+        if (is_output (*src)) {
+            *dst++ = *src;
+        } else {
+            unsigned char byte = (unsigned char) *src;
+
+            *dst++ = '%';
+            *dst++ = hex_digits[byte >> 4];
+            *dst++ = hex_digits[byte & 0x0f];
+        }
+    }
+
+    *dst = '\0';
+    return HEX_SUCCESS;
+}
+
+/* Hex decode 'in' to 'out'.
+ *
+ * Each escape_char followed by two hex digits is replaced by the byte
+ * they denote; everything else is copied unchanged, and the output is
+ * NUL-terminated. An escape_char that is not followed by two hex
+ * digits yields HEX_SYNTAX_ERROR.
+ *
+ * This must succeed for in == out to support hex_decode_inplace().
+ */
+static hex_status_t
+hex_decode_internal (const char *in, unsigned char *out)
+{
+    char digits[3];
+
+    for (; *in != '\0'; out++) {
+        char *endp;
+
+        if (*in != escape_char) {
+            *out = *in++;
+            continue;
+        }
+
+        /* This also handles unexpected end-of-string. */
+        if (! isxdigit ((unsigned char) in[1]) ||
+            ! isxdigit ((unsigned char) in[2]))
+            return HEX_SYNTAX_ERROR;
+
+        /* copy the digits out before writing, since in may equal out */
+        digits[0] = in[1];
+        digits[1] = in[2];
+        digits[2] = '\0';
+
+        *out = strtoul (digits, &endp, 16);
+
+        if (endp != digits + 2)
+            return HEX_SYNTAX_ERROR;
+
+        in += 3;
+    }
+
+    *out = '\0';
+
+    return HEX_SUCCESS;
+}
+
+/* Decode the hex-escaped string S onto itself.
+ *
+ * Returns the status reported by hex_decode_internal (). */
+hex_status_t
+hex_decode_inplace (char *s)
+{
+ /* A decoded string is never longer than the encoded one, so it is
+ * safe to decode a string onto itself. */
+ return hex_decode_internal (s, (unsigned char *) s);
+}
+
+/* Decode the hex-escaped string IN into *OUT.
+ *
+ * The output buffer is allocated or grown through maybe_realloc () as
+ * needed; as in hex_encode (), a NULL *OUT is treated as a buffer of
+ * size 0 regardless of the value found in *OUT_SIZE.
+ *
+ * Returns HEX_OUT_OF_MEMORY if the buffer cannot be obtained, and
+ * otherwise the status reported by hex_decode_internal (). None of the
+ * arguments may be NULL. */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    const char *p;
+    size_t needed = 1;  /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* A complete escape is three input characters for one output byte:
+     * subtract one at the escape character, then add one for each of
+     * its two digits. The && chain stops at a NUL, so nothing past the
+     * end of the string is read. The casts keep isxdigit () defined
+     * for bytes with the high bit set. */
+    for (p = in; *p; p++) {
+        if ((p[0] == escape_char) &&
+            isxdigit ((unsigned char) p[1]) &&
+            isxdigit ((unsigned char) p[2]))
+            needed -= 1;
+        else
+            needed += 1;
+    }
+
+    /* do not trust a stale size when there is no buffer yet */
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (! maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    return hex_decode_internal (in, (unsigned char *) *out);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 00000000..83a4c6f1
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,50 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Status codes returned by the hex_* routines. */
+typedef enum {
+ HEX_SUCCESS = 0,
+ /* an escape character was not followed by two hex digits */
+ HEX_SYNTAX_ERROR,
+ /* the output buffer could not be allocated or resized */
+ HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API for hex_encode() and hex_decode() is modelled on that for
+ * getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size. This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+/*
+ * Non-allocating hex decode to decode 's' in-place. The length of the
+ * result is always equal to or shorter than the length of the
+ * original.
+ */
+hex_status_t
+hex_decode_inplace (char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/path-util.c b/util/path-util.c
new file mode 100644
index 00000000..3267a967
--- /dev/null
+++ b/util/path-util.c
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define _GNU_SOURCE
+
+#include "path-util.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+
+/* Return the canonical form of PATH in a malloc-allocated string, or
+ * NULL on failure. The caller releases the result with free ().
+ *
+ * canonicalize_file_name () is used where available; otherwise
+ * realpath () is called with a buffer of PATH_MAX + 1 bytes. */
+char *
+notmuch_canonicalize_file_name (const char *path)
+{
+#if HAVE_CANONICALIZE_FILE_NAME
+    return canonicalize_file_name (path);
+#elif defined(PATH_MAX)
+    char *resolved_path = malloc (PATH_MAX + 1);
+    char *result;
+
+    if (resolved_path == NULL)
+        return NULL;
+
+    result = realpath (path, resolved_path);
+    /* on failure nobody else holds the buffer, so release it here */
+    if (result == NULL)
+        free (resolved_path);
+
+    return result;
+#else
+#error undefined PATH_MAX _and_ missing canonicalize_file_name not supported
+#endif
+}
diff --git a/util/path-util.h b/util/path-util.h
new file mode 100644
index 00000000..ac85f696
--- /dev/null
+++ b/util/path-util.h
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef NOTMUCH_UTIL_PATH_UTIL_H_
+#define NOTMUCH_UTIL_PATH_UTIL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return the canonicalized form of 'path' as a newly malloc-allocated
+ * string (to be released with free ()), or NULL on failure.
+ */
+char *
+notmuch_canonicalize_file_name (const char *path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NOTMUCH_UTIL_PATH_UTIL_H_ */
diff --git a/util/repair.c b/util/repair.c
new file mode 100644
index 00000000..5b0dfdf4
--- /dev/null
+++ b/util/repair.c
@@ -0,0 +1,158 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2019 Daniel Kahn Gillmor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Authors: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ */
+
+#include <stdbool.h>
+#include "repair.h"
+
+
+/* Report whether 'payload' is a two-part multipart/mixed object marked
+ * protected-headers="v1" whose first subpart is a text part (text/plain
+ * or text/rfc822-headers) that is itself marked protected-headers="v1",
+ * i.e. a "Legacy Display" part. */
+static bool
+_notmuch_crypto_payload_has_legacy_display (GMimeObject *payload)
+{
+    GMimeContentType *payload_type = g_mime_object_get_content_type (payload);
+    GMimeMultipart *container;
+    GMimeObject *head;
+    GMimeContentType *head_type;
+    const char *marker;
+
+    if (! g_mime_content_type_is_type (payload_type, "multipart", "mixed"))
+        return false;
+
+    marker = g_mime_object_get_content_type_parameter (payload, "protected-headers");
+    if (marker == NULL || strcmp (marker, "v1") != 0)
+        return false;
+
+    if (! GMIME_IS_MULTIPART (payload))
+        return false;
+
+    container = GMIME_MULTIPART (payload);
+    if (container == NULL)
+        return false;
+    if (g_mime_multipart_get_count (container) != 2)
+        return false;
+
+    head = g_mime_multipart_get_part (container, 0);
+
+    /* Early generators of "Legacy Display" parts labelled them
+     * text/rfc822-headers; text/plain renders more widely and is now
+     * the usual choice.  Either one qualifies. */
+    head_type = g_mime_object_get_content_type (head);
+    if (! g_mime_content_type_is_type (head_type, "text", "plain")) {
+        head_type = g_mime_object_get_content_type (head);
+        if (! g_mime_content_type_is_type (head_type, "text", "rfc822-headers"))
+            return false;
+    }
+
+    marker = g_mime_object_get_content_type_parameter (head, "protected-headers");
+    if (marker == NULL || strcmp (marker, "v1") != 0)
+        return false;
+
+    if (! GMIME_IS_TEXT_PART (head))
+        return false;
+
+    return true;
+}
+
+/* Return the second subpart of 'payload' when it carries a "Legacy
+ * Display" part (per _notmuch_crypto_payload_has_legacy_display);
+ * otherwise return 'payload' itself. */
+GMimeObject *
+_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload)
+{
+    if (! _notmuch_crypto_payload_has_legacy_display (payload))
+        return payload;
+
+    return g_mime_multipart_get_part (GMIME_MULTIPART (payload), 1);
+}
+
+/* Detect "Mixed-Up" mangling; see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.1
+ *
+ * Returns true when 'part' is a multipart/mixed with exactly three
+ * subparts -- an empty text/plain text part, an
+ * application/pgp-encrypted part and an application/octet-stream part
+ * -- and false otherwise (including when 'part' is NULL).
+ */
+static bool
+_notmuch_is_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mpart = NULL;
+    GMimeObject *parts[3] = { NULL, NULL, NULL };
+    GMimeContentType *type = NULL;
+    char *prelude_string = NULL;
+    bool prelude_is_empty;
+
+    if (part == NULL)
+        return false;
+    type = g_mime_object_get_content_type (part);
+    if (type == NULL)
+        return false;
+    if (! g_mime_content_type_is_type (type, "multipart", "mixed"))
+        return false;
+    if (! GMIME_IS_MULTIPART (part)) /* probably impossible */
+        return false;
+    mpart = GMIME_MULTIPART (part);
+    if (mpart == NULL)
+        return false;
+    if (g_mime_multipart_get_count (mpart) != 3)
+        return false;
+    parts[0] = g_mime_multipart_get_part (mpart, 0);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[0]),
+                                       "text", "plain"))
+        return false;
+    if (! GMIME_IS_TEXT_PART (parts[0]))
+        return false;
+    parts[1] = g_mime_multipart_get_part (mpart, 1);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[1]),
+                                       "application", "pgp-encrypted"))
+        return false;
+    parts[2] = g_mime_multipart_get_part (mpart, 2);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[2]),
+                                       "application", "octet-stream"))
+        return false;
+
+    /* Is parts[0] length 0?  g_mime_text_part_get_text can return NULL
+     * (e.g. for a part with no content object); treat that as an empty
+     * prelude rather than dereferencing it. */
+    prelude_string = g_mime_text_part_get_text (GMIME_TEXT_PART (parts[0]));
+    prelude_is_empty = (prelude_string == NULL || prelude_string[0] == '\0');
+    g_free (prelude_string);
+    if (! prelude_is_empty)
+        return false;
+
+    /* FIXME: after decoding and stripping whitespace, is parts[1]
+     * subpart just "Version: 1" ? */
+
+    /* FIXME: can we determine that parts[2] subpart is *only* PGP
+     * encrypted data?  I tried g_mime_part_get_openpgp_data () but
+     * found https://github.com/jstedfast/gmime/issues/60 */
+
+    return true;
+}
+
+
+/* Repair "Mixed-Up" mangling; see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.2
+ *
+ * When 'part' matches _notmuch_is_mixed_up_mangled, build a new
+ * multipart/encrypted object (protocol application/pgp-encrypted) that
+ * holds the second and third subparts of 'part' and return it.
+ * Returns NULL when 'part' does not match or the new object cannot be
+ * created. */
+GMimeObject *
+_notmuch_repair_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mangled;
+    GMimeMultipart *repaired;
+    GMimeObject *result;
+
+    if (! _notmuch_is_mixed_up_mangled (part))
+        return NULL;
+
+    mangled = GMIME_MULTIPART (part);
+
+    result = GMIME_OBJECT (g_mime_multipart_encrypted_new ());
+    if (result == NULL)
+        return NULL;
+
+    repaired = GMIME_MULTIPART (result);
+    if (repaired == NULL) {
+        g_object_unref (result);
+        return NULL;
+    }
+
+    g_mime_object_set_content_type_parameter (result, "protocol", "application/pgp-encrypted");
+
+    /* Carry over the pgp-encrypted and octet-stream parts, in order. */
+    g_mime_multipart_insert (repaired, 0, g_mime_multipart_get_part (mangled, 1));
+    g_mime_multipart_insert (repaired, 1, g_mime_multipart_get_part (mangled, 2));
+
+    return result;
+}
diff --git a/util/repair.h b/util/repair.h
new file mode 100644
index 00000000..492f5a20
--- /dev/null
+++ b/util/repair.h
@@ -0,0 +1,44 @@
+#ifndef _REPAIR_H
+#define _REPAIR_H
+
+#include "gmime-extra.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is a collection of message structure and message format repair
+ * techniques that are designed to improve the user experience of
+ * notmuch */
+
+/* If payload is a cryptographic payload within an encrypted message, and
+ * it has a "legacy display" part, then we can skip over it and jump
+ * to the actual content, because notmuch already handles protected
+ * headers appropriately.
+ *
+ * This function either returns payload directly (if it does not have
+ * a "legacy display" part), or it returns a pointer to its
+ * content-bearing subpart, with the "legacy display" part and the
+ * surrounding multipart/mixed object bypassed.
+ *
+ * No new objects are created by calling this function, and the
+ * returned object will only be released when the original part is
+ * disposed of.
+ */
+
+GMimeObject *
+_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload);
+
+/* Detecting and repairing "Mixed-Up MIME mangling". see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1
+ * If this returns NULL, the message was probably not "Mixed up". If
+ * it returns non-NULL, then there is a newly-allocated MIME part that
+ * represents the repaired version. The caller is responsible for
+ * ensuring that any returned object is freed with g_object_unref. */
+GMimeObject *
+_notmuch_repair_mixed_up_mangled (GMimeObject *part);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/string-util.c b/util/string-util.c
new file mode 100644
index 00000000..03d7648d
--- /dev/null
+++ b/util/string-util.c
@@ -0,0 +1,298 @@
+/* string-util.c - Extra or enhanced routines for null terminated strings.
+ *
+ * Copyright (c) 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Jani Nikula <jani@nikula.org>
+ */
+
+
+#include "string-util.h"
+#include "talloc.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+
+/* Stateless tokenizer: skip leading characters of 's' that appear in
+ * 'delim', store the length of the following run of non-delimiter
+ * characters in *len, and return a pointer to that run, or NULL when
+ * the run is empty.  's' is not modified. */
+char *
+strtok_len (char *s, const char *delim, size_t *len)
+{
+    char *token = s + strspn (s, delim);
+    size_t token_len = strcspn (token, delim);
+
+    *len = token_len;
+    if (token_len == 0)
+        return NULL;
+
+    return token;
+}
+
+/* Find the next token of 's' delimited by the single character
+ * 'delim'.  Leading delimiters and whitespace are skipped; a delimiter
+ * immediately preceded by a backslash does not end the token.
+ *
+ * On success returns a pointer to the start of the token and stores in
+ * *len its length with trailing whitespace excluded.  Returns NULL
+ * (leaving *len untouched) when no token remains.
+ */
+const char *
+strsplit_len (const char *s, char delim, size_t *len)
+{
+    bool escaping = false;
+    size_t count = 0, last_nonspace = 0;
+
+    /* Skip initial unescaped delimiters and whitespace.  The cast to
+     * unsigned char keeps isspace() defined for bytes >= 0x80 on
+     * platforms where plain char is signed. */
+    while (*s && (*s == delim || isspace ((unsigned char) *s)))
+        s++;
+
+    while (s[count] && (escaping || s[count] != delim)) {
+        if (! isspace ((unsigned char) s[count]))
+            last_nonspace = count;
+        escaping = (s[count] == '\\');
+        count++;
+    }
+
+    if (count == 0)
+        return NULL;
+
+    *len = last_nonspace + 1;
+    return s;
+}
+
+/* Const-qualified front end to strtok_len. */
+const char *
+strtok_len_c (const char *s, const char *delim, size_t *len)
+{
+    /* strtok_len never writes through its first argument, but C has
+     * no way to declare a single function for both the const and
+     * non-const cases, so the qualifier is dropped here. */
+    char *unqualified = (char *) s;
+
+    return strtok_len (unqualified, delim, len);
+}
+
+/* Return a talloc'd copy of 'str' (allocated under 'ctx') in which
+ * tabs and newlines are replaced by spaces and every other byte below
+ * 32 is replaced by '?'.  Returns NULL when 'str' is NULL or the copy
+ * cannot be allocated. */
+char *
+sanitize_string (const void *ctx, const char *str)
+{
+    char *copy;
+    size_t i;
+
+    if (str == NULL)
+        return NULL;
+
+    copy = talloc_strdup (ctx, str);
+    if (copy == NULL)
+        return NULL;
+
+    for (i = 0; copy[i] != '\0'; i++) {
+        unsigned char c = (unsigned char) copy[i];
+
+        if (c == '\t' || c == '\n')
+            copy[i] = ' ';
+        else if (c < 32)
+            copy[i] = '?';
+    }
+
+    return copy;
+}
+
+/* Nonzero when 'c' ends an unquoted boolean term: NUL, any byte up to
+ * and including space, or a close parenthesis. */
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    /* NUL is already covered by the "<= ' '" range test. */
+    if (c <= ' ')
+        return 1;
+
+    return c == ')';
+}
+
+/* Write "prefix:term" (or just the term when 'prefix' is NULL) into
+ * *buf, enclosing the term in double quotes and doubling any internal
+ * double quotes when it is empty or contains a terminator character
+ * (anything <= ' ' or ')'), a double quote, '(' or a non-ASCII byte.
+ *
+ * *buf / *len describe a talloc'd char array that is (re)allocated
+ * under 'ctx' when it is NULL or too small.  Returns 0 on success.  On
+ * allocation failure returns -1 with errno set to ENOMEM, leaving
+ * *buf and *len unchanged so they still describe the caller's
+ * original buffer.
+ */
+int
+make_boolean_term (void *ctx, const char *prefix, const char *term,
+                   char **buf, size_t *len)
+{
+    const char *in;
+    char *out;
+    size_t needed = 3; /* opening quote, closing quote, NUL */
+    int need_quoting = 0;
+
+    /* Do we need quoting?  To be paranoid, we quote anything
+     * containing a quote or '(', even though these only matter at the
+     * beginning, and anything containing non-ASCII text. */
+    if (! term[0])
+        need_quoting = 1;
+    for (in = term; *in && ! need_quoting; in++)
+        if (is_unquoted_terminator (*in) || *in == '"' || *in == '('
+            || (unsigned char) *in > 127)
+            need_quoting = 1;
+
+    if (need_quoting)
+        for (in = term; *in; in++)
+            needed += (*in == '"') ? 2 : 1;
+    else
+        needed = strlen (term) + 1;
+
+    /* Reserve space for the prefix and its ':' separator */
+    if (prefix)
+        needed += strlen (prefix) + 1;
+
+    if ((*buf == NULL) || (needed > *len)) {
+        /* Grow through a temporary so that a failed reallocation does
+         * not clobber the caller's pointer or recorded size. */
+        size_t grown_len = 2 * needed;
+        char *grown = talloc_realloc (ctx, *buf, char, grown_len);
+
+        if (! grown) {
+            errno = ENOMEM;
+            return -1;
+        }
+        *buf = grown;
+        *len = grown_len;
+    }
+
+    out = *buf;
+
+    /* Copy in the prefix */
+    if (prefix) {
+        strcpy (out, prefix);
+        out += strlen (prefix);
+        *out++ = ':';
+    }
+
+    if (! need_quoting) {
+        strcpy (out, term);
+        return 0;
+    }
+
+    /* Quote term by enclosing it in double quotes and doubling any
+     * internal double quotes. */
+    *out++ = '"';
+    in = term;
+    while (*in) {
+        if (*in == '"')
+            *out++ = '"';
+        *out++ = *in++;
+    }
+    *out++ = '"';
+    *out = '\0';
+
+    return 0;
+}
+
+/* Return a pointer to the first character of 'str' that is not
+ * whitespace according to isspace() (possibly the terminating NUL). */
+const char *
+skip_space (const char *str)
+{
+    const char *cursor;
+
+    for (cursor = str; *cursor != '\0'; cursor++) {
+        if (! isspace ((unsigned char) *cursor))
+            break;
+    }
+
+    return cursor;
+}
+
+/* Parse "prefix:term" from 'str', where the term is either unquoted
+ * (ending at the first terminator character) or enclosed in double
+ * quotes with internal quotes doubled, as written by
+ * make_boolean_term.  Leading whitespace, and whitespace after the
+ * term, is allowed; any other trailing text is a parse error.
+ *
+ * On success returns 0 with *prefix_out and *term_out set to strings
+ * talloc'd under 'ctx'.  On failure returns -1 with errno set to
+ * EINVAL (parse error) or ENOMEM (allocation failure); any partial
+ * results are freed and both outputs are reset to NULL.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+                    char **prefix_out, char **term_out)
+{
+    int err = EINVAL;
+
+    *prefix_out = *term_out = NULL;
+
+    /* Parse prefix */
+    str = skip_space (str);
+    const char *pos = strchr (str, ':');
+
+    if (! pos || pos == str)
+        goto FAIL;
+    *prefix_out = talloc_strndup (ctx, str, pos - str);
+    if (! *prefix_out) {
+        err = ENOMEM;
+        goto FAIL;
+    }
+    ++pos;
+
+    /* Implement de-quoting compatible with make_boolean_term. */
+    if (*pos == '"') {
+        /* The de-quoted text is always shorter than the quoted input,
+         * so strlen (pos) bytes leave room for the terminating NUL. */
+        char *out = talloc_array (ctx, char, strlen (pos));
+        int closed = 0;
+        if (! out) {
+            err = ENOMEM;
+            goto FAIL;
+        }
+        *term_out = out;
+        /* Skip the opening quote, find the closing quote, and
+         * un-double doubled internal quotes. */
+        for (++pos; *pos; ) {
+            if (*pos == '"') {
+                ++pos;
+                if (*pos != '"') {
+                    /* Found the closing quote. */
+                    closed = 1;
+                    pos = skip_space (pos);
+                    break;
+                }
+            }
+            *out++ = *pos++;
+        }
+        /* Did the term terminate without a closing quote or is there
+         * trailing text after the closing quote? */
+        if (! closed || *pos)
+            goto FAIL;
+        *out = '\0';
+    } else {
+        const char *start = pos;
+        /* Check for text after the boolean term. */
+        while (! is_unquoted_terminator (*pos))
+            ++pos;
+        if (*skip_space (pos)) {
+            err = EINVAL;
+            goto FAIL;
+        }
+        /* No trailing text; dup the string so the caller can free
+         * it. */
+        *term_out = talloc_strndup (ctx, start, pos - start);
+        if (! *term_out) {
+            err = ENOMEM;
+            goto FAIL;
+        }
+    }
+    return 0;
+
+  FAIL:
+    talloc_free (*prefix_out);
+    talloc_free (*term_out);
+    /* Do not hand freed pointers back to the caller. */
+    *prefix_out = *term_out = NULL;
+    errno = err;
+    return -1;
+}
+
+/* strcmp() that also accepts NULL: two NULLs compare equal, and a
+ * NULL string orders before any non-NULL string. */
+int
+strcmp_null (const char *s1, const char *s2)
+{
+    if (s1 == NULL)
+        return (s2 == NULL) ? 0 : -1;
+
+    if (s2 == NULL)
+        return 1;
+
+    return strcmp (s1, s2);
+}
+
+/* Return 1 when the strings 'a' and 'b' are equal ignoring case (per
+ * strcasecmp), 0 otherwise. */
+int
+strcase_equal (const void *a, const void *b)
+{
+    const char *lhs = a;
+    const char *rhs = b;
+
+    return ! strcasecmp (lhs, rhs);
+}
+
+/* Case-insensitive djb2 hash of the NUL-terminated string 'ptr'.
+ * Strings that differ only in case (per tolower) hash identically; a
+ * NULL pointer hashes like the empty string. */
+unsigned int
+strcase_hash (const void *ptr)
+{
+    const char *s = ptr;
+
+    /* This is the djb2 hash. */
+    unsigned int hash = 5381;
+
+    while (s && *s) {
+        /* tolower() is only defined for unsigned char values and EOF,
+         * so convert before calling it (plain char may be signed). */
+        hash = ((hash << 5) + hash) + tolower ((unsigned char) *s);
+        s++;
+    }
+
+    return hash;
+}
+
+/* Remove every trailing occurrence of 'ch' from 'str' in place by
+ * overwriting it with NUL. */
+void
+strip_trailing (char *str, char ch)
+{
+    /* Index with size_t: an int would truncate the length of very
+     * long strings. */
+    size_t len = strlen (str);
+
+    while (len > 0 && str[len - 1] == ch)
+        str[--len] = '\0';
+}
diff --git a/util/string-util.h b/util/string-util.h
new file mode 100644
index 00000000..80647c5f
--- /dev/null
+++ b/util/string-util.h
@@ -0,0 +1,100 @@
+#ifndef _STRING_UTIL_H
+#define _STRING_UTIL_H
+
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* like strtok(3), but without state, and doesn't modify s. Return
+ * value is indicated by pointer and length, not null terminator.
+ *
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " \t";
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ * // do stuff with string tok of length tok_len
+ * }
+ */
+
+char *strtok_len (char *s, const char *delim, size_t *len);
+
+/* Const version of strtok_len. */
+const char *strtok_len_c (const char *s, const char *delim, size_t *len);
+
+/* Simplified version of strtok_len, with a single delimiter.
+ * Handles escaping delimiters with \
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * char delim = ';';
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strsplit_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ * // do stuff with string tok of length tok_len
+ * }
+ */
+const char *strsplit_len (const char *s, char delim, size_t *len);
+
+/* Return a talloced string with str sanitized.
+ *
+ * Whitespace characters (tabs and newlines) are replaced with spaces,
+ * non-printable characters with question marks.
+ */
+char *sanitize_string (const void *ctx, const char *str);
+
+/* Construct a boolean term query with the specified prefix (e.g.,
+ * "id") and search term, quoting term as necessary. Specifically, if
+ * term contains any non-printable ASCII characters, non-ASCII
+ * characters, close parenthesis or double quotes, it will be enclosed
+ * in double quotes and any internal double quotes will be doubled
+ * (e.g. a"b -> "a""b"). The result will be a valid notmuch query and
+ * can be parsed by parse_boolean_term.
+ *
+ * Output is into buf; it may be talloc_realloced.
+ * Return: 0 on success, -1 on error. errno will be set to ENOMEM if
+ * there is an allocation failure.
+ */
+int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
+ char **buf, size_t *len);
+
+/* Parse a boolean term query consisting of a prefix, a colon, and a
+ * term that may be quoted as described for make_boolean_term. If the
+ * term is not quoted, then it ends at the first whitespace or close
+ * parenthesis. str may contain leading or trailing whitespace,
+ * but anything else is considered a parse error. This is compatible
+ * with anything produced by make_boolean_term, and supports a subset
+ * of the quoting styles supported by Xapian (and hence notmuch).
+ * *prefix_out and *term_out will be talloc'd with context ctx.
+ *
+ * Return: 0 on success, -1 on error. errno will be set to EINVAL if
+ * there is a parse error or ENOMEM if there is an allocation failure.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+ char **prefix_out, char **term_out);
+
+/* strcmp that handles NULL strings; in strcmp terms a NULL string is
+ * considered to be less than a non-NULL string.
+ */
+int strcmp_null (const char *s1, const char *s2);
+
+/* GLib GEqualFunc compatible strcasecmp wrapper */
+int strcase_equal (const void *a, const void *b);
+
+/* GLib GHashFunc compatible case insensitive hash function */
+unsigned int strcase_hash (const void *ptr);
+
+void strip_trailing (char *str, char ch);
+
+const char *skip_space (const char *str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/talloc-extra.c b/util/talloc-extra.c
new file mode 100644
index 00000000..96262470
--- /dev/null
+++ b/util/talloc-extra.c
@@ -0,0 +1,14 @@
+#include <string.h>
+#include "talloc-extra.h"
+
+/* talloc_strndup() that additionally sets the talloc name of the new
+ * string to 'name'.  Returns NULL when the duplication fails. */
+char *
+talloc_strndup_named_const (void *ctx, const char *str,
+                            size_t len, const char *name)
+{
+    char *copy = talloc_strndup (ctx, str, len);
+
+    if (copy == NULL)
+        return NULL;
+
+    talloc_set_name_const (copy, name);
+    return copy;
+}
diff --git a/util/talloc-extra.h b/util/talloc-extra.h
new file mode 100644
index 00000000..e2e61734
--- /dev/null
+++ b/util/talloc-extra.h
@@ -0,0 +1,26 @@
+#ifndef _TALLOC_EXTRA_H
+#define _TALLOC_EXTRA_H
+
+#include <talloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Like talloc_strndup, but take an extra parameter for the internal talloc
+ * name (for debugging) */
+
+char *
+talloc_strndup_named_const (void *ctx, const char *str,
+ size_t len, const char *name);
+
+/* use the __location__ macro from talloc.h to name a string according to its
+ * source location */
+
+#define talloc_strndup_debug(ctx, str, len) talloc_strndup_named_const (ctx, str, len, __location__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/unicode-util.c b/util/unicode-util.c
new file mode 100644
index 00000000..ccb787e2
--- /dev/null
+++ b/util/unicode-util.c
@@ -0,0 +1,43 @@
+#include "unicode-util.h"
+
+/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to
+ * link directly to libxapian.
+ */
+
+/* True when the GLib Unicode type of 'ch' is a letter, mark, number or
+ * connector-punctuation category. */
+static bool
+unicode_is_wordchar (notmuch_unichar ch)
+{
+    bool is_word;
+
+    switch (g_unichar_type (ch)) {
+    /* letters */
+    case G_UNICODE_UPPERCASE_LETTER:
+    case G_UNICODE_LOWERCASE_LETTER:
+    case G_UNICODE_TITLECASE_LETTER:
+    case G_UNICODE_MODIFIER_LETTER:
+    case G_UNICODE_OTHER_LETTER:
+    /* marks */
+    case G_UNICODE_NON_SPACING_MARK:
+    case G_UNICODE_ENCLOSING_MARK:
+    case G_UNICODE_SPACING_MARK:
+    /* numbers */
+    case G_UNICODE_DECIMAL_NUMBER:
+    case G_UNICODE_LETTER_NUMBER:
+    case G_UNICODE_OTHER_NUMBER:
+    /* connector punctuation */
+    case G_UNICODE_CONNECT_PUNCTUATION:
+        is_word = true;
+        break;
+    default:
+        is_word = false;
+        break;
+    }
+
+    return is_word;
+}
+
+/* True when every character of the UTF-8 string 'utf8_str' satisfies
+ * unicode_is_wordchar (vacuously true for the empty string). */
+bool
+unicode_word_utf8 (const char *utf8_str)
+{
+    gunichar *ucs4 = g_utf8_to_ucs4_fast (utf8_str, -1, NULL);
+    const gunichar *cursor;
+    bool all_wordchars;
+
+    /* Stop at the first non-word character or at the terminator. */
+    for (cursor = ucs4; *cursor; cursor++) {
+        if (! unicode_is_wordchar (*cursor))
+            break;
+    }
+
+    all_wordchars = (*cursor == '\0');
+
+    g_free (ucs4);
+    return all_wordchars;
+}
diff --git a/util/unicode-util.h b/util/unicode-util.h
new file mode 100644
index 00000000..1bb9336a
--- /dev/null
+++ b/util/unicode-util.h
@@ -0,0 +1,19 @@
+#ifndef UNICODE_UTIL_H
+#define UNICODE_UTIL_H
+
+#include <stdbool.h>
+#include <gmodule.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The utf8 encoded string would tokenize as a single word, according
+ * to xapian. */
+bool unicode_word_utf8 (const char *str);
+typedef gunichar notmuch_unichar;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/util.c b/util/util.c
new file mode 100644
index 00000000..6abe2215
--- /dev/null
+++ b/util/util.c
@@ -0,0 +1,24 @@
+#include "util.h"
+#include "error_util.h"
+#include <string.h>
+#include <errno.h>
+
+/* Map a util_status_t value to a human-readable message.  For
+ * UTIL_ERRNO the message is strerror() of the current errno.  Any
+ * unknown value is reported through INTERNAL_ERROR (presumably fatal
+ * -- confirm in error_util.h). */
+const char *
+util_error_string (util_status_t errnum)
+{
+    const char *message = NULL;
+
+    switch (errnum) {
+    case UTIL_SUCCESS:
+        message = "success";
+        break;
+    case UTIL_OUT_OF_MEMORY:
+        message = "out of memory";
+        break;
+    case UTIL_EOF:
+        message = "end of file";
+        break;
+    case UTIL_ERRNO:
+        message = strerror (errno);
+        break;
+    case UTIL_GZERROR:
+        /* we lack context to be more informative here */
+        message = "zlib error";
+        break;
+    default:
+        INTERNAL_ERROR ("unexpected error status %d", errnum);
+    }
+
+    return message;
+}
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 00000000..b24860af
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,29 @@
+#ifndef _UTIL_H
+#define _UTIL_H
+
+/* Status codes shared by the util helpers: returned by gz_getline and
+ * converted to text by util_error_string. */
+typedef enum util_status {
+ /**
+ * No error occurred.
+ */
+ UTIL_SUCCESS = 0,
+ /**
+ * Out of memory.
+ */
+ UTIL_OUT_OF_MEMORY,
+ /**
+ * End of stream reached while attempting to read.
+ */
+ UTIL_EOF,
+ /**
+ * Low level error occurred, consult errno.
+ */
+ UTIL_ERRNO,
+ /**
+ * Zlib error occurred, call gzerror for details.
+ */
+ UTIL_GZERROR
+} util_status_t;
+
+const char *
+util_error_string (util_status_t status);
+#endif
diff --git a/util/xapian-extra.h b/util/xapian-extra.h
new file mode 100644
index 00000000..39c7f48f
--- /dev/null
+++ b/util/xapian-extra.h
@@ -0,0 +1,15 @@
+#ifndef _XAPIAN_EXTRA_H
+#define _XAPIAN_EXTRA_H
+
+#include <string>
+#include <xapian.h>
+
+// Build a query from the empty term, as a stand-in for
+// Xapian::Query::MatchAll.
+inline Xapian::Query
+xapian_query_match_all (void)
+{
+    // Xapian::Query::MatchAll isn't thread safe (a static object with
+    // reference counting), so an equivalent query is rebuilt on every
+    // call instead.
+    const std::string empty_term;
+
+    return Xapian::Query (empty_term);
+}
+
+#endif
diff --git a/util/xutil.c b/util/xutil.c
new file mode 100644
index 00000000..07a00343
--- /dev/null
+++ b/util/xutil.c
@@ -0,0 +1,139 @@
+/* xutil.c - Various wrapper functions to abort on error.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "xutil.h"
+#include "error_util.h"
+
+/* calloc() wrapper that never returns NULL: on allocation failure it
+ * prints "Out of memory." to stderr and exits with status 1.
+ *
+ * A zero-sized request is rounded up to a single byte, because
+ * calloc() may legitimately return NULL for it and that must not be
+ * reported as memory exhaustion. */
+void *
+xcalloc (size_t nmemb, size_t size)
+{
+    void *ret;
+
+    if (nmemb == 0 || size == 0)
+        nmemb = size = 1;
+
+    ret = calloc (nmemb, size);
+    if (ret == NULL) {
+        fprintf (stderr, "Out of memory.\n");
+        exit (1);
+    }
+
+    return ret;
+}
+
+/* malloc() wrapper that never returns NULL: on allocation failure it
+ * prints "Out of memory." to stderr and exits with status 1.
+ *
+ * A zero-sized request is rounded up to a single byte, because
+ * malloc (0) may legitimately return NULL and that must not be
+ * reported as memory exhaustion. */
+void *
+xmalloc (size_t size)
+{
+    void *ret;
+
+    ret = malloc (size ? size : 1);
+    if (ret == NULL) {
+        fprintf (stderr, "Out of memory.\n");
+        exit (1);
+    }
+
+    return ret;
+}
+
+/* realloc() wrapper that never returns NULL: on allocation failure it
+ * prints "Out of memory." to stderr and exits with status 1.
+ *
+ * A zero-sized request is rounded up to a single byte.  realloc (ptr,
+ * 0) may free 'ptr' and return NULL, which is not an out-of-memory
+ * condition and previously terminated the program. */
+void *
+xrealloc (void *ptr, size_t size)
+{
+    void *ret;
+
+    ret = realloc (ptr, size ? size : 1);
+    if (ret == NULL) {
+        fprintf (stderr, "Out of memory.\n");
+        exit (1);
+    }
+
+    return ret;
+}
+
+/* strdup() wrapper that never returns NULL: on allocation failure it
+ * prints "Out of memory." to stderr and exits with status 1. */
+char *
+xstrdup (const char *s)
+{
+    char *copy = strdup (s);
+
+    if (! copy) {
+        fprintf (stderr, "Out of memory.\n");
+        exit (1);
+    }
+
+    return copy;
+}
+
+/* Duplicate at most 'n' bytes of 's' into a freshly malloc'd,
+ * NUL-terminated string.  Never returns NULL: on allocation failure
+ * it prints "Out of memory." to stderr and exits with status 1. */
+char *
+xstrndup (const char *s, size_t n)
+{
+    char *ret;
+    /* Look for the terminator within the first n bytes only, so that
+     * an input lacking a NUL inside that range is never read past n
+     * (an unbounded strlen() would over-read it). */
+    const char *terminator = memchr (s, '\0', n);
+
+    if (terminator != NULL)
+        n = (size_t) (terminator - s);
+
+    ret = malloc (n + 1);
+    if (ret == NULL) {
+        fprintf (stderr, "Out of memory.\n");
+        exit (1);
+    }
+    memcpy (ret, s, n);
+    ret[n] = '\0';
+
+    return ret;
+}
+
+/* Compile 'regex' into 'preg' with regcomp().  Returns 0 on success.
+ * On failure prints the regerror() text to stderr and returns 1. */
+int
+xregcomp (regex_t *preg, const char *regex, int cflags)
+{
+    int status = regcomp (preg, regex, cflags);
+    size_t message_size;
+    char *message;
+
+    if (status == 0)
+        return 0;
+
+    /* First call sizes the message, second call fills it in. */
+    message_size = regerror (status, preg, NULL, 0);
+    message = xmalloc (message_size);
+    regerror (status, preg, message, message_size);
+
+    fprintf (stderr, "compiling regex %s: %s\n",
+             regex, message);
+    free (message);
+
+    return 1;
+}
+
+/* Run regexec() and insist that every one of the 'nmatch' requested
+ * sub-matches was found.
+ *
+ * Returns the non-zero regexec() result when the match fails, and 0
+ * when it succeeds with all sub-matches present.  A missing sub-match
+ * is reported through INTERNAL_ERROR (presumably fatal -- confirm in
+ * error_util.h). */
+int
+xregexec (const regex_t *preg, const char *string,
+          size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+    unsigned int i;
+    int rerr;
+
+    rerr = regexec (preg, string, nmatch, pmatch, eflags);
+    if (rerr)
+        return rerr;
+
+    for (i = 0; i < nmatch; i++) {
+        /* 'i' is unsigned, so it is printed with %u rather than %d. */
+        if (pmatch[i].rm_so == -1)
+            INTERNAL_ERROR ("matching regex against %s: Sub-match %u not found\n",
+                            string, i);
+    }
+
+    return 0;
+}
diff --git a/util/xutil.h b/util/xutil.h
new file mode 100644
index 00000000..e2707000
--- /dev/null
+++ b/util/xutil.h
@@ -0,0 +1,60 @@
+/* xutil.h - Various wrapper functions to abort on error.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#ifndef NOTMUCH_XUTIL_H
+#define NOTMUCH_XUTIL_H
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* xutil.c */
+void *
+xcalloc (size_t nmemb, size_t size);
+
+void *
+xmalloc (size_t size);
+
+void *
+xrealloc (void *ptrr, size_t size);
+
+char *
+xstrdup (const char *s);
+
+char *
+xstrndup (const char *s, size_t n);
+
+/* Returns 0 for successful compilation, 1 otherwise */
+int
+xregcomp (regex_t *preg, const char *regex, int cflags);
+
+int
+xregexec (const regex_t *preg, const char *string,
+ size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
new file mode 100644
index 00000000..1f5f9dbe
--- /dev/null
+++ b/util/zlib-extra.c
@@ -0,0 +1,95 @@
+/* zlib-extra.c - Extra or enhanced routines for compressed I/O.
+ *
+ * Copyright (c) 2014 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include "zlib-extra.h"
+#include <talloc.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Mimic POSIX/glibc getline, but on a zlib gzFile stream, and using
+ * talloc.
+ *
+ * *bufptr is either NULL or a talloc'd char array; it is allocated or
+ * grown under 'talloc_ctx' as needed.  On UTIL_SUCCESS the line
+ * (including its trailing newline, if one was read) is in *bufptr and
+ * its length in *bytes_read.  UTIL_EOF is returned when end of file
+ * is reached before any character is read; UTIL_OUT_OF_MEMORY,
+ * UTIL_ERRNO and UTIL_GZERROR report failures.
+ *
+ * *bufptr is updated on every return path after the initial
+ * allocation, so the caller never keeps a pointer that an earlier
+ * reallocation inside this function has already invalidated.
+ */
+util_status_t
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
+{
+    char *buf = *bufptr;
+    unsigned int len;
+    size_t offset = 0;
+    util_status_t status = UTIL_SUCCESS;
+
+    if (buf) {
+        len = talloc_array_length (buf);
+    } else {
+        /* same as getdelim from gnulib */
+        len = 120;
+        buf = talloc_array (talloc_ctx, char, len);
+        if (buf == NULL)
+            return UTIL_OUT_OF_MEMORY;
+    }
+
+    while (1) {
+        char *grown;
+
+        if (! gzgets (stream, buf + offset, len - offset)) {
+            /* Null indicates EOF or error */
+            int zlib_status = 0;
+            (void) gzerror (stream, &zlib_status);
+            switch (zlib_status) {
+            case Z_STREAM_END:
+            case Z_OK:
+                /* no data read before EOF */
+                if (offset == 0)
+                    status = UTIL_EOF;
+                break;
+            case Z_ERRNO:
+                status = UTIL_ERRNO;
+                break;
+            default:
+                status = UTIL_GZERROR;
+                break;
+            }
+            goto DONE;
+        }
+
+        offset += strlen (buf + offset);
+
+        /* offset can still be 0 if the buffer only had room for the
+         * terminator; do not look at buf[-1] in that case. */
+        if (offset > 0 && buf[offset - 1] == '\n')
+            goto DONE;
+
+        len *= 2;
+        /* Grow through a temporary so the current buffer stays valid
+         * (and is handed back) if the reallocation fails. */
+        grown = talloc_realloc (talloc_ctx, buf, char, len);
+        if (grown == NULL) {
+            status = UTIL_OUT_OF_MEMORY;
+            goto DONE;
+        }
+        buf = grown;
+    }
+  DONE:
+    *bufptr = buf;
+    if (status == UTIL_SUCCESS)
+        *bytes_read = offset;
+    return status;
+}
+
+/* Describe 'status': for UTIL_GZERROR the message comes from the zlib
+ * stream 'file' (via gzerror_str), for every other status from
+ * util_error_string. */
+const char *
+gz_error_string (util_status_t status, gzFile file)
+{
+    if (status != UTIL_GZERROR)
+        return util_error_string (status);
+
+    return gzerror_str (file);
+}
+
+/* Return the gzerror() message for 'file', discarding the numeric
+ * error code. */
+const char *
+gzerror_str (gzFile file)
+{
+    /* gzerror is always given somewhere to store the code; only the
+     * returned message is of interest here. */
+    int ignored_errnum;
+
+    return gzerror (file, &ignored_errnum);
+}
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
new file mode 100644
index 00000000..7532339b
--- /dev/null
+++ b/util/zlib-extra.h
@@ -0,0 +1,39 @@
+#ifndef _ZLIB_EXTRA_H
+#define _ZLIB_EXTRA_H
+
+#include "util.h"
+#include <zlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Like getline, but read from a gzFile. Allocation is with talloc.
+ * Returns:
+ *
+ * UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR
+ * Consult util.h for description
+ *
+ * UTIL_EOF End of file encountered before
+ * any characters read
+ */
+util_status_t
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
+
+/* return a suitable error string based on the return status
+ * from gz_getline
+ */
+
+const char *
+gz_error_string (util_status_t status, gzFile stream);
+
+/* Call gzerror with a dummy errno argument, the docs don't promise to
+ * support the NULL case */
+const char *
+gzerror_str (gzFile file);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif