Import notmuch_0.38.2.orig.tar.xz

[dgit import orig notmuch_0.38.2.orig.tar.xz]
author: David Bremner <bremner@debian.org> 2023-12-01 07:51:09 -0400
committer: David Bremner <bremner@debian.org> 2023-12-01 07:51:09 -0400
commit: 126347b6942dd4b0291beb67b119431ebd750a2a (patch)
tree: 532c5163cb0972c8b9e6c8b4577b86afb9c6a6a2 /util
27 files changed, 2128 insertions, 0 deletions
diff --git a/util/Makefile b/util/Makefile
new file mode 100644
index 00000000..fa25832e
--- /dev/null
+++ b/util/Makefile
@@ -0,0 +1,5 @@
+all:
+	$(MAKE) -C .. all
+
+.DEFAULT:
+	$(MAKE) -C .. $@
diff --git a/util/Makefile.local b/util/Makefile.local
new file mode 100644
index 00000000..8a0b9bc3
--- /dev/null
+++ b/util/Makefile.local
@@ -0,0 +1,18 @@
+# -*- makefile-gmake -*-
+
+dir := util
+extra_cflags += -I$(srcdir)/$(dir)
+
+libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+		  $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
+		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \
+		$(dir)/repair.c $(dir)/path-util.c \
+		$(dir)/unicode-util.c
+
+libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
+
+$(dir)/libnotmuch_util.a: $(libnotmuch_util_modules)
+	$(call quiet,AR) rcs $@ $^
+
+SRCS := $(SRCS) $(libnotmuch_util_c_srcs)
+CLEAN := $(CLEAN) $(libnotmuch_util_modules) $(dir)/libnotmuch_util.a
diff --git a/util/crypto.c b/util/crypto.c
new file mode 100644
index 00000000..156a6550
--- /dev/null
+++ b/util/crypto.c
@@ -0,0 +1,245 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2012 Jameson Rollins
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Authors: Jameson Rollins <jrollins@finestructure.net>
+ */
+
+#include "crypto.h"
+#include <strings.h>
+#include "error_util.h"
+#define unused(x) x __attribute__ ((unused))
+
+#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
+
+void
+_notmuch_crypto_cleanup (unused(_notmuch_crypto_t *crypto))
+{ /* Deliberately empty: nothing is released here and 'crypto' is unused. */
+}
+
+/* Decrypt 'part' with 'flags' and the optional 'session_key' (may be
+ * NULL).  multipart/encrypted and PKCS#7 enveloped-data parts are
+ * handled; any other part yields NULL without touching *err. */
+static GMimeObject *
+_decrypt_part (GMimeObject *part, GMimeDecryptFlags flags, const char *session_key,
+	       GMimeDecryptResult **decrypt_result, GError **err)
+{
+    if (GMIME_IS_MULTIPART_ENCRYPTED (part))
+	return g_mime_multipart_encrypted_decrypt (GMIME_MULTIPART_ENCRYPTED (part), flags,
+						   session_key, decrypt_result, err);
+
+    if (GMIME_IS_APPLICATION_PKCS7_MIME (part)) {
+	GMimeApplicationPkcs7Mime *pkcs7 = GMIME_APPLICATION_PKCS7_MIME (part);
+
+	if (g_mime_application_pkcs7_mime_get_smime_type (pkcs7) ==
+	    GMIME_SECURE_MIME_TYPE_ENVELOPED_DATA)
+	    return g_mime_application_pkcs7_mime_decrypt (pkcs7, flags, session_key,
+							  decrypt_result, err);
+    }
+    return NULL;
+}
+
+/* Try to decrypt 'part' according to the policy 'decrypt'.
+ * With NOTMUCH_DECRYPT_FALSE nothing is tried.  Otherwise every
+ * "session-key" property stashed on 'message' (which may be NULL) is
+ * tried first; unless the policy is NOTMUCH_DECRYPT_AUTO, a decryption
+ * without session key follows if none of them worked.
+ * *attempted (if non-NULL) is set to true as soon as a decryption is
+ * tried.  Returns the decrypted object or NULL; an error left in *err
+ * stems from the last attempt only. */
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+			 notmuch_decryption_policy_t decrypt,
+			 notmuch_message_t *message,
+			 GMimeObject *part,
+			 GMimeDecryptResult **decrypt_result,
+			 GError **err)
+{
+    GMimeObject *ret = NULL;
+    GMimeDecryptFlags flags = GMIME_DECRYPT_NONE;
+
+    if (decrypt == NOTMUCH_DECRYPT_FALSE)
+	return NULL;
+
+    /* try decryption with session key if one is stashed */
+    if (message) {
+	notmuch_message_properties_t *list = NULL;
+
+	for (list = notmuch_message_get_properties (message, "session-key", TRUE);
+	     notmuch_message_properties_valid (list);
+	     notmuch_message_properties_move_to_next (list)) {
+	    g_clear_error (err);    /* forget the previous key's failure */
+	    if (attempted)
+		*attempted = true;
+	    ret = _decrypt_part (part, GMIME_DECRYPT_NONE,
+				 notmuch_message_properties_value (list),
+				 decrypt_result, err);
+	    if (ret)
+		break;
+	}
+	if (list)
+	    notmuch_message_properties_destroy (list);
+	if (ret)
+	    return ret;
+    }
+
+    g_clear_error (err);
+
+    if (decrypt == NOTMUCH_DECRYPT_AUTO)
+	return NULL;
+
+    if (attempted)
+	*attempted = true;
+    /* only an explicit "true" policy asks for the session key back */
+    if (decrypt == NOTMUCH_DECRYPT_TRUE && decrypt_result)
+	flags |= GMIME_DECRYPT_EXPORT_SESSION_KEY;
+    return _decrypt_part (part, flags, NULL, decrypt_result, err);
+}
+
+/* talloc destructor: drops the reference held on the signature list and
+ * frees the stored payload subject.  Always returns 0. */
+static int
+_notmuch_message_crypto_destructor (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (msg_crypto && msg_crypto->sig_list)
+	g_object_unref (msg_crypto->sig_list);
+    if (msg_crypto && msg_crypto->payload_subject)
+	talloc_free (msg_crypto->payload_subject);
+    return 0;
+}
+
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx)
+{
+    _notmuch_message_crypto_t *ret = talloc_zero (ctx, _notmuch_message_crypto_t);
+    if (ret)    /* allocation can fail: never pass NULL to talloc_set_destructor */
+	talloc_set_destructor (ret, _notmuch_message_crypto_destructor);
+    return ret; /* zeroed state owned by 'ctx', or NULL if out of memory */
+}
+
+/* Record 'sigs' as the signature list of the cryptographic envelope.
+ * Ignored (with success) once a payload part has been encountered. */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+					    GMimeSignatureList *sigs)
+{
+    if (! msg_crypto)
+	return NOTMUCH_STATUS_NULL_POINTER;
+
+    /* Signatures that arrive after a payload part during DFS are not
+     * part of the cryptographic envelope: */
+    if (msg_crypto->payload_encountered)
+	return NOTMUCH_STATUS_SUCCESS;
+
+    /* This signature list needs to persist as long as the _n_m_crypto
+     * object survives, so hold a reference to it.  Take the new
+     * reference before dropping the old one: if 'sigs' is the list we
+     * already hold, unref-first could destroy it before we re-ref it. */
+    if (sigs)
+	g_object_ref (sigs);
+    if (msg_crypto->sig_list)
+	g_object_unref (msg_crypto->sig_list);
+    msg_crypto->sig_list = sigs;
+
+    if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_FULL)
+	msg_crypto->signature_encrypted = true;
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
+
+/* Decide whether 'part' (child 'childnum' of 'parent', which may be NULL)
+ * is the root of the cryptographic payload; only the first payload part
+ * can be.  If so, a protected Subject: it carries is copied to msg_crypto. */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+					   GMimeObject *parent, int childnum)
+{
+    const char *protected_headers = NULL;
+    const char *forwarded = NULL;
+    const char *subject = NULL;
+
+    if ((! msg_crypto) || (! part))
+	INTERNAL_ERROR ("_notmuch_message_crypto_potential_payload() got NULL for %s\n",
+			msg_crypto? "part" : "msg_crypto");
+
+    /* only fire on the first payload part encountered */
+    if (msg_crypto->payload_encountered)
+	return false;
+
+    /* the first child of multipart/encrypted that matches the
+     * encryption protocol should be "control information" metadata,
+     * not payload.  So we skip it. (see
+     * https://tools.ietf.org/html/rfc1847#page-8) */
+    if (parent && GMIME_IS_MULTIPART_ENCRYPTED (parent) &&
+	childnum == GMIME_MULTIPART_ENCRYPTED_VERSION) {
+	const char *enc_type = g_mime_object_get_content_type_parameter (parent, "protocol");
+	GMimeContentType *ct = g_mime_object_get_content_type (part);
+	if (ct && enc_type) {
+	    char *part_type = g_mime_content_type_get_mime_type (ct);   /* newly allocated */
+	    bool is_control = part_type && strcmp (part_type, enc_type) == 0;
+	    g_free (part_type);     /* the original never released this string */
+	    if (is_control)
+		return false;
+	}
+    }
+
+    msg_crypto->payload_encountered = true;
+
+    /* don't bother recording anything if there is no cryptographic
+     * envelope: */
+    if ((msg_crypto->decryption_status != NOTMUCH_MESSAGE_DECRYPTED_FULL) &&
+	(msg_crypto->sig_list == NULL))
+	return false;
+
+    /* Consider a payload that uses Alexey Melnikov's forwarded="no" for
+     * message/global or message/rfc822:
+     * https://tools.ietf.org/html/draft-melnikov-smime-header-signing-05#section-4 */
+    forwarded = g_mime_object_get_content_type_parameter (part, "forwarded");
+    if (GMIME_IS_MESSAGE_PART (part) && forwarded && strcmp (forwarded, "no") == 0) {
+	GMimeMessage *message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+	subject = g_mime_message_get_subject (message);
+    } else {
+	/* Consider "memoryhole"-style protected headers as practiced by Enigmail and K-9 */
+	protected_headers = g_mime_object_get_content_type_parameter (part, "protected-headers");
+	if (protected_headers && strcasecmp ("v1", protected_headers) == 0)
+	    subject = g_mime_object_get_header (part, "Subject");
+    }
+    /* FIXME: handle more than just Subject: at some point */
+
+    if (subject) {
+	if (msg_crypto->payload_subject)
+	    talloc_free (msg_crypto->payload_subject);
+	msg_crypto->payload_subject = talloc_strdup (msg_crypto, subject);
+    }
+    return true;
+}
+
+
+/* Note a successful decryption: FULL if no payload part has been seen
+ * yet, otherwise NONE is upgraded to PARTIAL (see util/crypto.h). */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (msg_crypto == NULL)
+	return NOTMUCH_STATUS_NULL_POINTER;
+    if (! msg_crypto->payload_encountered) {
+	msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_FULL;
+	return NOTMUCH_STATUS_SUCCESS;
+    }
+    if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_NONE)
+	msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_PARTIAL;
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/util/crypto.h b/util/crypto.h
new file mode 100644
index 00000000..3c5d384b
--- /dev/null
+++ b/util/crypto.h
@@ -0,0 +1,106 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include <stdbool.h>
+#include "gmime-extra.h"
+#include "notmuch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _notmuch_crypto {
+    bool verify; /* NOTE(review): presumably "verify signatures" -- confirm with users of this struct */
+    notmuch_decryption_policy_t decrypt; /* decryption policy; same type as the 'decrypt' argument of _notmuch_crypto_decrypt */
+} _notmuch_crypto_t;
+
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+			 notmuch_decryption_policy_t decrypt,
+			 notmuch_message_t *message,
+			 GMimeObject *part,
+			 GMimeDecryptResult **decrypt_result,
+			 GError **err);
+
+void
+_notmuch_crypto_cleanup (_notmuch_crypto_t *crypto);
+
+/* The user probably wants to know if the entire message was in the
+ * clear.  When replying, the MUA probably wants to know whether there
+ * was any part decrypted in the message.  And when displaying to the
+ * user, we probably only want to display "encrypted message" if the
+ * entire message was covered by encryption. */
+typedef enum {
+    NOTMUCH_MESSAGE_DECRYPTED_NONE = 0, /* no successful decryption recorded (the zero-initialized state) */
+    NOTMUCH_MESSAGE_DECRYPTED_PARTIAL, /* a decryption succeeded only after a payload part had been seen */
+    NOTMUCH_MESSAGE_DECRYPTED_FULL, /* a decryption succeeded before any payload part was seen */
+} _notmuch_message_decryption_status_t;
+
+/* description of the cryptographic state of a given message overall;
+ * for use by simple user agents.
+ */
+typedef struct _notmuch_message_crypto {
+    /* decryption status: none, partial or full */
+    _notmuch_message_decryption_status_t decryption_status;
+    /* FIXME: can we show what key(s) a fully-encrypted message was
+     * encrypted to? This data is not necessarily cryptographically
+     * reliable; even when we decrypt, we might not know which public
+     * key was used (e.g. if we're using a session key). */
+
+    /* signature status of the whole message (either the whole message
+     * is signed, or it is not) -- partially-signed messages get no
+     * signature status.  A reference is held while the list is stored. */
+    GMimeSignatureList *sig_list;
+    /* if part of the message was signed, and the MUA is clever, it
+     * can determine on its own exactly which part and try to make
+     * more sense of it. */
+
+    /* set once we encounter a payload (i.e. something that is not
+     * part of the cryptographic envelope); never cleared afterwards */
+    bool payload_encountered;
+
+    /* the value of any "Subject:" header in the cryptographic payload
+     * (the top level part within the crypto envelope), converted to
+     * UTF-8; stored as a talloc child of this object */
+    char *payload_subject;
+
+    /* if both signed and encrypted, was the signature encrypted? */
+    bool signature_encrypted;
+} _notmuch_message_crypto_t;
+
+
+/* _notmuch_message_crypto_t objects should be released with
+ * talloc_free (), or they will be released along with their parent
+ * context.
+ */
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx);
+
+/* call potential_sig_list during a depth-first-search on a message to
+ * consider a particular signature as relevant for the message.
+ */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+					    GMimeSignatureList *sigs);
+
+/* call successful_decryption during a depth-first-search on a message
+ * to indicate that a part was successfully decrypted.
+ */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto);
+
+/* call potential_payload during a depth-first-search on a message
+ * when encountering a message part that is not part of the envelope.
+ *
+ * Returns true if part is the root of the cryptographic payload of
+ * this message.
+ */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+					   GMimeObject *parent, int childnum);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/error_util.c b/util/error_util.c
new file mode 100644
index 00000000..e64162c7
--- /dev/null
+++ b/util/error_util.c
@@ -0,0 +1,40 @@
+/* error_util.c - internal error utilities for notmuch.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "error_util.h"
+
+void
+_internal_error (const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+
+    fprintf (stderr, "Internal error: ");
+    vfprintf (stderr, format, va_args);
+
+    va_end (va_args);
+    exit (1);
+}
+
diff --git a/util/error_util.h b/util/error_util.h
new file mode 100644
index 00000000..a51f001f
--- /dev/null
+++ b/util/error_util.h
@@ -0,0 +1,54 @@
+/* error_util.h - Provide the INTERNAL_ERROR macro
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#ifndef ERROR_UTIL_H
+#define ERROR_UTIL_H
+
+#include <talloc.h>
+
+#include "function-attributes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* There's no point in continuing when we've detected that we've done
+ * something wrong internally (as opposed to the user passing in a
+ * bogus value).
+ *
+ * Note that PRINTF_ATTRIBUTE comes from talloc.h
+ */
+void
+_internal_error (const char *format, ...) PRINTF_ATTRIBUTE (1, 2) NORETURN_ATTRIBUTE;
+
+/* Report an internal error and terminate.  'format' must be a string
+ * literal (it is concatenated with the suffix); it is passed with its
+ * arguments to _internal_error, followed by the source location.
+ *
+ * Note that __location__ comes from talloc.h.
+ */
+#define INTERNAL_ERROR(format, ...)                     \
+    _internal_error (format " (%s).\n",                 \
+		     ##__VA_ARGS__, __location__)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/gmime-extra.c b/util/gmime-extra.c
new file mode 100644
index 00000000..192cb078
--- /dev/null
+++ b/util/gmime-extra.c
@@ -0,0 +1,221 @@
+#include "gmime-extra.h"
+#include <string.h>
+
+/* Wrap 'file_stream' in a gunzip filter stream if its content starts
+ * with the gzip magic bytes, else return 'file_stream' itself.  In the
+ * gzip case the caller's reference on 'file_stream' is dropped.
+ * Returns NULL on failure; 'file_stream' is then not released here. */
+static
+GMimeStream *
+_gzfile_maybe_filter (GMimeStream *file_stream)
+{
+    char buf[4];
+    int bytes_read;
+
+    if ((bytes_read = g_mime_stream_read (file_stream, buf, sizeof (buf))) < 0)
+	return NULL;
+    if (g_mime_stream_reset (file_stream))
+	return NULL;
+
+    /* not gzipped (magic is 0x1f 0x8b): hand the stream back untouched */
+    if (bytes_read < 2 || buf[0] != 0x1f || (unsigned char) buf[1] != 0x8b)
+	return file_stream;
+
+    GMimeFilter *gzfilter = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_UNZIP, 0);
+    if (! gzfilter)
+	return NULL;
+    GMimeStream *gzstream = g_mime_stream_filter_new (file_stream);
+    if (! gzstream) {
+	g_object_unref (gzfilter);  /* do not leak the filter */
+	return NULL;
+    }
+
+    /* ignore filter id */
+    (void) g_mime_stream_filter_add ((GMimeStreamFilter *) gzstream, gzfilter);
+    g_object_unref (gzfilter);
+    g_object_unref (file_stream);
+    return gzstream;
+}
+
+/* Return a stream reading from the open descriptor 'fd', un-gzipping
+ * if necessary.  Returns NULL if no stream can be set up. */
+GMimeStream *
+g_mime_stream_gzfile_new (int fd)
+{
+    GMimeStream *fs_stream = g_mime_stream_fs_new (fd);
+
+    if (fs_stream == NULL)
+	return NULL;
+    return _gzfile_maybe_filter (fs_stream);
+}
+
+/* Open 'filename' (open flags and mode both 0) and return a stream
+ * over it, un-gzipping if necessary.  Returns NULL on failure. */
+GMimeStream *
+g_mime_stream_gzfile_open (const char *filename)
+{
+    GMimeStream *fs_stream = g_mime_stream_fs_open (filename, 0, 0, NULL);
+
+    if (fs_stream == NULL)
+	return NULL;
+    return _gzfile_maybe_filter (fs_stream);
+}
+
+/* Return a block-write buffered stream on standard output.  The pipe
+ * stream underneath is marked as not owning the descriptor.  Returns
+ * NULL if the pipe stream cannot be created. */
+GMimeStream *
+g_mime_stream_stdout_new (void)
+{
+    GMimeStream *stream_buffered;
+    GMimeStream *stream_stdout = g_mime_stream_pipe_new (STDOUT_FILENO);
+
+    if (! stream_stdout)
+	return NULL;
+    g_mime_stream_pipe_set_owner (GMIME_STREAM_PIPE (stream_stdout), FALSE);
+
+    stream_buffered = g_mime_stream_buffer_new (stream_stdout, GMIME_STREAM_BUFFER_BLOCK_WRITE);
+    /* drop our reference; the buffer stream is expected to hold its own */
+    g_object_unref (stream_stdout);
+    return stream_buffered;
+}
+
+/**
+ * Duplicate the glib-allocated string 'g_string' into the talloc context
+ * 'ctx', then g_free() the original.  Returns the talloc copy.
+ */
+static char *
+g_string_talloc_strdup (void *ctx, char *g_string)
+{
+    char *copy = talloc_strdup (ctx, g_string);
+    g_free (g_string);
+    return copy;
+}
+
+/* Return the certificate's user id, but only if its id validity is FULL
+ * or ULTIMATE; otherwise (or if there is no user id) return NULL. */
+const char *
+g_mime_certificate_get_valid_userid (GMimeCertificate *cert)
+{
+    const char *uid = g_mime_certificate_get_user_id (cert);
+    GMimeValidity validity;
+
+    if (! uid)
+	return NULL;
+    validity = g_mime_certificate_get_id_validity (cert);
+    return (validity == GMIME_VALIDITY_FULL ||
+	    validity == GMIME_VALIDITY_ULTIMATE) ? uid : NULL;
+}
+
+/* Return the certificate's e-mail address, but only if its id validity
+ * is FULL or ULTIMATE; otherwise (or if there is none) return NULL. */
+const char *
+g_mime_certificate_get_valid_email (GMimeCertificate *cert)
+{
+    const char *email = g_mime_certificate_get_email (cert);
+    GMimeValidity validity;
+
+    if (! email)
+	return NULL;
+    validity = g_mime_certificate_get_id_validity (cert);
+    return (validity == GMIME_VALIDITY_FULL ||
+	    validity == GMIME_VALIDITY_ULTIMATE) ? email : NULL;
+}
+
+/* Return the last 16 characters of the fingerprint; a NULL or shorter
+ * fingerprint is returned unchanged. */
+const char *
+g_mime_certificate_get_fpr16 (GMimeCertificate *cert)
+{
+    const char *fpr = g_mime_certificate_get_fingerprint (cert);
+    size_t len = fpr ? strlen (fpr) : 0;
+
+    return (len < 16) ? fpr : fpr + (len - 16);
+}
+
+char *
+g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type)
+{
+    /* string form of the 'type' address list; free the result with g_free */
+    return internet_address_list_to_string (g_mime_message_get_addresses (message, type),
+					    NULL, 0);
+}
+
+/* Return a talloc-allocated date string for 'message'; a message whose
+ * date is unavailable yields the Unix epoch string. */
+char *
+g_mime_message_get_date_string (void *ctx, GMimeMessage *message)
+{
+    GDateTime *when = g_mime_message_get_date (message);
+
+    if (! when)
+	return talloc_strdup (ctx, "Thu, 01 Jan 1970 00:00:00 +0000");
+
+    return g_string_talloc_strdup (ctx, g_mime_utils_header_format_date (when));
+}
+
+InternetAddressList *
+g_mime_message_get_reply_to_list (GMimeMessage *message)
+{
+    return g_mime_message_get_reply_to (message); /* plain pass-through to GMime */
+}
+
+const char *
+g_mime_message_get_from_string (GMimeMessage *message)
+{
+    return g_mime_object_get_header (GMIME_OBJECT (message), "From"); /* value of the From header, not a parsed address list */
+}
+
+char *
+g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message)
+{
+    InternetAddressList *reply_to = g_mime_message_get_reply_to (message);
+    char *glib_str = internet_address_list_to_string (reply_to, NULL, 0);
+    return g_string_talloc_strdup (ctx, glib_str);      /* talloc copy; glib_str is freed */
+}
+
+void
+g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag)
+{   /* a true 'flag' selects the mbox format, a false one the single-message format */
+    g_mime_parser_set_format (parser, flag ? GMIME_FORMAT_MBOX : GMIME_FORMAT_MESSAGE);
+}
+
+/* In GMime 3.0, status GOOD and VALID both imply something about the
+ * validity of the UIDs attached to the signing key. This forces us to
+ * use the following somewhat relaxed definition of a "good" signature
+ * to preserve current notmuch semantics: a status is "good" when
+ * neither the RED bit nor any bit of the error mask is set. */
+
+gboolean
+g_mime_signature_status_good (GMimeSignatureStatus status)
+{
+    return ((status & (GMIME_SIGNATURE_STATUS_RED | GMIME_SIGNATURE_STATUS_ERROR_MASK)) == 0);
+}
+
+gboolean
+g_mime_signature_status_bad (GMimeSignatureStatus status)
+{
+    return ((status & GMIME_SIGNATURE_STATUS_RED) != 0);        /* TRUE iff the RED bit is set; never the raw bit */
+}
+
+gboolean
+g_mime_signature_status_error (GMimeSignatureStatus status)
+{
+    return ((status & GMIME_SIGNATURE_STATUS_ERROR_MASK) != 0); /* TRUE iff any error bit is set; never the raw bits */
+}
+
+/* Parse the date header value 'date' into seconds since the Unix
+ * epoch; returns 0 if it cannot be parsed.  The value is kept in a
+ * gint64 throughout so it is not truncated where time_t is 32 bits. */
+gint64
+g_mime_utils_header_decode_date_unix (const char *date)
+{
+    GDateTime *parsed_date = g_mime_utils_header_decode_date (date);
+    gint64 ret = 0;
+
+    if (parsed_date) {
+	ret = g_date_time_to_unix (parsed_date);
+	g_date_time_unref (parsed_date);
+    }
+    return ret;
+}
diff --git a/util/gmime-extra.h b/util/gmime-extra.h
new file mode 100644
index 00000000..889e91f3
--- /dev/null
+++ b/util/gmime-extra.h
@@ -0,0 +1,81 @@
+#ifndef _GMIME_EXTRA_H
+#define _GMIME_EXTRA_H
+#include <gmime/gmime.h>
+#include <talloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+GMimeStream *g_mime_stream_stdout_new (void);
+
+/* Return a GMime stream for this open file descriptor, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_new (int fd);
+
+/* Return a GMime stream for this path, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_open (const char *filename);
+
+/**
+ * Get last 16 hex digits of fingerprint ("keyid")
+ */
+const char *g_mime_certificate_get_fpr16 (GMimeCertificate *cert);
+/**
+ * Return the contents of the appropriate address header as a string
+ * Should be freed using g_free
+ */
+char *g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type);
+
+InternetAddressList *g_mime_message_get_addresses (GMimeMessage *message, GMimeAddressType type);
+
+/**
+ * return talloc allocated date string
+ */
+
+char *g_mime_message_get_date_string (void *ctx, GMimeMessage *message);
+
+/**
+ * glib allocated list of From: addresses
+ */
+
+InternetAddressList *g_mime_message_get_from (GMimeMessage *message);
+
+
+/**
+ * return string for From: address
+ * (owned by gmime)
+ */
+const char *g_mime_message_get_from_string (GMimeMessage *message);
+
+InternetAddressList *g_mime_message_get_reply_to_list (GMimeMessage *message);
+
+/**
+ * return talloc allocated reply-to string
+ */
+char *g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message);
+
+void g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag);
+
+gboolean g_mime_signature_status_good (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_bad (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_error (GMimeSignatureStatus status);
+
+gint64 g_mime_utils_header_decode_date_unix (const char *date);
+
+/**
+ * Return string for valid User ID (or NULL if no valid User ID exists)
+ */
+const char *g_mime_certificate_get_valid_userid (GMimeCertificate *cert);
+/**
+ * Return string for valid e-mail address (or NULL if no valid e-mail address exists)
+ */
+const char *g_mime_certificate_get_valid_email (GMimeCertificate *cert);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 00000000..81534a8c
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,159 @@
+/* hex-escape.c -  Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include <ctype.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+static const char *output_charset =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.,"; /* characters hex_encode copies unescaped */
+
+static const char escape_char = '%'; /* introduces a "%xx" escape */
+
+static int
+is_output (char c)
+{
+    return (strchr (output_charset, c) != NULL); /* non-zero when 'c' is in output_charset (strchr also matches '\0') */
+}
+
+/* Grow the talloc buffer *out (under 'ctx') to at least 'needed' bytes.
+ * Returns 1 on success, updating *out_size on growth; 0 on allocation
+ * failure, leaving *out and *out_size untouched. */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *grown;
+
+    if (*out_size >= needed)
+	return 1;
+    grown = *out ? talloc_realloc (ctx, *out, char, needed) : talloc_size (ctx, needed);
+    if (grown == NULL)
+	return 0;
+    *out = grown;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode 'in' into *out: characters from output_charset are copied,
+ * every other byte becomes "%xx" (two lowercase hex digits).  *out is
+ * (re)allocated under 'ctx' as needed, getline-style. */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    size_t needed = 1;  /* for the NUL */
+    const char *src;
+    char *dst;
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    for (src = in; *src; src++)
+	needed += is_output (*src) ? 1 : 3;
+
+    /* a NULL buffer has no usable size, whatever the caller passed */
+    if (*out == NULL)
+	*out_size = 0;
+
+    if (! maybe_realloc (ctx, needed, out, out_size))
+	return HEX_OUT_OF_MEMORY;
+    dst = *out;
+    for (src = in; *src; src++) {
+	if (is_output (*src)) {
+	    *dst++ = *src;
+	} else {
+	    unsigned char byte = (unsigned char) *src;
+	    *dst++ = '%';
+	    *dst++ = hex_digits[byte >> 4];
+	    *dst++ = hex_digits[byte & 0x0f];
+	}
+    }
+    *dst = '\0';
+    return HEX_SUCCESS;
+}
+
+/* Hex decode 'in' to 'out', turning each "%xx" escape back into one
+ * byte and copying everything else; 'out' is NUL-terminated on success.
+ * Returns HEX_SYNTAX_ERROR for a '%' not followed by two hex digits.
+ *
+ * This must succeed for in == out to support hex_decode_inplace().
+ */
+static hex_status_t
+hex_decode_internal (const char *in, unsigned char *out)
+{
+    while (*in) {
+	char digits[3];
+	char *endp;
+
+	if (*in != escape_char) {
+	    *out++ = *in++;
+	    continue;
+	}
+
+	/* This also handles unexpected end-of-string. */
+	if (! isxdigit ((unsigned char) in[1]) ||
+	    ! isxdigit ((unsigned char) in[2]))
+	    return HEX_SYNTAX_ERROR;
+
+	/* copy the digits out before writing: 'out' may alias 'in' */
+	digits[0] = in[1];
+	digits[1] = in[2];
+	digits[2] = '\0';
+
+	*out = strtoul (digits, &endp, 16);
+	if (endp != digits + 2)
+	    return HEX_SYNTAX_ERROR;
+
+	in += 3;
+	out++;
+    }
+    *out = '\0';
+    return HEX_SUCCESS;
+}
+
+hex_status_t
+hex_decode_inplace (char *s)
+{
+    /* A decoded string is never longer than the encoded one, so 's' can be
+     * both input and output; after a syntax error 's' may be partly overwritten. */
+    return hex_decode_internal (s, (unsigned char *) s);
+}
+
+/* Decode hex-escaped 'in' into *out, (re)allocated under 'ctx' getline-style;
+ * returns HEX_SYNTAX_ERROR on a bad escape, HEX_OUT_OF_MEMORY if growing fails. */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    const char *p;
+    size_t needed = 1;  /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+    if (*out == NULL)   /* as in hex_encode: a NULL buffer has no usable size */
+	*out_size = 0;
+    for (p = in; *p; p++, needed++)     /* a whole "%xx" escape decodes to one byte */
+	if (*p == escape_char && isxdigit ((unsigned char) p[1]) &&
+	    isxdigit ((unsigned char) p[2]))
+	    p += 2;
+    if (! maybe_realloc (ctx, needed, out, out_size))
+	return HEX_OUT_OF_MEMORY;
+    return hex_decode_internal (in, (unsigned char *) *out);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 00000000..83a4c6f1
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,50 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+    HEX_SUCCESS = 0,
+    HEX_SYNTAX_ERROR,
+    HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API for hex_encode() and hex_decode() is modelled on that for
+ * getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size.  This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+	    size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+	    size_t *out_size);
+
+/*
+ * Non-allocating hex decode to decode 's' in-place. The length of the
+ * result is always equal to or shorter than the length of the
+ * original.
+ */
+hex_status_t
+hex_decode_inplace (char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/path-util.c b/util/path-util.c
new file mode 100644
index 00000000..3267a967
--- /dev/null
+++ b/util/path-util.c
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define _GNU_SOURCE
+
+#include "path-util.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+/* Canonical absolute form of 'path' in a malloc'ed buffer (caller frees), or NULL on failure. */
+char *
+notmuch_canonicalize_file_name (const char *path)
+{
+#if HAVE_CANONICALIZE_FILE_NAME
+    return canonicalize_file_name (path);
+#elif defined(PATH_MAX)
+    char *resolved_path = malloc (PATH_MAX + 1);
+    if (resolved_path == NULL || realpath (path, resolved_path))
+	return resolved_path;
+    free (resolved_path);       /* realpath failed: do not leak the buffer */
+    return NULL;
+#else
+#error undefined PATH_MAX _and_ missing canonicalize_file_name not supported
+#endif
+}
diff --git a/util/path-util.h b/util/path-util.h
new file mode 100644
index 00000000..ac85f696
--- /dev/null
+++ b/util/path-util.h
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef NOTMUCH_UTIL_PATH_UTIL_H_
+#define NOTMUCH_UTIL_PATH_UTIL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *
+notmuch_canonicalize_file_name (const char *path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NOTMUCH_UTIL_PATH_UTIL_H_ */
diff --git a/util/repair.c b/util/repair.c
new file mode 100644
index 00000000..5b0dfdf4
--- /dev/null
+++ b/util/repair.c
@@ -0,0 +1,158 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2019 Daniel Kahn Gillmor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Authors: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ */
+
+#include <stdbool.h>
+#include "repair.h"
+
+
+/* True when the "protected-headers" content-type parameter of obj is
+ * present and is exactly "v1". */
+static bool
+_notmuch_has_protected_headers_v1 (GMimeObject *obj)
+{
+    const char *value = g_mime_object_get_content_type_parameter (obj, "protected-headers");
+
+    return value != NULL && strcmp (value, "v1") == 0;
+}
+
+/* Decide whether payload looks like a cryptographic payload carrying a
+ * "Legacy Display" part: a two-part multipart/mixed marked
+ * protected-headers="v1" whose first child is a text part (text/plain
+ * or text/rfc822-headers) that is itself marked protected-headers="v1".
+ */
+static bool
+_notmuch_crypto_payload_has_legacy_display (GMimeObject *payload)
+{
+    GMimeContentType *payload_type = g_mime_object_get_content_type (payload);
+    GMimeContentType *leading_type;
+    GMimeMultipart *multipart;
+    GMimeObject *leading_part;
+
+    if (! g_mime_content_type_is_type (payload_type, "multipart", "mixed"))
+	return false;
+    if (! _notmuch_has_protected_headers_v1 (payload))
+	return false;
+    if (! GMIME_IS_MULTIPART (payload))
+	return false;
+
+    multipart = GMIME_MULTIPART (payload);
+    if (multipart == NULL || g_mime_multipart_get_count (multipart) != 2)
+	return false;
+
+    /* Early implementations that generated "Legacy Display" parts used
+     * Content-Type: text/rfc822-headers, but text/plain is more widely
+     * rendered, so it is now the standard choice.  Either one is
+     * accepted as a Legacy Display part. */
+    leading_part = g_mime_multipart_get_part (multipart, 0);
+    leading_type = g_mime_object_get_content_type (leading_part);
+    if (! g_mime_content_type_is_type (leading_type, "text", "plain") &&
+	! g_mime_content_type_is_type (leading_type, "text", "rfc822-headers"))
+	return false;
+
+    if (! _notmuch_has_protected_headers_v1 (leading_part))
+	return false;
+
+    return GMIME_IS_TEXT_PART (leading_part) ? true : false;
+}
+
+/* Hand back the second subpart of payload when payload carries a
+ * "Legacy Display" part; otherwise return payload itself.  No new
+ * object is created here. */
+GMimeObject *
+_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload)
+{
+    if (! _notmuch_crypto_payload_has_legacy_display (payload))
+	return payload;
+
+    return g_mime_multipart_get_part (GMIME_MULTIPART (payload), 1);
+}
+
+/* see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.1
+ *
+ * Return true when part has the "Mixed Up" shape: a multipart/mixed
+ * with exactly three children, namely an empty text/plain text part,
+ * an application/pgp-encrypted part and an application/octet-stream
+ * part, in that order.  A NULL part yields false.
+ */
+static bool
+_notmuch_is_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mpart = NULL;
+    GMimeObject *parts[3] = { NULL, NULL, NULL };
+    GMimeContentType *type = NULL;
+    char *prelude_string = NULL;
+    bool prelude_is_empty;
+
+    if (part == NULL)
+	return false;
+    type = g_mime_object_get_content_type (part);
+    if (type == NULL)
+	return false;
+    if (! g_mime_content_type_is_type (type, "multipart", "mixed"))
+	return false;
+    if (! GMIME_IS_MULTIPART (part)) /* probably impossible */
+	return false;
+    mpart = GMIME_MULTIPART (part);
+    if (mpart == NULL)
+	return false;
+    if (g_mime_multipart_get_count (mpart) != 3)
+	return false;
+    parts[0] = g_mime_multipart_get_part (mpart, 0);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[0]),
+				       "text", "plain"))
+	return false;
+    if (! GMIME_IS_TEXT_PART (parts[0]))
+	return false;
+    parts[1] = g_mime_multipart_get_part (mpart, 1);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[1]),
+				       "application", "pgp-encrypted"))
+	return false;
+    parts[2] = g_mime_multipart_get_part (mpart, 2);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[2]),
+				       "application", "octet-stream"))
+	return false;
+
+    /* Is parts[0] length 0?  The getter is nullable (no text could be
+     * produced for the part); treat that like an empty prelude rather
+     * than dereferencing NULL.  g_free accepts NULL. */
+    prelude_string = g_mime_text_part_get_text (GMIME_TEXT_PART (parts[0]));
+    prelude_is_empty = (prelude_string == NULL || prelude_string[0] == '\0');
+    g_free (prelude_string);
+    if (! prelude_is_empty)
+	return false;
+
+    /* FIXME: after decoding and stripping whitespace, is parts[1]
+     * subpart just "Version: 1" ? */
+
+    /* FIXME: can we determine that parts[2] subpart is *only* PGP
+     * encrypted data?  I tried g_mime_part_get_openpgp_data () but
+     * found https://github.com/jstedfast/gmime/issues/60 */
+
+    return true;
+}
+
+
+/* see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.2
+ *
+ * When part is "Mixed Up" mangled, build a new multipart/encrypted
+ * object (protocol application/pgp-encrypted) holding the second and
+ * third children of part, and return it.  Returns NULL when part is
+ * not mangled or the new object cannot be set up.
+ */
+GMimeObject *
+_notmuch_repair_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mangled, *repaired;
+    GMimeObject *result;
+    int i;
+
+    if (! _notmuch_is_mixed_up_mangled (part))
+	return NULL;
+
+    mangled = GMIME_MULTIPART (part);
+    result = GMIME_OBJECT (g_mime_multipart_encrypted_new ());
+    if (result == NULL)
+	return NULL;
+
+    repaired = GMIME_MULTIPART (result);
+    if (repaired == NULL) {
+	g_object_unref (result);
+	return NULL;
+    }
+
+    g_mime_object_set_content_type_parameter (result, "protocol", "application/pgp-encrypted");
+
+    /* Children 1 and 2 of the mangled part become children 0 and 1 of
+     * the repaired one; the empty prelude (child 0) is dropped. */
+    for (i = 0; i < 2; i++)
+	g_mime_multipart_insert (repaired, i, g_mime_multipart_get_part (mangled, i + 1));
+
+    return result;
+}
diff --git a/util/repair.h b/util/repair.h
new file mode 100644
index 00000000..492f5a20
--- /dev/null
+++ b/util/repair.h
@@ -0,0 +1,44 @@
+#ifndef _REPAIR_H
+#define _REPAIR_H
+
+#include "gmime-extra.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is a collection of message structure and message format repair
+ * techniques that are designed to improve the user experience of
+ * notmuch */
+
+/* If payload is a cryptographic payload within an encrypted message, and
+ * it has a "legacy display" part, then we can skip over it and jump
+ * to the actual content, because notmuch already handles protected
+ * headers appropriately.
+ *
+ * This function either returns payload directly (if it does not have
+ * a "legacy display" part), or it returns a pointer to its
+ * content-bearing subpart, with the "legacy display" part and the
+ * surrounding multipart/mixed object bypassed.
+ *
+ * No new objects are created by calling this function, and the
+ * returned object will only be released when the original part is
+ * disposed of.
+ */
+
+GMimeObject *
+_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload);
+
+/* Detecting and repairing "Mixed-Up MIME mangling". see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1
+ * If this returns NULL, the message was probably not "Mixed up".  If
+ * it returns non-NULL, then there is a newly-allocated MIME part that
+ * represents the repaired version.  The caller is responsible for
+ * ensuring that any returned object is freed with g_object_unref. */
+GMimeObject *
+_notmuch_repair_mixed_up_mangled (GMimeObject *part);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/string-util.c b/util/string-util.c
new file mode 100644
index 00000000..03d7648d
--- /dev/null
+++ b/util/string-util.c
@@ -0,0 +1,298 @@
+/* string-util.c -  Extra or enhanced routines for null terminated strings.
+ *
+ * Copyright (c) 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Jani Nikula <jani@nikula.org>
+ */
+
+
+#include "string-util.h"
+#include "talloc.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+
+/* Find the first token in s: step over leading delim characters, then
+ * store the length of the following run of non-delim characters in
+ * *len.  Returns the token start, or NULL (with *len == 0) when no
+ * token is left.  s itself is never modified. */
+char *
+strtok_len (char *s, const char *delim, size_t *len)
+{
+    char *token = s + strspn (s, delim);
+    size_t token_len = strcspn (token, delim);
+
+    *len = token_len;
+    if (token_len == 0)
+	return NULL;
+
+    return token;
+}
+
+/* Single-delimiter tokenizer with backslash escaping.
+ *
+ * Skips leading delimiters and whitespace in s, then scans up to the
+ * next delimiter that is not directly preceded by a backslash.
+ * Returns the token start and stores in *len its length with
+ * trailing whitespace excluded.  Returns NULL, leaving *len
+ * untouched, when no token remains.
+ */
+const char *
+strsplit_len (const char *s, char delim, size_t *len)
+{
+    bool escaping = false;
+    size_t count = 0, last_nonspace = 0;
+
+    /* Skip initial unescaped delimiters and whitespace.  The ctype
+     * functions need an unsigned char value; a plain (possibly
+     * negative) char is undefined behavior for bytes >= 0x80. */
+    while (*s && (*s == delim || isspace ((unsigned char) *s)))
+	s++;
+
+    while (s[count] && (escaping || s[count] != delim)) {
+	if (! isspace ((unsigned char) s[count]))
+	    last_nonspace = count;
+	/* a backslash protects only the character right after it */
+	escaping = (s[count] == '\\');
+	count++;
+    }
+
+    if (count == 0)
+	return NULL;
+
+    *len = last_nonspace + 1;
+    return s;
+}
+
+/* Const-qualified front end for strtok_len. */
+const char *
+strtok_len_c (const char *s, const char *delim, size_t *len)
+{
+    /* strtok_len never writes through s, so shedding the qualifier is
+     * harmless; the C type system just cannot describe a function
+     * that is const-correct for both flavours of argument. */
+    char *unqualified = (char *) s;
+
+    return strtok_len (unqualified, delim, len);
+}
+
+/* Return a talloc'ed copy of str (allocated under ctx) in which tabs
+ * and newlines are turned into spaces and every other byte below 32
+ * into '?'.  Returns NULL when str is NULL or the copy cannot be
+ * allocated. */
+char *
+sanitize_string (const void *ctx, const char *str)
+{
+    char *copy;
+    size_t i;
+
+    if (str == NULL)
+	return NULL;
+
+    copy = talloc_strdup (ctx, str);
+    if (copy == NULL)
+	return NULL;
+
+    for (i = 0; copy[i] != '\0'; i++) {
+	unsigned char byte = (unsigned char) copy[i];
+
+	if (byte == '\t' || byte == '\n')
+	    copy[i] = ' ';
+	else if (byte < 32)
+	    copy[i] = '?';
+    }
+
+    return copy;
+}
+
+/* Non-zero when c ends an unquoted boolean term: any byte up to and
+ * including space (which covers NUL) or a close parenthesis. */
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    if (c == ')')
+	return 1;
+
+    return c <= ' ';
+}
+
+/* Write "prefix:term" (or just term when prefix is NULL) into *buf,
+ * quoting term when required.
+ *
+ * *buf / *len describe a talloc'ed buffer that is grown with
+ * talloc_realloc (to twice the space needed) when it is NULL or too
+ * small.  Returns 0 on success; on allocation failure returns -1
+ * with errno set to ENOMEM.
+ */
+int
+make_boolean_term (void *ctx, const char *prefix, const char *term,
+		   char **buf, size_t *len)
+{
+    const char *src;
+    char *dst;
+    size_t required;
+    /* An empty term can only be expressed in quoted form. */
+    bool quote = (term[0] == '\0');
+
+    /* To be paranoid, quote anything containing a terminator, a
+     * double quote or '(' (even though the latter two only matter at
+     * the beginning), and anything containing non-ASCII text. */
+    for (src = term; ! quote && *src; src++) {
+	unsigned char c = (unsigned char) *src;
+
+	quote = is_unquoted_terminator (c) || c == '"' || c == '(' || c > 127;
+    }
+
+    if (quote) {
+	/* two enclosing quotes plus the terminating NUL ... */
+	required = 3;
+	/* ... and every internal double quote is emitted twice */
+	for (src = term; *src; src++)
+	    required += (*src == '"') ? 2 : 1;
+    } else {
+	required = strlen (term) + 1;
+    }
+
+    /* Room for the prefix and its ':' separator */
+    if (prefix)
+	required += strlen (prefix) + 1;
+
+    if (*buf == NULL || *len < required) {
+	*len = 2 * required;
+	*buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+    if (*buf == NULL) {
+	errno = ENOMEM;
+	return -1;
+    }
+
+    dst = *buf;
+
+    if (prefix) {
+	size_t prefix_len = strlen (prefix);
+
+	memcpy (dst, prefix, prefix_len);
+	dst += prefix_len;
+	*dst++ = ':';
+    }
+
+    if (! quote) {
+	strcpy (dst, term);
+	return 0;
+    }
+
+    /* Enclose the term in double quotes, doubling internal ones. */
+    *dst++ = '"';
+    for (src = term; *src; src++) {
+	if (*src == '"')
+	    *dst++ = '"';
+	*dst++ = *src;
+    }
+    *dst++ = '"';
+    *dst = '\0';
+
+    return 0;
+}
+
+/* Return a pointer to the first character of str that is not
+ * whitespace (possibly its terminating NUL). */
+const char *
+skip_space (const char *str)
+{
+    const char *cursor;
+
+    for (cursor = str; *cursor != '\0'; cursor++) {
+	if (! isspace ((unsigned char) *cursor))
+	    break;
+    }
+
+    return cursor;
+}
+
+/* Split str, of the form prefix:term, into talloc'ed copies stored in
+ * *prefix_out and *term_out (both allocated under ctx).
+ *
+ * Leading whitespace before the prefix and trailing whitespace after
+ * the term are accepted.  A term starting with '"' is de-quoted (the
+ * enclosing quotes are removed and doubled internal quotes
+ * collapsed); an unquoted term ends at the first unquoted terminator.
+ *
+ * Returns 0 on success.  On failure returns -1 with errno set to
+ * EINVAL (parse error) or ENOMEM (allocation failure), and both
+ * output pointers are reset to NULL.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out)
+{
+    int err = EINVAL;
+
+    *prefix_out = *term_out = NULL;
+
+    /* Parse prefix */
+    str = skip_space (str);
+    const char *pos = strchr (str, ':');
+
+    /* A missing ':' or an empty prefix is a parse error. */
+    if (! pos || pos == str)
+	goto FAIL;
+    *prefix_out = talloc_strndup (ctx, str, pos - str);
+    if (! *prefix_out) {
+	err = ENOMEM;
+	goto FAIL;
+    }
+    ++pos;
+
+    /* Implement de-quoting compatible with make_boolean_term. */
+    if (*pos == '"') {
+	/* strlen (pos) still counts the opening quote, so this is
+	 * always enough for the de-quoted text and its terminator. */
+	char *out = talloc_array (ctx, char, strlen (pos));
+	int closed = 0;
+	if (! out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
+	*term_out = out;
+	/* Skip the opening quote, find the closing quote, and
+	 * un-double doubled internal quotes. */
+	for (++pos; *pos; ) {
+	    if (*pos == '"') {
+		++pos;
+		if (*pos != '"') {
+		    /* Found the closing quote. */
+		    closed = 1;
+		    pos = skip_space (pos);
+		    break;
+		}
+	    }
+	    *out++ = *pos++;
+	}
+	/* Did the term terminate without a closing quote or is there
+	 * trailing text after the closing quote? */
+	if (! closed || *pos)
+	    goto FAIL;
+	*out = '\0';
+    } else {
+	const char *start = pos;
+	/* Check for text after the boolean term. */
+	while (! is_unquoted_terminator (*pos))
+	    ++pos;
+	if (*skip_space (pos)) {
+	    err = EINVAL;
+	    goto FAIL;
+	}
+	/* No trailing text; dup the string so the caller can free
+	 * it. */
+	*term_out = talloc_strndup (ctx, start, pos - start);
+	if (! *term_out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
+    }
+    return 0;
+
+  FAIL:
+    talloc_free (*prefix_out);
+    talloc_free (*term_out);
+    /* Never hand pointers to freed memory back to the caller. */
+    *prefix_out = *term_out = NULL;
+    /* Set errno last so the cleanup above cannot clobber it. */
+    errno = err;
+    return -1;
+}
+
+/* strcmp that tolerates NULL arguments: two NULLs compare equal and a
+ * NULL string sorts before any non-NULL string. */
+int
+strcmp_null (const char *s1, const char *s2)
+{
+    if (s1 == NULL)
+	return (s2 == NULL) ? 0 : -1;
+
+    if (s2 == NULL)
+	return 1;
+
+    return strcmp (s1, s2);
+}
+
+/* Return 1 when the strings a and b are equal ignoring case (as
+ * judged by strcasecmp), 0 otherwise. */
+int
+strcase_equal (const void *a, const void *b)
+{
+    const char *lhs = a;
+    const char *rhs = b;
+
+    return ! strcasecmp (lhs, rhs);
+}
+
+/* Case-insensitive djb2 hash of the NUL-terminated string at ptr.
+ *
+ * Each byte is lower-cased before being mixed in, so strings that
+ * differ only in case hash identically.  A NULL pointer hashes like
+ * the empty string (the djb2 seed, 5381).
+ */
+unsigned int
+strcase_hash (const void *ptr)
+{
+    /* Walk the string as unsigned bytes: tolower is only defined for
+     * unsigned char values (and EOF), so a negative plain char would
+     * be undefined behavior for bytes >= 0x80. */
+    const unsigned char *s = ptr;
+
+    /* This is the djb2 hash. */
+    unsigned int hash = 5381;
+
+    if (s == NULL)
+	return hash;
+
+    for (; *s; s++)
+	hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);
+
+    return hash;
+}
+
+/* Remove every trailing occurrence of ch from str, in place, by
+ * overwriting it with NUL.  An empty string is left untouched. */
+void
+strip_trailing (char *str, char ch)
+{
+    /* Index with size_t: storing strlen's result in an int would
+     * truncate for strings longer than INT_MAX. */
+    size_t len = strlen (str);
+
+    while (len > 0 && str[len - 1] == ch)
+	str[--len] = '\0';
+}
diff --git a/util/string-util.h b/util/string-util.h
new file mode 100644
index 00000000..80647c5f
--- /dev/null
+++ b/util/string-util.h
@@ -0,0 +1,100 @@
+#ifndef _STRING_UTIL_H
+#define _STRING_UTIL_H
+
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* like strtok(3), but without state, and doesn't modify s.  Return
+ * value is indicated by pointer and length, not null terminator.
+ *
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " \t";
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ *     // do stuff with string tok of length tok_len
+ * }
+ */
+
+char *strtok_len (char *s, const char *delim, size_t *len);
+
+/* Const version of strtok_len. */
+const char *strtok_len_c (const char *s, const char *delim, size_t *len);
+
+/* Simplified version of strtok_len, with a single delimiter.
+ * Handles escaping delimiters with \
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * char delim = ';';
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strsplit_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ *     // do stuff with string tok of length tok_len
+ * }
+ */
+const char *strsplit_len (const char *s, char delim, size_t *len);
+
+/* Return a talloced string with str sanitized.
+ *
+ * Whitespace characters (tabs and newlines) are replaced with spaces,
+ * non-printable characters with question marks.
+ */
+char *sanitize_string (const void *ctx, const char *str);
+
+/* Construct a boolean term query with the specified prefix (e.g.,
+ * "id") and search term, quoting term as necessary.  Specifically, if
+ * term is empty or contains any space or lower-valued control
+ * character, non-ASCII character, parenthesis or double quote, it
+ * will be enclosed in double quotes and any internal double quotes
+ * will be doubled (e.g. a"b -> "a""b").  The result will be a valid
+ * notmuch query and can be parsed by parse_boolean_term.
+ *
+ * Output is into buf; it may be talloc_realloced.
+ * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if
+ * there is an allocation failure.
+ */
+int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
+		       char **buf, size_t *len);
+
+/* Parse a boolean term query consisting of a prefix, a colon, and a
+ * term that may be quoted as described for make_boolean_term.  If the
+ * term is not quoted, then it ends at the first whitespace or close
+ * parenthesis.  str may contain leading or trailing whitespace,
+ * but anything else is considered a parse error.  This is compatible
+ * with anything produced by make_boolean_term, and supports a subset
+ * of the quoting styles supported by Xapian (and hence notmuch).
+ * *prefix_out and *term_out will be talloc'd with context ctx.
+ *
+ * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
+ * there is a parse error or ENOMEM if there is an allocation failure.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out);
+
+/* strcmp that handles NULL strings; in strcmp terms a NULL string is
+ * considered to be less than a non-NULL string.
+ */
+int strcmp_null (const char *s1, const char *s2);
+
+/* GLib GEqualFunc compatible strcasecmp wrapper */
+int strcase_equal (const void *a, const void *b);
+
+/* GLib GHashFunc compatible case insensitive hash function */
+unsigned int strcase_hash (const void *ptr);
+
+void strip_trailing (char *str, char ch);
+
+const char *skip_space (const char *str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/talloc-extra.c b/util/talloc-extra.c
new file mode 100644
index 00000000..96262470
--- /dev/null
+++ b/util/talloc-extra.c
@@ -0,0 +1,14 @@
+#include <string.h>
+#include "talloc-extra.h"
+
+/* talloc_strndup str (at most len bytes) under ctx and, when that
+ * succeeds, label the copy with name via talloc_set_name_const.
+ * Returns the copy, or NULL if the duplication failed. */
+char *
+talloc_strndup_named_const (void *ctx, const char *str,
+			    size_t len, const char *name)
+{
+    char *copy = talloc_strndup (ctx, str, len);
+
+    if (copy == NULL)
+	return NULL;
+
+    talloc_set_name_const (copy, name);
+    return copy;
+}
diff --git a/util/talloc-extra.h b/util/talloc-extra.h
new file mode 100644
index 00000000..e2e61734
--- /dev/null
+++ b/util/talloc-extra.h
@@ -0,0 +1,26 @@
+#ifndef _TALLOC_EXTRA_H
+#define _TALLOC_EXTRA_H
+
+#include <talloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Like talloc_strndup, but take an extra parameter for the internal talloc
+ * name (for debugging) */
+
+char *
+talloc_strndup_named_const (void *ctx, const char *str,
+			    size_t len, const char *name);
+
+/* use the __location__ macro from talloc.h to name a string according to its
+ * source location */
+
+#define talloc_strndup_debug(ctx, str, len) talloc_strndup_named_const (ctx, str, len, __location__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/unicode-util.c b/util/unicode-util.c
new file mode 100644
index 00000000..ccb787e2
--- /dev/null
+++ b/util/unicode-util.c
@@ -0,0 +1,43 @@
+#include "unicode-util.h"
+
+/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to
+ * link directly to libxapian.
+ *
+ * A code point counts as a word character when its GLib Unicode type
+ * is a letter, a mark, a number or connector punctuation.
+ */
+
+static bool
+unicode_is_wordchar (notmuch_unichar ch)
+{
+    bool wordchar = false;
+
+    switch (g_unichar_type (ch)) {
+    /* letters */
+    case G_UNICODE_UPPERCASE_LETTER:
+    case G_UNICODE_LOWERCASE_LETTER:
+    case G_UNICODE_TITLECASE_LETTER:
+    case G_UNICODE_MODIFIER_LETTER:
+    case G_UNICODE_OTHER_LETTER:
+    /* marks */
+    case G_UNICODE_NON_SPACING_MARK:
+    case G_UNICODE_ENCLOSING_MARK:
+    case G_UNICODE_SPACING_MARK:
+    /* numbers */
+    case G_UNICODE_DECIMAL_NUMBER:
+    case G_UNICODE_LETTER_NUMBER:
+    case G_UNICODE_OTHER_NUMBER:
+    /* connector punctuation */
+    case G_UNICODE_CONNECT_PUNCTUATION:
+	wordchar = true;
+	break;
+    default:
+	break;
+    }
+
+    return wordchar;
+}
+
+/* Return true when every code point of the NUL-terminated UTF-8
+ * string utf8_str is a word character (see unicode_is_wordchar); the
+ * empty string therefore yields true. */
+bool
+unicode_word_utf8 (const char *utf8_str)
+{
+    gunichar *ucs4 = g_utf8_to_ucs4_fast (utf8_str, -1, NULL);
+    gsize i = 0;
+    bool all_wordchars;
+
+    /* stop at the terminator or at the first non-word code point */
+    while (ucs4[i] != 0 && unicode_is_wordchar (ucs4[i]))
+	i++;
+
+    all_wordchars = (ucs4[i] == 0);
+
+    g_free (ucs4);
+    return all_wordchars;
+}
diff --git a/util/unicode-util.h b/util/unicode-util.h
new file mode 100644
index 00000000..1bb9336a
--- /dev/null
+++ b/util/unicode-util.h
@@ -0,0 +1,19 @@
+#ifndef UNICODE_UTIL_H
+#define UNICODE_UTIL_H
+
+#include <stdbool.h>
+#include <gmodule.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The utf8 encoded string would tokenize as a single word, according
+ * to xapian. */
+bool unicode_word_utf8 (const char *str);
+typedef gunichar notmuch_unichar;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/util.c b/util/util.c
new file mode 100644
index 00000000..6abe2215
--- /dev/null
+++ b/util/util.c
@@ -0,0 +1,24 @@
+#include "util.h"
+#include "error_util.h"
+#include <string.h>
+#include <errno.h>
+
+/* Map a util_status_t code to a human-readable message.  UTIL_ERRNO
+ * is rendered with strerror on the current errno; an unknown code is
+ * reported through INTERNAL_ERROR. */
+const char *
+util_error_string (util_status_t errnum)
+{
+    const char *message = NULL;
+
+    switch (errnum) {
+    case UTIL_SUCCESS:
+	message = "success";
+	break;
+    case UTIL_OUT_OF_MEMORY:
+	message = "out of memory";
+	break;
+    case UTIL_EOF:
+	message = "end of file";
+	break;
+    case UTIL_ERRNO:
+	message = strerror (errno);
+	break;
+    case UTIL_GZERROR:
+	/* we lack context to be more informative here */
+	message = "zlib error";
+	break;
+    default:
+	/* NOTE(review): INTERNAL_ERROR is presumed not to return --
+	 * confirm in error_util.h. */
+	INTERNAL_ERROR ("unexpected error status %d", errnum);
+    }
+
+    return message;
+}
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 00000000..b24860af
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,29 @@
+#ifndef _UTIL_H
+#define _UTIL_H
+
+/* Status codes reported by the utility routines.  util_error_string
+ * turns each of them into a human-readable message. */
+typedef enum util_status {
+    /**
+     * No error occurred.
+     */
+    UTIL_SUCCESS = 0,
+    /**
+     * Out of memory.
+     */
+    UTIL_OUT_OF_MEMORY,
+    /**
+     * End of stream reached while attempting to read.
+     */
+    UTIL_EOF,
+    /**
+     * Low level error occurred, consult errno.
+     */
+    UTIL_ERRNO,
+    /**
+     * Zlib error occurred, call gzerror for details.
+     */
+    UTIL_GZERROR
+} util_status_t;
+
+const char *
+util_error_string (util_status_t status);
+#endif
diff --git a/util/xapian-extra.h b/util/xapian-extra.h
new file mode 100644
index 00000000..39c7f48f
--- /dev/null
+++ b/util/xapian-extra.h
@@ -0,0 +1,15 @@
+#ifndef _XAPIAN_EXTRA_H
+#define _XAPIAN_EXTRA_H
+
+#include <string>
+#include <xapian.h>
+
+// Build, on every call, a fresh query equivalent to
+// Xapian::Query::MatchAll.
+inline Xapian::Query
+xapian_query_match_all (void)
+{
+    // Xapian::Query::MatchAll isn't thread safe (a static object with
+    // reference counting), so the equivalent query is rebuilt on demand
+    // from an empty term instead of sharing that object.
+    const std::string empty_term;
+
+    return Xapian::Query (empty_term);
+}
+
+#endif
diff --git a/util/xutil.c b/util/xutil.c
new file mode 100644
index 00000000..07a00343
--- /dev/null
+++ b/util/xutil.c
@@ -0,0 +1,139 @@
+/* xutil.c - Various wrapper functions to abort on error.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "xutil.h"
+#include "error_util.h"
+
+/* calloc wrapper that never returns NULL: when calloc yields NULL it
+ * prints "Out of memory." to stderr and exits with status 1. */
+void *
+xcalloc (size_t nmemb, size_t size)
+{
+    void *block = calloc (nmemb, size);
+
+    if (block != NULL)
+	return block;
+
+    fprintf (stderr, "Out of memory.\n");
+    exit (1);
+}
+
+/* malloc wrapper that never returns NULL: when malloc yields NULL it
+ * prints "Out of memory." to stderr and exits with status 1. */
+void *
+xmalloc (size_t size)
+{
+    void *block = malloc (size);
+
+    if (block != NULL)
+	return block;
+
+    fprintf (stderr, "Out of memory.\n");
+    exit (1);
+}
+
+/* realloc wrapper that never returns NULL: when realloc yields NULL
+ * it prints "Out of memory." to stderr and exits with status 1. */
+void *
+xrealloc (void *ptr, size_t size)
+{
+    void *resized = realloc (ptr, size);
+
+    if (resized != NULL)
+	return resized;
+
+    fprintf (stderr, "Out of memory.\n");
+    exit (1);
+}
+
+/* strdup wrapper that never returns NULL: when strdup yields NULL it
+ * prints "Out of memory." to stderr and exits with status 1. */
+char *
+xstrdup (const char *s)
+{
+    char *copy = strdup (s);
+
+    if (copy != NULL)
+	return copy;
+
+    fprintf (stderr, "Out of memory.\n");
+    exit (1);
+}
+
+/* Duplicate at most n bytes of s into a freshly malloc'ed,
+ * NUL-terminated string.  Never returns NULL: on allocation failure
+ * it prints "Out of memory." to stderr and exits with status 1.
+ *
+ * Only the first n bytes of s are examined, so s need not be
+ * NUL-terminated as long as at least n bytes are readable.
+ */
+char *
+xstrndup (const char *s, size_t n)
+{
+    /* Bound the scan with memchr instead of calling strlen: strlen
+     * would read past n, and past the end of an unterminated buffer. */
+    const char *terminator = memchr (s, '\0', n);
+    size_t copy_len = terminator ? (size_t) (terminator - s) : n;
+    char *ret;
+
+    ret = malloc (copy_len + 1);
+    if (ret == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	exit (1);
+    }
+    memcpy (ret, s, copy_len);
+    ret[copy_len] = '\0';
+
+    return ret;
+}
+
+/* Compile regex into preg with regcomp.  Returns 0 on success; on
+ * failure prints the regerror message to stderr and returns 1. */
+int
+xregcomp (regex_t *preg, const char *regex, int cflags)
+{
+    int rerr = regcomp (preg, regex, cflags);
+    size_t message_size;
+    char *message;
+
+    if (rerr == 0)
+	return 0;
+
+    /* A first regerror call with an empty buffer reports how much
+     * room the message needs. */
+    message_size = regerror (rerr, preg, NULL, 0);
+    message = xmalloc (message_size);
+
+    regerror (rerr, preg, message, message_size);
+    fprintf (stderr, "compiling regex %s: %s\n",
+	     regex, message);
+    free (message);
+
+    return 1;
+}
+
+/* regexec wrapper that insists on every requested sub-match.
+ *
+ * Returns regexec's non-zero result unchanged when the match fails.
+ * On a successful match, raises INTERNAL_ERROR if any of the first
+ * nmatch entries of pmatch was left unset (rm_so == -1); otherwise
+ * returns 0.
+ */
+int
+xregexec (const regex_t *preg, const char *string,
+	  size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+    /* Same type as nmatch, so the loop bound cannot be truncated. */
+    size_t i;
+    int rerr;
+
+    rerr = regexec (preg, string, nmatch, pmatch, eflags);
+    if (rerr)
+	return rerr;
+
+    for (i = 0; i < nmatch; i++) {
+	/* NOTE(review): assumes INTERNAL_ERROR takes a printf-style
+	 * format -- confirm in error_util.h.  %zu matches size_t. */
+	if (pmatch[i].rm_so == -1)
+	    INTERNAL_ERROR ("matching regex against %s: Sub-match %zu not found\n",
+			    string, i);
+    }
+
+    return 0;
+}
diff --git a/util/xutil.h b/util/xutil.h
new file mode 100644
index 00000000..e2707000
--- /dev/null
+++ b/util/xutil.h
@@ -0,0 +1,60 @@
+/* xutil.h - Various wrapper functions to abort on error.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#ifndef NOTMUCH_XUTIL_H
+#define NOTMUCH_XUTIL_H
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* xutil.c */
+void *
+xcalloc (size_t nmemb, size_t size);
+
+void *
+xmalloc (size_t size);
+
+void *
+xrealloc (void *ptrr, size_t size);
+
+char *
+xstrdup (const char *s);
+
+char *
+xstrndup (const char *s, size_t n);
+
+/* Returns 0 for successful compilation, 1 otherwise */
+int
+xregcomp (regex_t *preg, const char *regex, int cflags);
+
+int
+xregexec (const regex_t *preg, const char *string,
+	  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
new file mode 100644
index 00000000..1f5f9dbe
--- /dev/null
+++ b/util/zlib-extra.c
@@ -0,0 +1,95 @@
+/* zlib-extra.c -  Extra or enhanced routines for compressed I/O.
+ *
+ * Copyright (c) 2014 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include "zlib-extra.h"
+#include <talloc.h>
+#include <stdio.h>
+#include <string.h>
+
+/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc
+ *
+ * Reads one line (up to and including its '\n', if any) from stream
+ * into the talloc array *bufptr, allocating it under talloc_ctx when
+ * *bufptr is NULL and doubling it whenever the line does not fit.
+ *
+ * Returns UTIL_SUCCESS with the line length stored in *bytes_read,
+ * UTIL_EOF when the stream ended before any byte was read, or
+ * UTIL_OUT_OF_MEMORY / UTIL_ERRNO / UTIL_GZERROR on failure.
+ * *bytes_read is only written on success.  Except when the initial
+ * allocation fails, *bufptr is updated on every return so it always
+ * names the live buffer, even if talloc_realloc moved it before an
+ * error occurred.
+ */
+util_status_t
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
+{
+    char *buf = *bufptr;
+    unsigned int len;
+    size_t offset = 0;
+    util_status_t status = UTIL_SUCCESS;
+
+    if (buf) {
+	len = talloc_array_length (buf);
+    } else {
+	/* same as getdelim from gnulib */
+	len = 120;
+	buf = talloc_array (talloc_ctx, char, len);
+	if (buf == NULL)
+	    return UTIL_OUT_OF_MEMORY;
+    }
+
+    while (1) {
+	unsigned int new_len;
+	char *grown;
+
+	if (! gzgets (stream, buf + offset, len - offset)) {
+	    /* Null indicates EOF or error */
+	    int zlib_status = 0;
+	    (void) gzerror (stream, &zlib_status);
+	    switch (zlib_status) {
+	    case Z_STREAM_END:
+	    case Z_OK:
+		/* no data read before EOF; otherwise a final line
+		 * without a newline is still a success */
+		if (offset == 0)
+		    status = UTIL_EOF;
+		break;
+	    case Z_ERRNO:
+		status = UTIL_ERRNO;
+		break;
+	    default:
+		status = UTIL_GZERROR;
+		break;
+	    }
+	    goto DONE;
+	}
+
+	offset += strlen (buf + offset);
+
+	/* offset stays 0 if the chunk began with a NUL byte; do not
+	 * look at buf[-1] in that case. */
+	if (offset > 0 && buf[offset - 1] == '\n')
+	    goto DONE;
+
+	/* Line not complete yet: double the buffer.  Keep the old
+	 * pointer until the reallocation is known to have succeeded. */
+	new_len = len * 2;
+	grown = talloc_realloc (talloc_ctx, buf, char, new_len);
+	if (grown == NULL) {
+	    status = UTIL_OUT_OF_MEMORY;
+	    goto DONE;
+	}
+	buf = grown;
+	len = new_len;
+    }
+  DONE:
+    /* Publish the buffer on every path: it may have been newly
+     * allocated or moved, and the caller's old pointer would be stale. */
+    *bufptr = buf;
+    if (status == UTIL_SUCCESS)
+	*bytes_read = offset;
+    return status;
+}
+
+/* Pick the message for status: zlib's own description of file's last
+ * error for UTIL_GZERROR, the generic util_error_string text for
+ * everything else. */
+const char *
+gz_error_string (util_status_t status, gzFile file)
+{
+    return (status == UTIL_GZERROR) ? gzerror_str (file)
+	   : util_error_string (status);
+}
+
+/* Return the message gzerror reports for file, discarding the numeric
+ * error code it also hands back. */
+const char *
+gzerror_str (gzFile file)
+{
+    /* gzerror wants somewhere to store the code; it is not used. */
+    int ignored_errnum;
+
+    return gzerror (file, &ignored_errnum);
+}
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
new file mode 100644
index 00000000..7532339b
--- /dev/null
+++ b/util/zlib-extra.h
@@ -0,0 +1,39 @@
+#ifndef _ZLIB_EXTRA_H
+#define _ZLIB_EXTRA_H
+
+#include "util.h"
+#include <zlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Like getline, but read from a gzFile. Allocation is with talloc.
+ * Returns:
+ *
+ *   UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR
+ *			Consult util.h for description
+ *
+ *   UTIL_EOF		End of file encountered before
+ *			any characters read
+ */
+util_status_t
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
+
+/* return a suitable error string based on the return status
+ *  from gz_getline
+ */
+
+const char *
+gz_error_string (util_status_t status, gzFile stream);
+
+/* Call gzerror with a dummy errno argument, the docs don't promise to
+ * support the NULL case */
+const char *
+gzerror_str (gzFile file);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
author	David Bremner <bremner@debian.org>	2023-12-01 07:51:09 -0400
committer	David Bremner <bremner@debian.org>	2023-12-01 07:51:09 -0400
commit	126347b6942dd4b0291beb67b119431ebd750a2a (patch)
tree	532c5163cb0972c8b9e6c8b4577b86afb9c6a6a2 /util