diff options
| author | David Bremner <bremner@debian.org> | 2023-12-01 07:51:09 -0400 |
|---|---|---|
| committer | David Bremner <bremner@debian.org> | 2023-12-01 07:51:09 -0400 |
| commit | 126347b6942dd4b0291beb67b119431ebd750a2a (patch) | |
| tree | 532c5163cb0972c8b9e6c8b4577b86afb9c6a6a2 /util | |
Import notmuch_0.38.2.orig.tar.xz
[dgit import orig notmuch_0.38.2.orig.tar.xz]
Diffstat (limited to 'util')
| -rw-r--r-- | util/Makefile | 5 | ||||
| -rw-r--r-- | util/Makefile.local | 18 | ||||
| -rw-r--r-- | util/crypto.c | 245 | ||||
| -rw-r--r-- | util/crypto.h | 106 | ||||
| -rw-r--r-- | util/error_util.c | 40 | ||||
| -rw-r--r-- | util/error_util.h | 54 | ||||
| -rw-r--r-- | util/gmime-extra.c | 221 | ||||
| -rw-r--r-- | util/gmime-extra.h | 81 | ||||
| -rw-r--r-- | util/hex-escape.c | 159 | ||||
| -rw-r--r-- | util/hex-escape.h | 50 | ||||
| -rw-r--r-- | util/path-util.c | 27 | ||||
| -rw-r--r-- | util/path-util.h | 19 | ||||
| -rw-r--r-- | util/repair.c | 158 | ||||
| -rw-r--r-- | util/repair.h | 44 | ||||
| -rw-r--r-- | util/string-util.c | 298 | ||||
| -rw-r--r-- | util/string-util.h | 100 | ||||
| -rw-r--r-- | util/talloc-extra.c | 14 | ||||
| -rw-r--r-- | util/talloc-extra.h | 26 | ||||
| -rw-r--r-- | util/unicode-util.c | 43 | ||||
| -rw-r--r-- | util/unicode-util.h | 19 | ||||
| -rw-r--r-- | util/util.c | 24 | ||||
| -rw-r--r-- | util/util.h | 29 | ||||
| -rw-r--r-- | util/xapian-extra.h | 15 | ||||
| -rw-r--r-- | util/xutil.c | 139 | ||||
| -rw-r--r-- | util/xutil.h | 60 | ||||
| -rw-r--r-- | util/zlib-extra.c | 95 | ||||
| -rw-r--r-- | util/zlib-extra.h | 39 |
27 files changed, 2128 insertions, 0 deletions
diff --git a/util/Makefile b/util/Makefile new file mode 100644 index 00000000..fa25832e --- /dev/null +++ b/util/Makefile @@ -0,0 +1,5 @@ +all: + $(MAKE) -C .. all + +.DEFAULT: + $(MAKE) -C .. $@ diff --git a/util/Makefile.local b/util/Makefile.local new file mode 100644 index 00000000..8a0b9bc3 --- /dev/null +++ b/util/Makefile.local @@ -0,0 +1,18 @@ +# -*- makefile-gmake -*- + +dir := util +extra_cflags += -I$(srcdir)/$(dir) + +libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \ + $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \ + $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \ + $(dir)/repair.c $(dir)/path-util.c \ + $(dir)/unicode-util.c + +libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o) + +$(dir)/libnotmuch_util.a: $(libnotmuch_util_modules) + $(call quiet,AR) rcs $@ $^ + +SRCS := $(SRCS) $(libnotmuch_util_c_srcs) +CLEAN := $(CLEAN) $(libnotmuch_util_modules) $(dir)/libnotmuch_util.a diff --git a/util/crypto.c b/util/crypto.c new file mode 100644 index 00000000..156a6550 --- /dev/null +++ b/util/crypto.c @@ -0,0 +1,245 @@ +/* notmuch - Not much of an email program, (just index and search) + * + * Copyright © 2012 Jameson Rollins + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/ . 
+ *
+ * Authors: Jameson Rollins <jrollins@finestructure.net>
+ */
+
+#include "crypto.h"
+#include <string.h>
+#include <strings.h>
+#include "error_util.h"
+#define unused(x) x __attribute__ ((unused))
+
+#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
+
+/* Nothing to release at present; kept so callers have a cleanup hook. */
+void
+_notmuch_crypto_cleanup (unused(_notmuch_crypto_t *crypto))
+{
+}
+
+/* Try to decrypt 'part' according to the policy 'decrypt'.
+ *
+ * - NOTMUCH_DECRYPT_FALSE: nothing is tried, NULL is returned.
+ * - If 'message' is non-NULL, every "session-key" property stashed on
+ *   it is tried in turn; the first successful result is returned.
+ * - NOTMUCH_DECRYPT_AUTO stops after the stashed session keys.
+ * - Otherwise a regular decryption is attempted; with
+ *   NOTMUCH_DECRYPT_TRUE and a non-NULL 'decrypt_result' the session
+ *   key is requested from GMime as well.
+ *
+ * '*attempted' (if non-NULL) is set to true whenever a decryption call
+ * is about to be made. Any error left in '*err' by an earlier attempt
+ * is freed before the next one. Returns the decrypted object, or NULL.
+ */
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+                         notmuch_decryption_policy_t decrypt,
+                         notmuch_message_t *message,
+                         GMimeObject *part,
+                         GMimeDecryptResult **decrypt_result,
+                         GError **err)
+{
+    GMimeObject *ret = NULL;
+
+    if (decrypt == NOTMUCH_DECRYPT_FALSE)
+        return NULL;
+
+    /* try decryption with session key if one is stashed */
+    if (message) {
+        notmuch_message_properties_t *list = NULL;
+
+        for (list = notmuch_message_get_properties (message, "session-key", TRUE);
+             notmuch_message_properties_valid (list);
+             notmuch_message_properties_move_to_next (list)) {
+            if (err && *err) {
+                g_error_free (*err);
+                *err = NULL;
+            }
+            if (attempted)
+                *attempted = true;
+            if (GMIME_IS_MULTIPART_ENCRYPTED (part)) {
+                ret = g_mime_multipart_encrypted_decrypt (GMIME_MULTIPART_ENCRYPTED (part),
+                                                          GMIME_DECRYPT_NONE,
+                                                          notmuch_message_properties_value (list),
+                                                          decrypt_result, err);
+            } else if (GMIME_IS_APPLICATION_PKCS7_MIME (part)) {
+                GMimeApplicationPkcs7Mime *pkcs7 = GMIME_APPLICATION_PKCS7_MIME (part);
+                GMimeSecureMimeType type = g_mime_application_pkcs7_mime_get_smime_type (pkcs7);
+                if (type == GMIME_SECURE_MIME_TYPE_ENVELOPED_DATA) {
+                    ret = g_mime_application_pkcs7_mime_decrypt (pkcs7,
+                                                                 GMIME_DECRYPT_NONE,
+                                                                 notmuch_message_properties_value (
+                                                                     list),
+                                                                 decrypt_result, err);
+                }
+            }
+            if (ret)
+                break;
+        }
+        if (list)
+            notmuch_message_properties_destroy (list);
+        if (ret)
+            return ret;
+    }
+
+    if (err && *err) {
+        g_error_free (*err);
+        *err = NULL;
+    }
+
+    if (decrypt == NOTMUCH_DECRYPT_AUTO)
+        return ret;
+
+    if (attempted)
+        *attempted = true;
+    GMimeDecryptFlags flags = GMIME_DECRYPT_NONE;
+
+    if (decrypt == NOTMUCH_DECRYPT_TRUE && decrypt_result)
+        flags |= GMIME_DECRYPT_EXPORT_SESSION_KEY;
+    if (GMIME_IS_MULTIPART_ENCRYPTED (part)) {
+        ret = g_mime_multipart_encrypted_decrypt (GMIME_MULTIPART_ENCRYPTED (part), flags, NULL,
+                                                  decrypt_result, err);
+    } else if (GMIME_IS_APPLICATION_PKCS7_MIME (part)) {
+        GMimeApplicationPkcs7Mime *pkcs7 = GMIME_APPLICATION_PKCS7_MIME (part);
+        GMimeSecureMimeType p7type = g_mime_application_pkcs7_mime_get_smime_type (pkcs7);
+        if (p7type == GMIME_SECURE_MIME_TYPE_ENVELOPED_DATA) {
+            ret = g_mime_application_pkcs7_mime_decrypt (pkcs7, flags, NULL,
+                                                         decrypt_result, err);
+        }
+    }
+    return ret;
+}
+
+/* talloc destructor: drops the reference held on the signature list
+ * and frees the stored payload subject. Always returns 0. */
+static int
+_notmuch_message_crypto_destructor (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (! msg_crypto)
+        return 0;
+    if (msg_crypto->sig_list)
+        g_object_unref (msg_crypto->sig_list);
+    if (msg_crypto->payload_subject)
+        talloc_free (msg_crypto->payload_subject);
+    return 0;
+}
+
+/* Allocate a zeroed _notmuch_message_crypto_t under 'ctx' and attach
+ * the destructor above. Returns NULL if the allocation fails. */
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx)
+{
+    _notmuch_message_crypto_t *ret = talloc_zero (ctx, _notmuch_message_crypto_t);
+
+    /* talloc_zero can fail; do not register a destructor on NULL */
+    if (ret)
+        talloc_set_destructor (ret, _notmuch_message_crypto_destructor);
+    return ret;
+}
+
+/* Record 'sigs' as the signature list of the message, unless a payload
+ * part has already been seen. Returns NOTMUCH_STATUS_NULL_POINTER when
+ * 'msg_crypto' is NULL, NOTMUCH_STATUS_SUCCESS otherwise. */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+                                            GMimeSignatureList *sigs)
+{
+    if (! msg_crypto)
+        return NOTMUCH_STATUS_NULL_POINTER;
+
+    /* Signatures that arrive after a payload part during DFS are not
+     * part of the cryptographic envelope: */
+    if (msg_crypto->payload_encountered)
+        return NOTMUCH_STATUS_SUCCESS;
+
+    /* This signature list needs to persist as long as the _n_m_crypto
+     * object survives. Increasing its reference counter prevents
+     * garbage-collection until after the destructor has run.
+     *
+     * The new reference is taken before the old one is dropped, so
+     * that passing the list that is already stored cannot release it
+     * while it is still in use. */
+    if (sigs)
+        g_object_ref (sigs);
+    if (msg_crypto->sig_list)
+        g_object_unref (msg_crypto->sig_list);
+    msg_crypto->sig_list = sigs;
+
+    if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_FULL)
+        msg_crypto->signature_encrypted = true;
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
+
+/* Consider 'part' (child number 'childnum' of 'parent') as the
+ * cryptographic payload of the message. Only the first payload part
+ * encountered has any effect. Returns true only when the part is
+ * taken as the payload inside a cryptographic envelope; in that case
+ * a protected Subject, if one is found, is copied into
+ * msg_crypto->payload_subject. */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+                                           GMimeObject *parent, int childnum)
+{
+    const char *protected_headers = NULL;
+    const char *forwarded = NULL;
+    const char *subject = NULL;
+
+    if ((! msg_crypto) || (! part))
+        INTERNAL_ERROR ("_notmuch_message_crypto_potential_payload() got NULL for %s\n",
+                        msg_crypto? "part" : "msg_crypto");
+
+    /* only fire on the first payload part encountered */
+    if (msg_crypto->payload_encountered)
+        return false;
+
+    /* the first child of multipart/encrypted that matches the
+     * encryption protocol should be "control information" metadata,
+     * not payload. So we skip it. (see
+     * https://tools.ietf.org/html/rfc1847#page-8) */
+    if (parent && GMIME_IS_MULTIPART_ENCRYPTED (parent) && childnum ==
+        GMIME_MULTIPART_ENCRYPTED_VERSION) {
+        const char *enc_type = g_mime_object_get_content_type_parameter (parent, "protocol");
+        GMimeContentType *ct = g_mime_object_get_content_type (part);
+        if (ct && enc_type) {
+            const char *part_type = g_mime_content_type_get_mime_type (ct);
+            if (part_type && strcmp (part_type, enc_type) == 0)
+                return false;
+        }
+    }
+
+    msg_crypto->payload_encountered = true;
+
+    /* don't bother recording anything if there is no cryptographic
+     * envelope: */
+    if ((msg_crypto->decryption_status != NOTMUCH_MESSAGE_DECRYPTED_FULL) &&
+        (msg_crypto->sig_list == NULL))
+        return false;
+
+    /* Verify that this payload has headers that are intended to be
+     * exported to the larger message: */
+
+    /* Consider a payload that uses Alexei Melinkov's forwarded="no" for
+     * message/global or message/rfc822:
+     * https://tools.ietf.org/html/draft-melnikov-smime-header-signing-05#section-4 */
+    forwarded = g_mime_object_get_content_type_parameter (part, "forwarded");
+    if (GMIME_IS_MESSAGE_PART (part) && forwarded && strcmp (forwarded, "no") == 0) {
+        GMimeMessage *message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+        subject = g_mime_message_get_subject (message);
+        /* FIXME: handle more than just Subject: at some point */
+    } else {
+        /* Consider "memoryhole"-style protected headers as practiced by Enigmail and K-9 */
+        protected_headers = g_mime_object_get_content_type_parameter (part, "protected-headers");
+        if (protected_headers && strcasecmp ("v1", protected_headers) == 0)
+            subject = g_mime_object_get_header (part, "Subject");
+        /* FIXME: handle more than just Subject: at some point */
+    }
+
+    if (subject) {
+        if (msg_crypto->payload_subject)
+            talloc_free (msg_crypto->payload_subject);
+        msg_crypto->payload_subject = talloc_strdup (msg_crypto, subject);
+    }
+
+    return true;
+}
+
+
+/* Note that a part was decrypted: the status becomes FULL if no
+ * payload has been seen yet, or PARTIAL if it was still NONE. */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto)
+{
+    if (! msg_crypto)
+        return NOTMUCH_STATUS_NULL_POINTER;
+
+    /* see the rationale for different values of
+     * _notmuch_message_decryption_status_t in util/crypto.h */
+    if (! msg_crypto->payload_encountered)
+        msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_FULL;
+    else if (msg_crypto->decryption_status == NOTMUCH_MESSAGE_DECRYPTED_NONE)
+        msg_crypto->decryption_status = NOTMUCH_MESSAGE_DECRYPTED_PARTIAL;
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/util/crypto.h b/util/crypto.h
new file mode 100644
index 00000000..3c5d384b
--- /dev/null
+++ b/util/crypto.h
@@ -0,0 +1,106 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include <stdbool.h>
+#include "gmime-extra.h"
+#include "notmuch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _notmuch_crypto {
+    bool verify;
+    notmuch_decryption_policy_t decrypt;
+} _notmuch_crypto_t;
+
+GMimeObject *
+_notmuch_crypto_decrypt (bool *attempted,
+                         notmuch_decryption_policy_t decrypt,
+                         notmuch_message_t *message,
+                         GMimeObject *part,
+                         GMimeDecryptResult **decrypt_result,
+                         GError **err);
+
+void
+_notmuch_crypto_cleanup (_notmuch_crypto_t *crypto);
+
+/* The user probably wants to know if the entire message was in the
+ * clear. When replying, the MUA probably wants to know whether there
+ * was any part decrypted in the message. And when displaying to the
+ * user, we probably only want to display "encrypted message" if the
+ * entire message was covered by encryption. */
+typedef enum {
+    NOTMUCH_MESSAGE_DECRYPTED_NONE = 0,
+    NOTMUCH_MESSAGE_DECRYPTED_PARTIAL,
+    NOTMUCH_MESSAGE_DECRYPTED_FULL,
+} _notmuch_message_decryption_status_t;
+
+/* description of the cryptographic state of a given message overall;
+ * for use by simple user agents.
+ */
+typedef struct _notmuch_message_crypto {
+    /* encryption status: partial, full, none */
+    _notmuch_message_decryption_status_t decryption_status;
+    /* FIXME: can we show what key(s) a fully-encrypted message was
+     * encrypted to? This data is not necessarily cryptographically
+     * reliable; even when we decrypt, we might not know which public
+     * key was used (e.g. if we're using a session key).
+     */
+
+    /* signature status of the whole message (either the whole message
+     * is signed, or it is not) -- this means that partially-signed
+     * messages will get no signature status. */
+    GMimeSignatureList *sig_list;
+    /* if part of the message was signed, and the MUA is clever, it
+     * can determine on its own exactly which part and try to make
+     * more sense of it. */
+
+    /* mark this flag once we encounter a payload (i.e. something that
+     * is not part of the cryptographic envelope) */
+    bool payload_encountered;
+
+    /* the value of any "Subject:" header in the cryptographic payload
+     * (the top level part within the crypto envelope), converted to
+     * UTF-8 */
+    char *payload_subject;
+
+    /* if both signed and encrypted, was the signature encrypted? */
+    bool signature_encrypted;
+} _notmuch_message_crypto_t;
+
+
+/* _notmuch_message_crypto_t objects should be released with
+ * talloc_free (), or they will be released along with their parent
+ * context.
+ */
+_notmuch_message_crypto_t *
+_notmuch_message_crypto_new (void *ctx);
+
+/* call potential_sig_list during a depth-first-search on a message to
+ * consider a particular signature as relevant for the message.
+ */
+notmuch_status_t
+_notmuch_message_crypto_potential_sig_list (_notmuch_message_crypto_t *msg_crypto,
+                                            GMimeSignatureList *sigs);
+
+/* call successful_decryption during a depth-first-search on a message
+ * to indicate that a part was successfully decrypted.
+ */
+notmuch_status_t
+_notmuch_message_crypto_successful_decryption (_notmuch_message_crypto_t *msg_crypto);
+
+/* call potential_payload during a depth-first-search on a message
+ * when encountering a message part that is not part of the envelope.
+ *
+ * Returns true if part is the root of the cryptographic payload of
+ * this message.
+ */
+bool
+_notmuch_message_crypto_potential_payload (_notmuch_message_crypto_t *msg_crypto, GMimeObject *part,
+                                           GMimeObject *parent, int childnum);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/error_util.c b/util/error_util.c
new file mode 100644
index 00000000..e64162c7
--- /dev/null
+++ b/util/error_util.c
@@ -0,0 +1,40 @@
+/* error_util.c - internal error utilities for notmuch.
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "error_util.h"
+
+/* Print "Internal error: " followed by the printf-style message to
+ * stderr, then terminate the process with exit status 1. */
+void
+_internal_error (const char *format, ...)
+{
+    va_list ap;
+
+    fputs ("Internal error: ", stderr);
+
+    va_start (ap, format);
+    vfprintf (stderr, format, ap);
+    va_end (ap);
+
+    exit (1);
+}
+
diff --git a/util/error_util.h b/util/error_util.h
new file mode 100644
index 00000000..a51f001f
--- /dev/null
+++ b/util/error_util.h
@@ -0,0 +1,54 @@
+/* error_util.h - Provide the INTERNAL_ERROR macro
+ *
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth <cworth@cworth.org>
+ */
+
+#ifndef ERROR_UTIL_H
+#define ERROR_UTIL_H
+
+#include <talloc.h>
+
+#include "function-attributes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* There's no point in continuing when we've detected that we've done
+ * something wrong internally (as opposed to the user passing in a
+ * bogus value).
+ *
+ * Note that PRINTF_ATTRIBUTE comes from talloc.h
+ */
+void
+_internal_error (const char *format, ...) PRINTF_ATTRIBUTE (1, 2) NORETURN_ATTRIBUTE;
+
+/* There's no point in continuing when we've detected that we've done
+ * something wrong internally (as opposed to the user passing in a
+ * bogus value).
+ *
+ * Note that __location__ comes from talloc.h.
+ */
+#define INTERNAL_ERROR(format, ...)                     \
+    _internal_error (format " (%s).\n",                 \
+                     ##__VA_ARGS__, __location__)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/gmime-extra.c b/util/gmime-extra.c
new file mode 100644
index 00000000..192cb078
--- /dev/null
+++ b/util/gmime-extra.c
@@ -0,0 +1,221 @@
+#include "gmime-extra.h"
+#include <string.h>
+#include <unistd.h>
+
+/* Peek at the first bytes of 'file_stream' and, if they carry the gzip
+ * magic number (0x1f 0x8b), wrap the stream in an un-gzipping filter
+ * stream.
+ *
+ * On success the returned stream replaces 'file_stream': it is either
+ * 'file_stream' itself, or a filter stream that now holds the only
+ * reference to it. On failure NULL is returned and the caller still
+ * owns 'file_stream'.
+ */
+static
+GMimeStream *
+_gzfile_maybe_filter (GMimeStream *file_stream)
+{
+    char buf[4];
+    int bytes_read;
+
+    if ((bytes_read = g_mime_stream_read (file_stream, buf, sizeof (buf))) < 0)
+        return NULL;
+
+    if (g_mime_stream_reset (file_stream))
+        return NULL;
+
+    /* check for gzipped input */
+    if (bytes_read >= 2 && buf[0] == 0x1f && (unsigned char) buf[1] == 0x8b) {
+        GMimeStream *gzstream;
+        GMimeFilter *gzfilter;
+
+        gzfilter = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_UNZIP, 0);
+        if (! gzfilter)
+            return NULL;
+
+        gzstream = g_mime_stream_filter_new (file_stream);
+        if (! gzstream) {
+            /* the filter was never attached to anything; drop it */
+            g_object_unref (gzfilter);
+            return NULL;
+        }
+
+        /* ignore filter id */
+        (void) g_mime_stream_filter_add ((GMimeStreamFilter *) gzstream, gzfilter);
+        g_object_unref (gzfilter);
+        g_object_unref (file_stream);
+        return gzstream;
+    } else {
+        return file_stream;
+    }
+}
+
+GMimeStream *
+g_mime_stream_gzfile_new (int fd)
+{
+    GMimeStream *file_stream;
+    GMimeStream *ret;
+
+    file_stream = g_mime_stream_fs_new (fd);
+    if (! file_stream)
+        return NULL;
+
+    ret = _gzfile_maybe_filter (file_stream);
+    if (! ret) {
+        /* Release the stream object, but leave 'fd' open: before this
+         * cleanup existed the descriptor was never closed on failure,
+         * and the caller may still want to close it itself. */
+        g_mime_stream_fs_set_owner (GMIME_STREAM_FS (file_stream), FALSE);
+        g_object_unref (file_stream);
+    }
+
+    return ret;
+}
+
+GMimeStream *
+g_mime_stream_gzfile_open (const char *filename)
+{
+    GMimeStream *file_stream;
+    GMimeStream *ret;
+
+    file_stream = g_mime_stream_fs_open (filename, 0, 0, NULL);
+    if (! file_stream)
+        return NULL;
+
+    ret = _gzfile_maybe_filter (file_stream);
+    if (! ret)
+        g_object_unref (file_stream);
+
+    return ret;
+}
+
+GMimeStream *
+g_mime_stream_stdout_new ()
+{
+    GMimeStream *stream_stdout = NULL;
+    GMimeStream *stream_buffered = NULL;
+
+    stream_stdout = g_mime_stream_pipe_new (STDOUT_FILENO);
+    if (! stream_stdout)
+        return NULL;
+
+    g_mime_stream_pipe_set_owner (GMIME_STREAM_PIPE (stream_stdout), FALSE);
+
+    stream_buffered = g_mime_stream_buffer_new (stream_stdout, GMIME_STREAM_BUFFER_BLOCK_WRITE);
+
+    g_object_unref (stream_stdout);
+
+    return stream_buffered;
+}
+
+/**
+ * copy a glib string into a talloc context, and free it.
+ */
+static char *
+g_string_talloc_strdup (void *ctx, char *g_string)
+{
+    char *new_str = talloc_strdup (ctx, g_string);
+
+    g_free (g_string);
+    return new_str;
+}
+
+const char *
+g_mime_certificate_get_valid_userid (GMimeCertificate *cert)
+{
+    /* output user id only if validity is FULL or ULTIMATE. */
+    const char *uid = g_mime_certificate_get_user_id (cert);
+
+    if (uid == NULL)
+        return uid;
+    GMimeValidity validity = g_mime_certificate_get_id_validity (cert);
+
+    if (validity == GMIME_VALIDITY_FULL || validity == GMIME_VALIDITY_ULTIMATE)
+        return uid;
+    return NULL;
+}
+
+const char *
+g_mime_certificate_get_valid_email (GMimeCertificate *cert)
+{
+    /* output e-mail address only if validity is FULL or ULTIMATE. */
+    const char *email = g_mime_certificate_get_email (cert);
+
+    if (email == NULL)
+        return email;
+    GMimeValidity validity = g_mime_certificate_get_id_validity (cert);
+
+    if (validity == GMIME_VALIDITY_FULL || validity == GMIME_VALIDITY_ULTIMATE)
+        return email;
+    return NULL;
+}
+
+const char *
+g_mime_certificate_get_fpr16 (GMimeCertificate *cert)
+{
+    const char *fpr = g_mime_certificate_get_fingerprint (cert);
+
+    if (! fpr || strlen (fpr) < 16)
+        return fpr;
+
+    return fpr + (strlen (fpr) - 16);
+}
+
+char *
+g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type)
+{
+    InternetAddressList *list = g_mime_message_get_addresses (message, type);
+
+    return internet_address_list_to_string (list, NULL, 0);
+}
+
+char *
+g_mime_message_get_date_string (void *ctx, GMimeMessage *message)
+{
+    GDateTime *parsed_date = g_mime_message_get_date (message);
+
+    if (parsed_date) {
+        char *date = g_mime_utils_header_format_date (parsed_date);
+        return g_string_talloc_strdup (ctx, date);
+    } else {
+        return talloc_strdup (ctx, "Thu, 01 Jan 1970 00:00:00 +0000");
+    }
+}
+
+InternetAddressList *
+g_mime_message_get_reply_to_list (GMimeMessage *message)
+{
+    return g_mime_message_get_reply_to (message);
+}
+
+const char *
+g_mime_message_get_from_string (GMimeMessage *message)
+{
+    return g_mime_object_get_header (GMIME_OBJECT (message), "From");
+}
+
+char *
+g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message)
+{
+    InternetAddressList *list = g_mime_message_get_reply_to (message);
+
+    return g_string_talloc_strdup (ctx, internet_address_list_to_string (list, NULL, 0));
+}
+
+void
+g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag)
+{
+    g_mime_parser_set_format (parser, flag ? GMIME_FORMAT_MBOX : GMIME_FORMAT_MESSAGE);
+}
+
+/* In GMime 3.0, status GOOD and VALID both imply something about the
+ * validity of the UIDs attached to the signing key. This forces us to
+ * use following somewhat relaxed definition of a "good" signature to
+ * preserve current notmuch semantics.
+ */
+
+gboolean
+g_mime_signature_status_good (GMimeSignatureStatus status)
+{
+    return ((status & (GMIME_SIGNATURE_STATUS_RED | GMIME_SIGNATURE_STATUS_ERROR_MASK)) == 0);
+}
+
+gboolean
+g_mime_signature_status_bad (GMimeSignatureStatus status)
+{
+    return (status & GMIME_SIGNATURE_STATUS_RED);
+}
+
+gboolean
+g_mime_signature_status_error (GMimeSignatureStatus status)
+{
+    return (status & GMIME_SIGNATURE_STATUS_ERROR_MASK);
+}
+
+/* Parse an RFC 822 style date header and return it as seconds since
+ * the Unix epoch, or 0 if the header cannot be parsed. */
+gint64
+g_mime_utils_header_decode_date_unix (const char *date)
+{
+    GDateTime *parsed_date = g_mime_utils_header_decode_date (date);
+    /* gint64, not time_t: the value must not be narrowed on platforms
+     * where time_t is 32 bits wide */
+    gint64 ret;
+
+    if (parsed_date) {
+        ret = g_date_time_to_unix (parsed_date);
+        g_date_time_unref (parsed_date);
+    } else {
+        ret = 0;
+    }
+
+    return ret;
+}
diff --git a/util/gmime-extra.h b/util/gmime-extra.h
new file mode 100644
index 00000000..889e91f3
--- /dev/null
+++ b/util/gmime-extra.h
@@ -0,0 +1,81 @@
+#ifndef _GMIME_EXTRA_H
+#define _GMIME_EXTRA_H
+#include <gmime/gmime.h>
+#include <talloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+GMimeStream *g_mime_stream_stdout_new (void);
+
+/* Return a GMime stream for this open file descriptor, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_new (int fd);
+
+/* Return a GMime stream for this path, un-gzipping if
+ * necessary */
+GMimeStream *g_mime_stream_gzfile_open (const char *filename);
+
+/**
+ * Get last 16 hex digits of fingerprint ("keyid")
+ */
+const char *g_mime_certificate_get_fpr16 (GMimeCertificate *cert);
+/**
+ * Return the contents of the appropriate address header as a string
+ * Should be freed using g_free
+ */
+char *g_mime_message_get_address_string (GMimeMessage *message, GMimeAddressType type);
+
+InternetAddressList *g_mime_message_get_addresses (GMimeMessage *message, GMimeAddressType type);
+
+/**
+ * return talloc allocated date string
+ */
+
+char *g_mime_message_get_date_string (void *ctx, GMimeMessage *message);
+
+/**
+ * glib allocated list of From: addresses
+ */
+
+InternetAddressList *g_mime_message_get_from (GMimeMessage *message);
+
+
+/**
+ * return string for From: address
+ * (owned by gmime)
+ */
+const char *g_mime_message_get_from_string (GMimeMessage *message);
+
+InternetAddressList *g_mime_message_get_reply_to_list (GMimeMessage *message);
+
+/**
+ * return talloc allocated reply-to string
+ */
+char *g_mime_message_get_reply_to_string (void *ctx, GMimeMessage *message);
+
+void g_mime_parser_set_scan_from (GMimeParser *parser, gboolean flag);
+
+gboolean g_mime_signature_status_good (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_bad (GMimeSignatureStatus status);
+
+gboolean g_mime_signature_status_error (GMimeSignatureStatus status);
+
+gint64 g_mime_utils_header_decode_date_unix (const char *date);
+
+/**
+ * Return string for valid User ID (or NULL if no valid User ID exists)
+ */
+const char *g_mime_certificate_get_valid_userid (GMimeCertificate *cert);
+/**
+ * Return string for valid e-mail address (or NULL if no valid e-mail address exists)
+ */
+const char *g_mime_certificate_get_valid_email (GMimeCertificate *cert);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 00000000..81534a8c
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,159 @@
+/* hex-escape.c - Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU 
General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <talloc.h>
+#include <ctype.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+/* Characters that hex_encode() copies through unchanged; everything
+ * else is written as escape_char followed by two hex digits. */
+static const char *output_charset =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.,";
+
+static const char escape_char = '%';
+
+/* Non-zero if 'c' may appear unescaped in encoded output. */
+static int
+is_output (char c)
+{
+    return (strchr (output_charset, c) != NULL);
+}
+
+/* Make sure '*out' can hold at least 'needed' bytes, allocating or
+ * growing it under 'ctx' when '*out_size' is too small.
+ *
+ * Returns 1 on success, with '*out' and '*out_size' updated if the
+ * buffer was (re)allocated. Returns 0 if the allocation fails; in that
+ * case '*out' and '*out_size' are left untouched, so a buffer the
+ * caller already had remains reachable.
+ */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *resized;
+
+    if (*out_size >= needed)
+        return 1;
+
+    if (*out == NULL)
+        resized = talloc_size (ctx, needed);
+    else
+        resized = talloc_realloc (ctx, *out, char, needed);
+
+    /* only commit the new pointer once we know it is valid */
+    if (resized == NULL)
+        return 0;
+
+    *out = resized;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode 'in' into '*out', escaping every byte that is not in
+ * output_charset as "%xx" (two lower-case hex digits).
+ *
+ * '*out' / '*out_size' follow the getline-like convention: a NULL
+ * '*out' means "allocate for me", otherwise the existing buffer is
+ * grown if it is too small.
+ *
+ * Returns HEX_SUCCESS, or HEX_OUT_OF_MEMORY if the output buffer could
+ * not be allocated.
+ */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+
+    const char *p;
+    char *q;
+
+    size_t needed = 1;  /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    for (p = in; *p; p++) {
+        needed += is_output (*p) ? 1 : 3;
+    }
+
+    /* a NULL buffer has no meaningful size, whatever the caller passed */
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (! maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    q = *out;
+    p = in;
+
+    while (*p) {
+        if (is_output (*p)) {
+            *q++ = *p++;
+        } else {
+            /* writes 3 characters plus a NUL that is overwritten by
+             * the next iteration or by the final terminator */
+            sprintf (q, "%%%02x", (unsigned char) *p++);
+            q += 3;
+        }
+    }
+
+    *q = '\0';
+    return HEX_SUCCESS;
+}
+
+/* Hex decode 'in' to 'out'.
+ *
+ * This must succeed for in == out to support hex_decode_inplace().
+ *
+ * Returns HEX_SYNTAX_ERROR if an escape character is not followed by
+ * two hex digits, HEX_SUCCESS otherwise. 'out' must have room for the
+ * decoded bytes plus a terminating NUL.
+ */
+static hex_status_t
+hex_decode_internal (const char *in, unsigned char *out)
+{
+    char buf[3];
+
+    while (*in) {
+        if (*in == escape_char) {
+            char *endp;
+
+            /* This also handles unexpected end-of-string. */
+            if (! isxdigit ((unsigned char) in[1]) ||
+                ! isxdigit ((unsigned char) in[2]))
+                return HEX_SYNTAX_ERROR;
+
+            buf[0] = in[1];
+            buf[1] = in[2];
+            buf[2] = '\0';
+
+            *out = strtoul (buf, &endp, 16);
+
+            if (endp != buf + 2)
+                return HEX_SYNTAX_ERROR;
+
+            in += 3;
+            out++;
+        } else {
+            *out++ = *in++;
+        }
+    }
+
+    *out = '\0';
+
+    return HEX_SUCCESS;
+}
+
+hex_status_t
+hex_decode_inplace (char *s)
+{
+    /* A decoded string is never longer than the encoded one, so it is
+     * safe to decode a string onto itself. */
+    return hex_decode_internal (s, (unsigned char *) s);
+}
+
+/* Decode 'in' into '*out', turning every "%xx" escape back into the
+ * byte it stands for.
+ *
+ * '*out' / '*out_size' follow the same getline-like convention as
+ * hex_encode(): a NULL '*out' means "allocate for me", otherwise the
+ * existing buffer is grown if it is too small.
+ *
+ * Returns HEX_SUCCESS, HEX_OUT_OF_MEMORY if the output buffer could
+ * not be allocated, or HEX_SYNTAX_ERROR for a malformed escape.
+ */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    const char *p;
+    size_t needed = 1;  /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* Every well-formed escape is three input characters for one
+     * output byte; anything else is copied one for one. The casts
+     * keep isxdigit() defined for bytes >= 0x80 when char is signed. */
+    for (p = in; *p; p++) {
+        if ((p[0] == escape_char) &&
+            isxdigit ((unsigned char) p[1]) &&
+            isxdigit ((unsigned char) p[2]))
+            p += 2;
+        needed += 1;
+    }
+
+    /* a NULL buffer has no meaningful size, whatever the caller passed
+     * (mirrors hex_encode) */
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (! maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    return hex_decode_internal (in, (unsigned char *) *out);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 00000000..83a4c6f1
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,50 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+    HEX_SUCCESS = 0,
+    HEX_SYNTAX_ERROR,
+    HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API for hex_encode() and hex_decode() is modelled on that for
+ * getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size. This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+            size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+            size_t *out_size);
+
+/*
+ * Non-allocating hex decode to decode 's' in-place. The length of the
+ * result is always equal to or shorter than the length of the
+ * original.
+ */
+hex_status_t
+hex_decode_inplace (char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/util/path-util.c b/util/path-util.c
new file mode 100644
index 00000000..3267a967
--- /dev/null
+++ b/util/path-util.c
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define _GNU_SOURCE
+
+#include "path-util.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+
+/* Return the canonical absolute form of 'path' in a malloc'd buffer
+ * that the caller must free(), or NULL on failure.
+ *
+ * Uses canonicalize_file_name() where the build detected it, and
+ * falls back to realpath() with a PATH_MAX-sized buffer otherwise.
+ */
+char *
+notmuch_canonicalize_file_name (const char *path)
+{
+#if HAVE_CANONICALIZE_FILE_NAME
+    return canonicalize_file_name (path);
+#elif defined(PATH_MAX)
+    char *resolved_path = malloc (PATH_MAX + 1);
+    char *result;
+
+    if (resolved_path == NULL)
+        return NULL;
+
+    result = realpath (path, resolved_path);
+    /* realpath() returns NULL on failure and does not take ownership
+     * of the buffer, so release it here instead of leaking it */
+    if (result == NULL)
+        free (resolved_path);
+
+    return result;
+#else
+#error undefined PATH_MAX _and_ missing canonicalize_file_name not supported
+#endif
+}
diff --git a/util/path-util.h b/util/path-util.h
new file mode 100644
index 00000000..ac85f696
--- /dev/null
+++ b/util/path-util.h
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef NOTMUCH_UTIL_PATH_UTIL_H_
+#define NOTMUCH_UTIL_PATH_UTIL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *
+notmuch_canonicalize_file_name (const char *path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NOTMUCH_UTIL_PATH_UTIL_H_ */
diff --git a/util/repair.c b/util/repair.c
new file mode 100644
index 00000000..5b0dfdf4
--- /dev/null
+++ b/util/repair.c
@@ -0,0 +1,158 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2019 Daniel Kahn Gillmor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Authors: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include "repair.h"
+
+
+/* True if 'payload' is a two-part multipart/mixed with
+ * protected-headers="v1" whose first child is a text/plain or
+ * text/rfc822-headers text part that also carries
+ * protected-headers="v1". */
+static bool
+_notmuch_crypto_payload_has_legacy_display (GMimeObject *payload)
+{
+    GMimeMultipart *mpayload;
+    const char *protected_header_parameter;
+    GMimeObject *first;
+
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (payload),
+                                       "multipart", "mixed"))
+        return false;
+    protected_header_parameter = g_mime_object_get_content_type_parameter (payload,
+                                                                           "protected-headers");
+    if ((! protected_header_parameter) || strcmp (protected_header_parameter, "v1"))
+        return false;
+    if (! GMIME_IS_MULTIPART (payload))
+        return false;
+    mpayload = GMIME_MULTIPART (payload);
+    if (mpayload == NULL)
+        return false;
+    if (g_mime_multipart_get_count (mpayload) != 2)
+        return false;
+    first = g_mime_multipart_get_part (mpayload, 0);
+    /* Early implementations that generated "Legacy Display" parts used
+     * Content-Type: text/rfc822-headers, but text/plain is more widely
+     * rendered, so it is now the standard choice. We accept either as a
+     * Legacy Display part. */
+    if (! (g_mime_content_type_is_type (g_mime_object_get_content_type (first),
+                                        "text", "plain") ||
+           g_mime_content_type_is_type (g_mime_object_get_content_type (first),
+                                        "text", "rfc822-headers")))
+        return false;
+    protected_header_parameter = g_mime_object_get_content_type_parameter (first,
+                                                                           "protected-headers");
+    if ((! protected_header_parameter) || strcmp (protected_header_parameter, "v1"))
+        return false;
+    if (! GMIME_IS_TEXT_PART (first))
+        return false;
+
+    return true;
+}
+
+/* Return the second child of 'payload' when it has a legacy display
+ * part (see above); otherwise return 'payload' itself. */
+GMimeObject *
+_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload)
+{
+    if (_notmuch_crypto_payload_has_legacy_display (payload)) {
+        return g_mime_multipart_get_part (GMIME_MULTIPART (payload), 1);
+    } else {
+        return payload;
+    }
+}
+
+/* see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.1
+ *
+ * True if 'part' is a three-part multipart/mixed consisting of an
+ * empty text/plain part, an application/pgp-encrypted part and an
+ * application/octet-stream part. */
+static bool
+_notmuch_is_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mpart = NULL;
+    GMimeObject *parts[3] = { NULL, NULL, NULL };
+    GMimeContentType *type = NULL;
+    char *prelude_string = NULL;
+    bool prelude_is_empty;
+
+    if (part == NULL)
+        return false;
+    type = g_mime_object_get_content_type (part);
+    if (type == NULL)
+        return false;
+    if (! g_mime_content_type_is_type (type, "multipart", "mixed"))
+        return false;
+    if (! GMIME_IS_MULTIPART (part)) /* probably impossible */
+        return false;
+    mpart = GMIME_MULTIPART (part);
+    if (mpart == NULL)
+        return false;
+    if (g_mime_multipart_get_count (mpart) != 3)
+        return false;
+    parts[0] = g_mime_multipart_get_part (mpart, 0);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[0]),
+                                       "text", "plain"))
+        return false;
+    if (! GMIME_IS_TEXT_PART (parts[0]))
+        return false;
+    parts[1] = g_mime_multipart_get_part (mpart, 1);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[1]),
+                                       "application", "pgp-encrypted"))
+        return false;
+    parts[2] = g_mime_multipart_get_part (mpart, 2);
+    if (! g_mime_content_type_is_type (g_mime_object_get_content_type (parts[2]),
+                                       "application", "octet-stream"))
+        return false;
+
+    /* Is parts[0] length 0? g_mime_text_part_get_text () can return
+     * NULL (e.g. a part without any content object); there is no text
+     * in that case either, so it counts as empty instead of being
+     * dereferenced. */
+    prelude_string = g_mime_text_part_get_text (GMIME_TEXT_PART (parts[0]));
+    prelude_is_empty = (prelude_string == NULL || prelude_string[0] == '\0');
+    g_free (prelude_string);
+    if (! prelude_is_empty)
+        return false;
+
+    /* FIXME: after decoding and stripping whitespace, is parts[1]
+     * subpart just "Version: 1" ? */
+
+    /* FIXME: can we determine that parts[2] subpart is *only* PGP
+     * encrypted data? I tried g_mime_part_get_openpgp_data () but
+     * found https://github.com/jstedfast/gmime/issues/60 */
+
+    return true;
+}
+
+
+/* see
+ * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1.2
+ *
+ * If 'part' is "mixed up" mangled (see above), return a newly created
+ * multipart/encrypted object holding its second and third children;
+ * otherwise return NULL. */
+GMimeObject *
+_notmuch_repair_mixed_up_mangled (GMimeObject *part)
+{
+    GMimeMultipart *mpart = NULL, *mpart_ret = NULL;
+    GMimeObject *ret = NULL;
+
+    if (! _notmuch_is_mixed_up_mangled (part))
+        return NULL;
+    mpart = GMIME_MULTIPART (part);
+    ret = GMIME_OBJECT (g_mime_multipart_encrypted_new ());
+    if (ret == NULL)
+        return NULL;
+    mpart_ret = GMIME_MULTIPART (ret);
+    if (mpart_ret == NULL) {
+        g_object_unref (ret);
+        return NULL;
+    }
+    g_mime_object_set_content_type_parameter (ret, "protocol", "application/pgp-encrypted");
+
+    g_mime_multipart_insert (mpart_ret, 0, g_mime_multipart_get_part (mpart, 1));
+    g_mime_multipart_insert (mpart_ret, 1, g_mime_multipart_get_part (mpart, 2));
+    return ret;
+}
diff --git a/util/repair.h b/util/repair.h
new file mode 100644
index 00000000..492f5a20
--- /dev/null
+++ b/util/repair.h
@@ -0,0 +1,44 @@
+#ifndef _REPAIR_H
+#define _REPAIR_H
+
+#include "gmime-extra.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is a collection of message structure and message format repair
+ * techniques that are designed to improve the user experience of
+ * notmuch */
+
+/* If payload is a cryptographic payload within an encrypted message, and
+ * it has a "legacy display" part, 
then we can skip over it and jump + * to the actual content, because notmuch already handles protected + * headers appropriately. + * + * This function either returns payload directly (if it does not have + * a "legacy display" part), or it returns a pointer to its + * content-bearing subpart, with the "legacy display" part and the + * surrounding multipart/mixed object bypassed. + * + * No new objects are created by calling this function, and the + * returned object will only be released when the original part is + * disposed of. + */ + +GMimeObject * +_notmuch_repair_crypto_payload_skip_legacy_display (GMimeObject *payload); + +/* Detecting and repairing "Mixed-Up MIME mangling". see + * https://tools.ietf.org/html/draft-dkg-openpgp-pgpmime-message-mangling-00#section-4.1 + * If this returns NULL, the message was probably not "Mixed up". If + * it returns non-NULL, then there is a newly-allocated MIME part that + * represents the repaired version. The caller is responsible for + * ensuring that any returned object is freed with g_object_unref. */ +GMimeObject * +_notmuch_repair_mixed_up_mangled (GMimeObject *part); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/util/string-util.c b/util/string-util.c new file mode 100644 index 00000000..03d7648d --- /dev/null +++ b/util/string-util.c @@ -0,0 +1,298 @@ +/* string-util.c - Extra or enhanced routines for null terminated strings. + * + * Copyright (c) 2012 Jani Nikula + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* Locate the next token of 's' delimited by the single character
 * 'delim'.
 *
 * Leading delimiters and whitespace are skipped.  A delimiter that
 * directly follows a backslash does not end the token.  Trailing
 * whitespace is excluded from the reported length.
 *
 * Returns a pointer to the start of the token (inside 's') and stores
 * its length in *len; returns NULL and stores 0 in *len when no token
 * is left.
 */
const char *
strsplit_len (const char *s, char delim, size_t *len)
{
    bool escaping = false;
    size_t count = 0, last_nonspace = 0;

    /* Skip initial unescaped delimiters and whitespace.  The cast
     * keeps bytes >= 0x80 from reaching isspace () as negative
     * values, which would be undefined behavior. */
    while (*s && (*s == delim || isspace ((unsigned char) *s)))
        s++;

    while (s[count] && (escaping || s[count] != delim)) {
        if (! isspace ((unsigned char) s[count]))
            last_nonspace = count;
        escaping = (s[count] == '\\');
        count++;
    }

    if (count == 0) {
        /* Match strtok_len, which also reports length 0 when it
         * returns NULL, so callers never read an unset length. */
        *len = 0;
        return NULL;
    }

    *len = last_nonspace + 1;
    return s;
}
/* Return a pointer to the first character of 'str' that is not
 * whitespace according to isspace (); this is the terminating NUL
 * when the string is empty or all whitespace. */
const char *
skip_space (const char *str)
{
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        if (! isspace ((unsigned char) *cursor))
            break;
    }

    return cursor;
}
/* Case-insensitive string hash with a GLib GHashFunc-compatible
 * signature.
 *
 * 'ptr' is read as a NUL-terminated string; each byte is folded with
 * tolower () before being mixed in.  A NULL pointer hashes like the
 * empty string.
 */
unsigned int
strcase_hash (const void *ptr)
{
    /* Walk the string as unsigned bytes: handing tolower () a negative
     * plain-char value (any byte >= 0x80 where char is signed) is
     * undefined behavior. */
    const unsigned char *s = ptr;

    /* This is the djb2 hash. */
    unsigned int hash = 5381;

    while (s && *s) {
        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);
        s++;
    }

    return hash;
}
Specifically, if + * term contains any non-printable ASCII characters, non-ASCII + * characters, close parenthesis or double quotes, it will be enclosed + * in double quotes and any internal double quotes will be doubled + * (e.g. a"b -> "a""b"). The result will be a valid notmuch query and + * can be parsed by parse_boolean_term. + * + * Output is into buf; it may be talloc_realloced. + * Return: 0 on success, -1 on error. errno will be set to ENOMEM if + * there is an allocation failure. + */ +int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term, + char **buf, size_t *len); + +/* Parse a boolean term query consisting of a prefix, a colon, and a + * term that may be quoted as described for make_boolean_term. If the + * term is not quoted, then it ends at the first whitespace or close + * parenthesis. str may contain leading or trailing whitespace, + * but anything else is considered a parse error. This is compatible + * with anything produced by make_boolean_term, and supports a subset + * of the quoting styles supported by Xapian (and hence notmuch). + * *prefix_out and *term_out will be talloc'd with context ctx. + * + * Return: 0 on success, -1 on error. errno will be set to EINVAL if + * there is a parse error or ENOMEM if there is an allocation failure. + */ +int +parse_boolean_term (void *ctx, const char *str, + char **prefix_out, char **term_out); + +/* strcmp that handles NULL strings; in strcmp terms a NULL string is + * considered to be less than a non-NULL string. 
/* Duplicate at most 'len' bytes of 'str' under talloc context 'ctx'
 * via talloc_strndup, then label the new chunk with 'name' via
 * talloc_set_name_const.  Returns the copy, or NULL if
 * talloc_strndup returned NULL. */
char *
talloc_strndup_named_const (void *ctx, const char *str,
                            size_t len, const char *name)
{
    char *copy = talloc_strndup (ctx, str, len);

    if (copy == NULL)
        return NULL;

    talloc_set_name_const (copy, name);
    return copy;
}
+ */ + +static bool +unicode_is_wordchar (notmuch_unichar ch) +{ + switch (g_unichar_type (ch)) { + case G_UNICODE_UPPERCASE_LETTER: + case G_UNICODE_LOWERCASE_LETTER: + case G_UNICODE_TITLECASE_LETTER: + case G_UNICODE_MODIFIER_LETTER: + case G_UNICODE_OTHER_LETTER: + case G_UNICODE_NON_SPACING_MARK: + case G_UNICODE_ENCLOSING_MARK: + case G_UNICODE_SPACING_MARK: + case G_UNICODE_DECIMAL_NUMBER: + case G_UNICODE_LETTER_NUMBER: + case G_UNICODE_OTHER_NUMBER: + case G_UNICODE_CONNECT_PUNCTUATION: + return true; + default: + return false; + } +} + +bool +unicode_word_utf8 (const char *utf8_str) +{ + gunichar *decoded = g_utf8_to_ucs4_fast (utf8_str, -1, NULL); + const gunichar *p = decoded; + bool ret; + + while (*p && unicode_is_wordchar (*p)) + p++; + + ret = (*p == '\0'); + + g_free (decoded); + return ret; +} diff --git a/util/unicode-util.h b/util/unicode-util.h new file mode 100644 index 00000000..1bb9336a --- /dev/null +++ b/util/unicode-util.h @@ -0,0 +1,19 @@ +#ifndef UNICODE_UTIL_H +#define UNICODE_UTIL_H + +#include <stdbool.h> +#include <gmodule.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* The utf8 encoded string would tokenize as a single word, according + * to xapian. 
*/ +bool unicode_word_utf8 (const char *str); +typedef gunichar notmuch_unichar; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/util/util.c b/util/util.c new file mode 100644 index 00000000..6abe2215 --- /dev/null +++ b/util/util.c @@ -0,0 +1,24 @@ +#include "util.h" +#include "error_util.h" +#include <string.h> +#include <errno.h> + +const char * +util_error_string (util_status_t errnum) +{ + switch (errnum) { + case UTIL_SUCCESS: + return "success"; + case UTIL_OUT_OF_MEMORY: + return "out of memory"; + case UTIL_EOF: + return "end of file"; + case UTIL_ERRNO: + return strerror (errno); + case UTIL_GZERROR: + /* we lack context to be more informative here */ + return "zlib error"; + default: + INTERNAL_ERROR ("unexpected error status %d", errnum); + } +} diff --git a/util/util.h b/util/util.h new file mode 100644 index 00000000..b24860af --- /dev/null +++ b/util/util.h @@ -0,0 +1,29 @@ +#ifndef _UTIL_H +#define _UTIL_H + +typedef enum util_status { + /** + * No error occurred. + */ + UTIL_SUCCESS = 0, + /** + * Out of memory. + */ + UTIL_OUT_OF_MEMORY, + /** + * End of stream reached while attempting to read. + */ + UTIL_EOF, + /** + * Low level error occurred, consult errno. + */ + UTIL_ERRNO, + /** + * Zlib error occurred, call gzerror for details. + */ + UTIL_GZERROR +} util_status_t; + +const char * +util_error_string (util_status_t status); +#endif diff --git a/util/xapian-extra.h b/util/xapian-extra.h new file mode 100644 index 00000000..39c7f48f --- /dev/null +++ b/util/xapian-extra.h @@ -0,0 +1,15 @@ +#ifndef _XAPIAN_EXTRA_H +#define _XAPIAN_EXTRA_H + +#include <string> +#include <xapian.h> + +inline Xapian::Query +xapian_query_match_all (void) +{ + // Xapian::Query::MatchAll isn't thread safe (a static object with reference + // counting) so instead reconstruct the equivalent on demand. 
/* Duplicate at most 'n' bytes of 's' into a freshly malloc'd,
 * NUL-terminated string.
 *
 * At most 'n' bytes of 's' are examined, so 's' does not need to be
 * NUL-terminated as long as it is at least 'n' bytes long.  Prints
 * "Out of memory." to stderr and exits with status 1 if the
 * allocation fails, so the return value is never NULL.  The caller
 * owns the result and releases it with free ().
 */
char *
xstrndup (const char *s, size_t n)
{
    /* Bounded search for the terminator: strlen () would keep reading
     * past the first n bytes when they contain no NUL. */
    const char *terminator = memchr (s, '\0', n);
    char *ret;

    if (terminator != NULL)
        n = (size_t) (terminator - s);

    ret = malloc (n + 1);
    if (ret == NULL) {
        fprintf (stderr, "Out of memory.\n");
        exit (1);
    }
    memcpy (ret, s, n);
    ret[n] = '\0';

    return ret;
}
+ * + * Author: Carl Worth <cworth@cworth.org> + */ + +#ifndef NOTMUCH_XUTIL_H +#define NOTMUCH_XUTIL_H + +#include <stdlib.h> +#include <sys/types.h> +#include <regex.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* xutil.c */ +void * +xcalloc (size_t nmemb, size_t size); + +void * +xmalloc (size_t size); + +void * +xrealloc (void *ptrr, size_t size); + +char * +xstrdup (const char *s); + +char * +xstrndup (const char *s, size_t n); + +/* Returns 0 for successful compilation, 1 otherwise */ +int +xregcomp (regex_t *preg, const char *regex, int cflags); + +int +xregexec (const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/util/zlib-extra.c b/util/zlib-extra.c new file mode 100644 index 00000000..1f5f9dbe --- /dev/null +++ b/util/zlib-extra.c @@ -0,0 +1,95 @@ +/* zlib-extra.c - Extra or enhanced routines for compressed I/O. + * + * Copyright (c) 2014 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/ . 
+ * + * Author: David Bremner <david@tethera.net> + */ + +#include "zlib-extra.h" +#include <talloc.h> +#include <stdio.h> +#include <string.h> + +/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ +util_status_t +gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) +{ + char *buf = *bufptr; + unsigned int len; + size_t offset = 0; + + if (buf) { + len = talloc_array_length (buf); + } else { + /* same as getdelim from gnulib */ + len = 120; + buf = talloc_array (talloc_ctx, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } + + while (1) { + if (! gzgets (stream, buf + offset, len - offset)) { + /* Null indicates EOF or error */ + int zlib_status = 0; + (void) gzerror (stream, &zlib_status); + switch (zlib_status) { + case Z_STREAM_END: + case Z_OK: + /* no data read before EOF */ + if (offset == 0) + return UTIL_EOF; + else + goto SUCCESS; + case Z_ERRNO: + return UTIL_ERRNO; + default: + return UTIL_GZERROR; + } + } + + offset += strlen (buf + offset); + + if (buf[offset - 1] == '\n') + goto SUCCESS; + + len *= 2; + buf = talloc_realloc (talloc_ctx, buf, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } + SUCCESS: + *bufptr = buf; + *bytes_read = offset; + return UTIL_SUCCESS; +} + +const char * +gz_error_string (util_status_t status, gzFile file) +{ + if (status == UTIL_GZERROR) + return gzerror_str (file); + else + return util_error_string (status); +} + +const char * +gzerror_str (gzFile file) +{ + int dummy; + + return gzerror (file, &dummy); +} diff --git a/util/zlib-extra.h b/util/zlib-extra.h new file mode 100644 index 00000000..7532339b --- /dev/null +++ b/util/zlib-extra.h @@ -0,0 +1,39 @@ +#ifndef _ZLIB_EXTRA_H +#define _ZLIB_EXTRA_H + +#include "util.h" +#include <zlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Like getline, but read from a gzFile. Allocation is with talloc. 
+ * Returns: + * + * UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR + * Consult util.h for description + * + * UTIL_EOF End of file encountered before + * any characters read + */ +util_status_t +gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); + +/* return a suitable error string based on the return status + * from gz_getline + */ + +const char * +gz_error_string (util_status_t status, gzFile stream); + +/* Call gzerror with a dummy errno argument, the docs don't promise to + * support the NULL case */ +const char * +gzerror_str (gzFile file); + +#ifdef __cplusplus +} +#endif + +#endif |
