1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2000-2009 Jeffrey Stedfast
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free
17 * Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
33 #ifdef HAVE_SYS_PARAM_H
34 #include <sys/param.h> /* for MAXHOSTNAMELEN */
36 #define MAXHOSTNAMELEN 64
38 #ifdef HAVE_UTSNAME_DOMAINNAME
39 #include <sys/utsname.h> /* for uname() */
41 #include <sys/types.h>
43 #include <unistd.h> /* Unix header for getpid() */
49 #define getpid() _getpid()
57 #include "gmime-utils.h"
58 #include "gmime-table-private.h"
59 #include "gmime-parse-utils.h"
60 #include "gmime-part.h"
61 #include "gmime-charset.h"
62 #include "gmime-iconv.h"
63 #include "gmime-iconv-utils.h"
65 #ifdef ENABLE_WARNINGS
69 #endif /* ENABLE_WARNINGS */
75 * SECTION: gmime-utils
77 * @short_description: MIME utility functions
80 * Utility functions to parse, encode and decode various MIME tokens
84 extern gboolean _g_mime_enable_rfc2047_workarounds (void);
/* Half of GMIME_FOLD_LEN (which is defined outside this section). */
86 #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
88 /* date parser macros */
/* Character classes consulted by gmime_datetok_table_init(): a byte that is
 * NOT found in one of these strings gets the matching DATE_TOKEN_NON_* bit. */
89 #define NUMERIC_CHARS "1234567890"
90 #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
91 #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
92 #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
93 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
94 #define TIME_CHARS "1234567890:"
/* Bits stored in gmime_datetok_table[] and OR-ed into a token's mask by
 * datetok().  The NON_* bits are negative flags: a token belongs to a class
 * only while the bit is still clear after all of its bytes were merged (this
 * is what the is_*() macros test).  HAS_COLON and HAS_SIGN are positive
 * flags that additionally gate is_time() and is_tzone_numeric(). */
96 #define DATE_TOKEN_NON_NUMERIC (1 << 0)
97 #define DATE_TOKEN_NON_WEEKDAY (1 << 1)
98 #define DATE_TOKEN_NON_MONTH (1 << 2)
99 #define DATE_TOKEN_NON_TIME (1 << 3)
100 #define DATE_TOKEN_HAS_COLON (1 << 4)
101 #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
102 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
103 #define DATE_TOKEN_HAS_SIGN (1 << 7)
/* Upper-case hexadecimal digit characters, indexed by nibble value 0..15. */
105 static unsigned char tohex[16] = {
106 '0', '1', '2', '3', '4', '5', '6', '7',
107 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
/* Per-byte DATE_TOKEN_* bit masks, indexed by unsigned char value.
 * gmime_datetok_table_init() prints a declaration of exactly this form, so
 * these values appear to be its pre-computed output -- regenerate rather
 * than editing by hand (TODO confirm).  Example: the digits hold 38, i.e.
 * NON_WEEKDAY | NON_MONTH | NON_TIMEZONE_ALPHA. */
110 static unsigned char gmime_datetok_table[256] = {
111 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
112 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
113 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
114 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
115 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
116 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
117 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
118 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
119 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
120 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
121 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
122 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
123 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
124 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
125 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
126 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
129 /* hrm, is there a library for this shit? */
136 { "EST", -500 }, /* these are all US timezones. bloody yanks */
/* Three-letter English month abbreviations in calendar order.  Indexed by
 * tm_mon when formatting a date; get_month() compares the first three
 * characters of a token against these entries case-insensitively. */
151 static char *tm_months[] = {
152 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
153 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
/* Three-letter English weekday abbreviations starting with Sunday.  Indexed
 * by tm_wday when formatting a date; get_wday() matches tokens against
 * these entries case-insensitively. */
156 static char *tm_days[] = {
157 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
162 * g_mime_utils_header_format_date:
163 * @date: time_t date representation
164 * @tz_offset: Timezone offset
166 * Allocates a string buffer containing the rfc822 formatted date
167 * string represented by @date and @tz_offset.
169 * Returns: a valid string representation of the date.
172 g_mime_utils_header_format_date (time_t date, int tz_offset)
/* tz_offset is a signed HHMM value (e.g. -500): the hundreds are hours and
 * the remainder minutes.  Shifting the timestamp by that many seconds makes
 * the UTC breakdown below yield the wall-clock fields of that zone. */
176 date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
178 #if defined (HAVE_GMTIME_R)
179 gmtime_r (&date, &tm);
180 #elif defined (HAVE_GMTIME_S)
181 gmtime_s (&tm, &date);
/* NOTE(review): fallback path; gmtime() hands back shared static storage and
 * may return NULL, and the result is copied without a check. */
183 memcpy (&tm, gmtime (&date), sizeof (tm));
/* Output shape: "Day, DD Mon YYYY HH:MM:SS +ZZZZ". */
186 return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
187 tm_days[tm.tm_wday], tm.tm_mday,
188 tm_months[tm.tm_mon],
190 tm.tm_hour, tm.tm_min, tm.tm_sec,
194 /* This is where it gets ugly... */
/* One node of the singly linked token list built by datetok().  The link
 * field is declared first; datetok() relies on that when it treats the
 * address of the list head as if it were a node. */
196 typedef struct _date_token {
197 struct _date_token *next;
/* Nodes are allocated and released through the GLib slice allocator. */
203 #define date_token_free(tok) g_slice_free (date_token, tok)
204 #define date_token_new() g_slice_new (date_token)
/* Splits a date string into a linked list of date_token nodes.  Tokens do
 * not copy text: each one records a pointer into the input plus a length. */
207 datetok (const char *date)
/* tail starts out aliasing the head pointer itself, so appending through
 * tail->next also initialises `tokens` for the first node (valid only
 * because `next` is the first struct member). */
209 date_token *tokens = NULL, *token, *tail = (date_token *) &tokens;
210 const char *start, *end;
215 /* kill leading whitespace */
216 while (*start == ' ' || *start == '\t')
/* The first byte always seeds the classification mask... */
222 mask = gmime_datetok_table[(unsigned char) *start];
224 /* find the end of this token */
/* ...and every further byte up to the next delimiter (dash, slash, comma or
 * whitespace) is OR-ed in, so the mask describes the token as a whole. */
226 while (*end && !strchr ("-/,\t\r\n ", *end))
227 mask |= gmime_datetok_table[(unsigned char) *end++];
230 token = date_token_new ();
232 token->start = start;
233 token->len = end - start;
/* Parses an optionally signed run of decimal digits from a length-bounded
 * (not NUL-terminated) buffer.  Callers in this file compare the result
 * against -1 to detect failure. */
250 decode_int (const char *in, size_t inlen)
252 register const char *inptr;
253 int sign = 1, val = 0;
262 } else if (*inptr == '+')
265 for ( ; inptr < inend; inptr++) {
/* any byte that is not an ASCII digit ends the parse */
266 if (!(*inptr >= '0' && *inptr <= '9'))
/* NOTE(review): no overflow guard is visible here; a long digit string
 * could overflow int.  Also a literal "-1" presumably cannot be told apart
 * from the failure value -- confirm. */
269 val = (val * 10) + (*inptr - '0');
/* Helper taking a month and a year; presumably yields the number of days in
 * that month (TODO confirm month numbering against the callers). */
279 get_days_in_month (int month, int year)
/* the leap-year decision is delegated to GLib */
296 if (g_date_is_leap_year (year))
/* Maps a weekday token to its index in tm_days[] (0 = Sunday), or -1 when
 * the token matches no weekday. */
307 get_wday (const char *in, size_t inlen)
311 g_return_val_if_fail (in != NULL, -1);
316 for (wday = 0; wday < 7; wday++) {
/* Only the first three characters are compared, ignoring ASCII case, so
 * "Monday" and "mon" both match.  NOTE(review): the comparison itself does
 * not consult inlen; presumably a length check precedes this loop. */
317 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
321 return -1; /* unknown week day */
/* Parses a day-of-month token via decode_int() and range-checks it.
 * Callers test the result against -1. */
325 get_mday (const char *in, size_t inlen)
329 g_return_val_if_fail (in != NULL, -1);
331 mday = decode_int (in, inlen);
/* NOTE(review): as written this range check lets 0 through. */
333 if (mday < 0 || mday > 31)
/* Maps a month-name token to a tm_months[] index, or -1 when the token
 * matches no month. */
340 get_month (const char *in, size_t inlen)
344 g_return_val_if_fail (in != NULL, -1);
349 for (i = 0; i < 12; i++) {
/* first three characters only, ASCII case-insensitive */
350 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
354 return -1; /* unknown month */
/* Parses a year token.  Callers subtract 1900 from the result to obtain
 * tm_year and treat -1 as failure. */
358 get_year (const char *in, size_t inlen)
362 g_return_val_if_fail (in != NULL, -1);
364 if ((year = decode_int (in, inlen)) == -1)
/* Century pivot: values below 70 are mapped into 20xx, everything else into
 * 19xx.  Presumably this is applied to short (two-digit) years only --
 * TODO confirm the guarding condition. */
368 year += (year < 70) ? 2000 : 1900;
/* Parses a colon-separated time token into *hour, *min and *sec.  Callers
 * treat a zero (false) result as failure. */
377 get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
379 register const char *inptr;
/* val points at whichever output field is currently being accumulated;
 * colons counts the separators seen so far. */
380 int *val, colons = 0;
/* all three outputs are reset up front, so fields missing from the token
 * stay at zero */
383 *hour = *min = *sec = 0;
387 for (inptr = in; inptr < inend; inptr++) {
400 } else if (!(*inptr >= '0' && *inptr <= '9'))
/* decimal accumulation into the current field */
403 *val = (*val * 10) + (*inptr - '0');
/* Extracts a timezone offset (signed HHMM) starting at *token.  Callers
 * treat -1 as "no timezone found". */
410 get_tzone (date_token **token)
412 const char *inptr, *inend;
/* At most two consecutive tokens are examined, and the caller's token
 * pointer is advanced as they are consumed. */
416 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
417 inptr = (*token)->start;
418 inlen = (*token)->len;
419 inend = inptr + inlen;
/* numeric zone such as +0200 or -0500: the sign is parsed by decode_int() */
421 if (*inptr == '+' || *inptr == '-') {
422 return decode_int (inptr, inlen);
/* NOTE(review): reads the byte before inend; this assumes the token is not
 * empty -- confirm. */
426 if (*(inend - 1) == ')')
/* NOTE(review): the literal 15 must stay in sync with the number of entries
 * in tz_offsets[]. */
432 for (t = 0; t < 15; t++) {
433 size_t len = strlen (tz_offsets[t].name);
/* case-sensitive comparison over the length of the table name */
438 if (!strncmp (inptr, tz_offsets[t].name, len))
439 return tz_offsets[t].offset;
/* Converts a broken-down time to a time_t; the date parsers below treat the
 * result as GMT.  The local UTC offset is obtained through whichever
 * facility the build configuration provides (Windows, tm_gmtoff, timezone /
 * altzone, or _timezone); with none of them the build fails on purpose. */
448 mktime_utc (struct tm *tm)
456 #if defined (G_OS_WIN32)
458 if (tm->tm_isdst > 0) {
464 #elif defined (HAVE_TM_GMTOFF)
466 #elif defined (HAVE_TIMEZONE)
467 if (tm->tm_isdst > 0) {
468 #if defined (HAVE_ALTZONE)
470 #else /* !defined (HAVE_ALTZONE) */
/* without altzone, daylight saving time is approximated as the standard
 * offset minus one hour */
471 tz = (timezone - 3600);
476 #elif defined (HAVE__TIMEZONE)
479 #error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
/* Strict parser: expects the tokens in the order
 * [weekday] day month year time [timezone].  The public decoder treats a
 * result of 0 as failure and then falls back to parse_broken_date(). */
486 parse_rfc822_date (date_token *tokens, int *tzone)
488 int hour, min, sec, offset, n;
493 g_return_val_if_fail (tokens != NULL, (time_t) 0);
497 memset ((void *) &tm, 0, sizeof (struct tm));
499 if ((n = get_wday (token->start, token->len)) != -1) {
500 /* not all dates may have this... */
/* every remaining field is mandatory: running out of tokens or failing to
 * parse one aborts the strict parse */
506 if (!token || (n = get_mday (token->start, token->len)) == -1)
513 if (!token || (n = get_month (token->start, token->len)) == -1)
520 if (!token || (n = get_year (token->start, token->len)) == -1)
/* struct tm counts years from 1900 */
523 tm.tm_year = n - 1900;
526 /* get the hour/min/sec */
527 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
535 /* get the timezone */
536 if (!token || (n = get_tzone (&token)) == -1) {
537 /* I guess we assume tz is GMT? */
543 t = mktime_utc (&tm);
545 /* t is now GMT of the time we want, but not offset by the timezone ... */
547 /* this should convert the time to the GMT equiv time */
/* offset is signed HHMM; C integer division and remainder both truncate
 * toward zero, so hours and minutes carry the same sign */
548 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* Token classification predicates for parse_broken_date().  Each one tests
 * the mask that datetok() accumulated: a clear NON_* bit means no byte of
 * the token fell outside that character class.  is_time() additionally
 * requires a colon and is_tzone_numeric() a sign character.  These are
 * cheap character-class filters only; the get_*() parsers do the real
 * validation. */
557 #define date_token_mask(t) (((date_token *) t)->mask)
558 #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
559 #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
560 #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
561 #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
562 #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
563 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
564 #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
/* Lenient fallback parser: instead of requiring a fixed field order it
 * classifies every token by its character-class mask and assigns it to the
 * first date field that is still unset and that it can be parsed as. */
567 parse_broken_date (date_token *tokens, int *tzone)
569 gboolean got_wday, got_month, got_tzone;
570 int hour, min, sec, offset, n;
575 memset ((void *) &tm, 0, sizeof (struct tm));
576 got_wday = got_month = got_tzone = FALSE;
581 if (is_weekday (token) && !got_wday) {
582 if ((n = get_wday (token->start, token->len)) != -1) {
583 d(printf ("weekday; "));
590 if (is_month (token) && !got_month) {
591 if ((n = get_month (token->start, token->len)) != -1) {
592 d(printf ("month; "));
/* "time not yet seen" is inferred from all three fields still being zero,
 * so a parsed 00:00:00 does not block a later time token. */
599 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
600 if (get_time (token->start, token->len, &hour, &min, &sec)) {
601 d(printf ("time; "));
609 if (is_tzone (token) && !got_tzone) {
/* get_tzone() advances the pointer it is given, hence the scratch copy */
610 date_token *t = token;
612 if ((n = get_tzone (&t)) != -1) {
613 d(printf ("tzone; "));
620 if (is_numeric (token)) {
/* exactly four digits: taken as a full year */
621 if (token->len == 4 && !tm.tm_year) {
622 if ((n = get_year (token->start, token->len)) != -1) {
623 d(printf ("year; "));
624 tm.tm_year = n - 1900;
628 /* Note: assumes MM-DD-YY ordering when the first number is not greater than 12 */
629 if (!got_month && token->next && is_numeric (token->next)) {
630 if ((n = decode_int (token->start, token->len)) > 12) {
638 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
640 d(printf ("mday; "));
643 } else if (!tm.tm_year) {
644 if ((n = get_year (token->start, token->len)) != -1) {
645 d(printf ("2-digit year; "));
646 tm.tm_year = n - 1900;
662 t = mktime_utc (&tm);
664 /* t is now GMT of the time we want, but not offset by the timezone ... */
666 /* this should convert the time to the GMT equiv time */
667 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* Generator for gmime_datetok_table[]: recomputes the DATE_TOKEN_* mask of
 * every byte value from the *_CHARS strings and prints the table as a C
 * declaration on stdout. */
677 gmime_datetok_table_init (void)
681 memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
683 for (i = 0; i < 256; i++) {
/* strchr() also matches the terminating NUL, so byte value 0 receives none
 * of the NON_* bits below (and does receive HAS_SIGN), which agrees with
 * the first entry of the static table being 128. */
684 if (!strchr (NUMERIC_CHARS, i))
685 gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
687 if (!strchr (WEEKDAY_CHARS, i))
688 gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
690 if (!strchr (MONTH_CHARS, i))
691 gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
693 if (!strchr (TIME_CHARS, i))
694 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
696 if (!strchr (TIMEZONE_ALPHA_CHARS, i))
697 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
699 if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
700 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
/* positive flags */
702 if (((char) i) == ':')
703 gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
705 if (strchr ("+-", i))
706 gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
/* emit the table as source text */
709 printf ("static unsigned char gmime_datetok_table[256] = {");
710 for (i = 0; i < 256; i++) {
713 printf ("%3d,", gmime_datetok_table[i]);
721 * g_mime_utils_header_decode_date:
722 * @str: input date string
723 * @tz_offset: timezone offset
725 * Decodes the rfc822 date string and saves the GMT offset into
726 * @tz_offset if non-NULL.
728 * Returns: the time_t representation of the date string specified by
729 * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
730 * of the timezone offset will be stored.
733 g_mime_utils_header_decode_date (const char *str, int *tz_offset)
735 date_token *token, *tokens;
/* no tokens at all: nothing to parse */
738 if (!(tokens = datetok (str))) {
/* Try the strict parser first and fall back to the lenient one when it
 * yields 0.  Because 0 doubles as the failure value, a date that decodes
 * to exactly the epoch is also retried with the lenient parser. */
745 if (!(date = parse_rfc822_date (tokens, tz_offset)))
746 date = parse_broken_date (tokens, tz_offset);
/* release the token list node by node */
751 tokens = tokens->next;
752 date_token_free (token);
760 * g_mime_utils_generate_message_id:
761 * @fqdn: Fully qualified domain name
763 * Generates a unique Message-Id.
765 * Returns: a unique string in an addr-spec format suitable for use as
769 g_mime_utils_generate_message_id (const char *fqdn)
/* NOTE(review): GStaticMutex is deprecated in newer GLib releases (GMutex
 * is the replacement) -- confirm the minimum GLib version before changing. */
771 #ifdef G_THREADS_ENABLED
772 static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
773 #define MUTEX_LOCK() g_static_mutex_lock (&mutex)
774 #define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex)
777 #define MUTEX_UNLOCK()
/* process-wide counter that keeps ids generated within the same second and
 * process distinct; presumably incremented under the mutex above */
779 static unsigned long int count = 0;
780 const char *hostname = NULL;
/* Strategy 1: node name plus domain name from uname(), when available. */
785 #ifdef HAVE_UTSNAME_DOMAINNAME
790 hostname = unam.nodename;
792 if (unam.domainname[0])
793 name = g_strdup_printf ("%s.%s", hostname, unam.domainname);
794 #else /* ! HAVE_UTSNAME_DOMAINNAME */
/* Strategy 2: gethostname() plus getdomainname(). */
795 char host[MAXHOSTNAMELEN + 1];
797 #ifdef HAVE_GETHOSTNAME
/* gethostname() is given MAXHOSTNAMELEN bytes, so the extra byte guarantees
 * NUL termination even if the name was truncated */
798 host[MAXHOSTNAMELEN] = '\0';
799 if (gethostname (host, MAXHOSTNAMELEN) == 0) {
800 #ifdef HAVE_GETDOMAINNAME
801 size_t domainlen = MAXHOSTNAMELEN;
805 domain = g_malloc (domainlen);
/* grow the buffer for as long as the call reports EINVAL */
807 while ((rv = getdomainname (domain, domainlen)) == -1 && errno == EINVAL) {
808 domainlen += MAXHOSTNAMELEN;
809 domain = g_realloc (domain, domainlen);
812 if (rv == 0 && domain[0]) {
814 name = g_strdup_printf ("%s.%s", host, domain);
820 #endif /* HAVE_GETDOMAINNAME */
824 #endif /* HAVE_GETHOSTNAME */
826 #endif /* HAVE_UTSNAME_DOMAINNAME */
/* Strategy 3: ask the resolver for the canonical name of the host. */
828 #ifdef HAVE_GETADDRINFO
/* NOTE(review): hostname starts out NULL and is dereferenced here; verify
 * that every configuration assigns it before this point. */
829 if (!name && hostname[0]) {
830 /* we weren't able to get a domain name */
831 struct addrinfo hints, *res;
833 memset (&hints, 0, sizeof (hints));
834 hints.ai_flags = AI_CANONNAME;
836 if (getaddrinfo (hostname, NULL, &hints, &res) == 0) {
837 name = g_strdup (res->ai_canonname);
841 #endif /* HAVE_GETADDRINFO */
/* preference order: composed or canonical name, bare host name, then a
 * fixed placeholder */
843 fqdn = name != NULL ? name : (hostname[0] ? hostname : "localhost.localdomain");
/* id layout: seconds-since-epoch "." pid "." counter "@" fqdn */
847 msgid = g_strdup_printf ("%lu.%lu.%lu@%s", (unsigned long int) time (NULL),
848 (unsigned long int) getpid (), count++, fqdn);
/* Parses an addr-spec (local-part "@" domain) starting at *in and builds a
 * normalised copy of it in a GString.  decode_msgid() treats a NULL result
 * as "no addr-spec". */
857 decode_addrspec (const char **in)
859 const char *word, *inptr;
/* first word of the local-part */
866 if (!(word = decode_word (&inptr))) {
867 w(g_warning ("No local-part in addr-spec: %s", *in));
871 addrspec = g_string_new ("");
872 g_string_append_len (addrspec, word, (size_t) (inptr - word));
874 /* get the rest of the local-part */
875 decode_lwsp (&inptr);
/* further dot-separated words; whitespace around the dots is skipped and
 * not copied into the result */
876 while (*inptr == '.') {
877 g_string_append_c (addrspec, *inptr++);
878 if ((word = decode_word (&inptr))) {
879 g_string_append_len (addrspec, word, (size_t) (inptr - word));
880 decode_lwsp (&inptr);
882 w(g_warning ("Invalid local-part in addr-spec: %s", *in));
887 /* we should be at the '@' now... */
888 if (*inptr++ != '@') {
889 w(g_warning ("Invalid addr-spec; missing '@': %s", *in));
893 g_string_append_c (addrspec, '@');
/* decode_domain() appends the domain part to addrspec itself */
894 if (!decode_domain (&inptr, addrspec)) {
895 w(g_warning ("No domain in addr-spec: %s", *in));
/* FALSE releases only the GString wrapper and keeps the character data
 * alive -- this is the success path... */
900 g_string_free (addrspec, FALSE);
/* ...while TRUE also frees the character data -- the error path. */
908 g_string_free (addrspec, TRUE);
/* Parses one msg-id token -- an addr-spec wrapped in angle brackets -- at
 * *in and returns it as a newly allocated string. */
914 decode_msgid (const char **in)
916 const char *inptr = *in;
919 decode_lwsp (&inptr);
921 w(g_warning ("Invalid msg-id; missing '<': %s", *in));
926 decode_lwsp (&inptr);
927 if ((msgid = decode_addrspec (&inptr))) {
928 decode_lwsp (&inptr);
930 w(g_warning ("Invalid msg-id; missing '>': %s", *in));
937 w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in));
/* Lenient fallback: when no addr-spec could be parsed, take the raw text up
 * to the closing bracket (or the end of the string) as the id. */
939 while (*inptr && *inptr != '>')
942 msgid = g_strndup (*in, (size_t) (inptr - *in));
951 * g_mime_utils_decode_message_id:
952 * @message_id: string containing a message-id
954 * Decodes a msg-id as defined by rfc822.
956 * Returns: the addr-spec portion of the msg-id.
959 g_mime_utils_decode_message_id (const char *message_id)
961 g_return_val_if_fail (message_id != NULL, NULL);
/* decode_msgid() takes the address of its cursor; handing it the address of
 * the by-value parameter keeps the caller's own pointer untouched */
963 return decode_msgid (&message_id);
968 * g_mime_references_decode:
969 * @text: string containing a list of msg-ids
971 * Decodes a list of msg-ids as in the References and/or In-Reply-To
972 * headers defined in rfc822.
974 * Returns: a list of referenced msg-ids.
977 g_mime_references_decode (const char *text)
979 GMimeReferences *refs, *tail, *ref;
980 const char *word, *inptr = text;
983 g_return_val_if_fail (text != NULL, NULL);
/* tail aliases the head pointer so the first append needs no special case.
 * NOTE(review): this presumes `next` is the first member of
 * GMimeReferences -- confirm against the header. */
986 tail = (GMimeReferences *) &refs;
989 decode_lwsp (&inptr);
991 /* looks like a msg-id */
992 if ((msgid = decode_msgid (&inptr))) {
993 ref = g_new (GMimeReferences, 1);
999 w(g_warning ("Invalid References header: %s", inptr));
1002 } else if (*inptr) {
1003 /* looks like part of a phrase */
/* words that are not msg-ids are parsed only so the cursor moves past them */
1004 if (!(word = decode_word (&inptr))) {
1005 w(g_warning ("Invalid References header: %s", inptr));
1016 * g_mime_references_append:
1017 * @refs: the address of a #GMimeReferences list
1018 * @msgid: a message-id string
1020 * Appends a reference to msgid to the list of references.
1023 g_mime_references_append (GMimeReferences **refs, const char *msgid)
1025 GMimeReferences *ref;
1027 g_return_if_fail (refs != NULL);
1028 g_return_if_fail (msgid != NULL);
/* Treat the head pointer itself as a pseudo-node so that an empty list needs
 * no special case.  NOTE(review): presumes `next` is the first member of
 * GMimeReferences -- confirm against the header. */
1030 ref = (GMimeReferences *) refs;
/* the new node owns a private copy of msgid and terminates the list */
1034 ref->next = g_new (GMimeReferences, 1);
1035 ref->next->msgid = g_strdup (msgid);
1036 ref->next->next = NULL;
1041 * g_mime_references_free:
1042 * @refs: a #GMimeReferences list
1044 * Frees the #GMimeReferences list.
1047 g_mime_references_free (GMimeReferences *refs)
/* two cursors, presumably so the link can be saved before a node is freed */
1049 GMimeReferences *ref, *next;
/* each node owns its msgid string */
1054 g_free (ref->msgid);
1062 * g_mime_references_clear:
1063 * @refs: address of a #GMimeReferences list
1065 * Clears the #GMimeReferences list and resets it to %NULL.
1068 g_mime_references_clear (GMimeReferences **refs)
1070 g_return_if_fail (refs != NULL);
/* frees every node of the list the caller's pointer refers to */
1072 g_mime_references_free (*refs);
1078 * g_mime_references_get_next:
1079 * @ref: a #GMimeReferences list
1081 * Advances to the next reference node in the #GMimeReferences list.
1083 * Returns: the next reference node in the #GMimeReferences list.
1085 const GMimeReferences *
1086 g_mime_references_get_next (const GMimeReferences *ref)
/* NULL-tolerant: a NULL node simply yields NULL */
1088 return ref ? ref->next : NULL;
1093 * g_mime_references_get_message_id:
1094 * @ref: a #GMimeReferences list
1096 * Gets the Message-Id reference from the #GMimeReferences node.
1098 * Returns: the Message-Id reference from the #GMimeReferences node.
1101 g_mime_references_get_message_id (const GMimeReferences *ref)
/* NULL-tolerant; the returned string is the node's own msgid, not a copy */
1103 return ref ? ref->msgid : NULL;
/* Cheap structural test for an rfc2047 encoded-word of the form
 * =?charset?X?text?= where X is one of q, Q, b or B.  header_fold() uses it
 * to decide whether a long word may be chopped. */
1108 is_rfc2047_token (const char *inptr, size_t len)
/* NOTE(review): the minimum length here is 8, whereas the
 * is_rfc2047_encoded_word() macro elsewhere in this file uses 7. */
1110 if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0)
1116 /* skip past the charset */
1117 while (*inptr != '?' && len > 0) {
/* the charset must be followed by a '?' with room left for the rest */
1122 if (*inptr != '?' || len < 4)
/* encoding selector: quoted-printable-like (q) or base64 (b), any case */
1125 if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B')
/* Folds one header onto multiple lines, breaking at whitespace so that
 * lines stay within GMIME_FOLD_LEN where possible.  For unstructured
 * headers, over-long words are additionally chopped unless they look like
 * rfc2047 encoded-words.  The result is a newly allocated string. */
1138 header_fold (const char *in, gboolean structured)
1140 gboolean last_was_lwsp = FALSE;
1141 register const char *inptr;
1142 size_t len, outlen, i;
/* short headers are returned as a plain copy */
1149 if (len <= GMIME_FOLD_LEN + 1)
1150 return g_strdup (in);
1152 out = g_string_new ("");
/* the field name (everything before the first colon or whitespace) is
 * copied verbatim and never folded */
1153 fieldlen = strcspn (inptr, ": \t\n");
1154 g_string_append_len (out, inptr, fieldlen);
1158 while (*inptr && *inptr != '\n') {
/* length of the next whitespace-delimited word */
1159 len = strcspn (inptr, " \t\n");
/* the word would push the current line past the limit */
1161 if (len > 1 && outlen + len > GMIME_FOLD_LEN) {
/* fold only when something beyond the field name is already on the line */
1162 if (outlen > 1 && out->len > fieldlen + 2) {
1163 if (last_was_lwsp) {
/* reuse the whitespace that was just emitted as the continuation
 * indent and put the line break in front of it */
1165 out->str[out->len - 1] = '\t';
1167 g_string_insert_c (out, out->len - 1, '\n');
1169 g_string_append (out, "\n\t");
1173 if (!structured && !is_rfc2047_token (inptr, len)) {
1174 /* check for very long words, just cut them up */
1175 while (outlen + len > GMIME_FOLD_LEN) {
1176 for (i = 0; i < GMIME_FOLD_LEN - outlen; i++)
1177 g_string_append_c (out, inptr[i]);
1178 inptr += GMIME_FOLD_LEN - outlen;
1179 len -= GMIME_FOLD_LEN - outlen;
1180 g_string_append (out, "\n\t");
1184 g_string_append_len (out, inptr, len);
1188 last_was_lwsp = FALSE;
1189 } else if (len > 0) {
/* the word fits on the current line */
1190 g_string_append_len (out, inptr, len);
1193 last_was_lwsp = FALSE;
1195 last_was_lwsp = TRUE;
1196 if (*inptr == '\t') {
1197 /* tabs are a good place to fold, odds
1198 are that this is where the previous
1200 g_string_append (out, "\n\t");
1202 while (is_blank (*inptr))
1205 g_string_append_c (out, *inptr++);
/* keep the input's trailing newline without doubling it */
1211 if (*inptr == '\n' && out->str[out->len - 1] != '\n')
1212 g_string_append_c (out, '\n');
/* FALSE keeps the character data alive for the caller */
1215 g_string_free (out, FALSE);
1222 * g_mime_utils_structured_header_fold:
1223 * @str: input string
1225 * Folds a structured header according to the rules in rfc822.
1227 * Returns: an allocated string containing the folded header.
1230 g_mime_utils_structured_header_fold (const char *str)
/* structured mode: header_fold() never chops individual words */
1232 return header_fold (str, TRUE);
1237 * g_mime_utils_unstructured_header_fold:
1238 * @str: input string
1240 * Folds an unstructured header according to the rules in rfc822.
1242 * Returns: an allocated string containing the folded header.
1245 g_mime_utils_unstructured_header_fold (const char *str)
/* unstructured mode: over-long words that are not rfc2047 tokens may be cut */
1247 return header_fold (str, FALSE);
1252 * g_mime_utils_header_fold:
1253 * @str: input string
1255 * Folds a structured header according to the rules in rfc822.
1257 * Returns: an allocated string containing the folded header.
1260 g_mime_utils_header_fold (const char *str)
/* same call as g_mime_utils_structured_header_fold() */
1262 return header_fold (str, TRUE);
1267 * g_mime_utils_header_printf:
1268 * @format: string format
1269 * @Varargs: arguments
1271 * Allocates a buffer containing a formatted header specified by the
1274 * Returns: an allocated string containing the folded header specified
1275 * by @format and the following arguments.
1278 g_mime_utils_header_printf (const char *format, ...)
/* format into a temporary buffer first... */
1283 va_start (ap, format);
1284 buf = g_strdup_vprintf (format, ap);
/* ...then fold the result using the structured-header rules */
1287 ret = header_fold (buf, TRUE);
/* Decides whether a string must be wrapped in double quotes;
 * g_mime_utils_quote_string() uses the result as a boolean. */
1294 need_quotes (const char *string)
/* tracks whether the scan is currently inside an existing quoted section */
1296 gboolean quoted = FALSE;
1304 else if (*inptr == '"')
/* a tspecial or a dot outside of quotes is what triggers quoting */
1306 else if (!quoted && (is_tspecial (*inptr) || *inptr == '.'))
1317 * g_mime_utils_quote_string:
1318 * @str: input string
1320 * Quotes @str as needed according to the rules in rfc2045.
1322 * Returns: an allocated string containing the escaped and quoted (if
1323 * needed to be) input string. The decision to quote the string is
1324 * based on whether or not the input string contains any 'tspecials'
1325 * as defined by rfc2045.
1328 g_mime_utils_quote_string (const char *str)
1335 out = g_string_new ("");
/* opening quote, only when need_quotes() asks for quoting */
1337 if ((quote = need_quotes (str)))
1338 g_string_append_c (out, '"');
1340 for (c = str; *c; c++) {
/* backslashes are always escaped; double quotes only when the whole string
 * is being wrapped in quotes */
1341 if ((*c == '"' && quote) || *c == '\\')
1342 g_string_append_c (out, '\\');
1344 g_string_append_c (out, *c);
/* closing quote (presumably guarded by the same flag as the opening one) */
1348 g_string_append_c (out, '"');
/* FALSE keeps the character data alive for the caller */
1351 g_string_free (out, FALSE);
1358 * g_mime_utils_unquote_string:
1359 * @str: input string
1361 * Unquotes and unescapes a string.
/* Operates on the caller's (non-const) buffer rather than on a copy. */
1364 g_mime_utils_unquote_string (char *str)
1366 /* if the string is quoted, unquote it */
1367 register char *inptr = str;
/* set while the previous character was a backslash (presumably so that the
 * next character is taken literally) */
1368 int escaped = FALSE;
1375 if (*inptr == '\\') {
1381 } else if (*inptr == '"') {
1400 * g_mime_utils_text_is_8bit:
1401 * @text: text to check for 8bit chars
1404 * Determines if @text contains 8bit characters within the first @len
1407 * Returns: %TRUE if the text contains 8bit characters or %FALSE
1411 g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
1413 register const unsigned char *inptr;
1414 const unsigned char *inend;
1416 g_return_val_if_fail (text != NULL, FALSE);
/* The scan also stops at the first NUL byte.  NOTE(review): the byte is
 * dereferenced before the bounds test, so when no NUL occurs earlier the
 * byte at inend is read as well; swapping the two conditions would avoid
 * that. */
1419 for (inptr = text; *inptr && inptr < inend; inptr++)
/* any byte above 127 counts as 8bit */
1420 if (*inptr > (unsigned char) 127)
1428 * g_mime_utils_best_encoding:
1429 * @text: text to encode
1432 * Determines the best content encoding for the first @len bytes of
1435 * Returns: a #GMimeContentEncoding that is determined to be the best
1436 * encoding type for the specified block of text. ("best" in this
1437 * particular case means smallest output size)
1439 GMimeContentEncoding
1440 g_mime_utils_best_encoding (const unsigned char *text, size_t len)
1442 const unsigned char *ch, *inend;
/* count the bytes above 127 */
1446 for (ch = text; ch < inend; ch++)
1447 if (*ch > (unsigned char) 127)
/* Up to 17% high-bit bytes: quoted-printable is chosen; above that
 * threshold base64 is. */
1450 if ((float) count <= len * 0.17)
1451 return GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE;
1453 return GMIME_CONTENT_ENCODING_BASE64;
1459 * @cd: iconv converter
1460 * @inbuf: input text buffer to convert
1461 * @inleft: length of the input buffer
1462 * @outp: pointer to output buffer
1463 * @outlenp: pointer to output buffer length
1464 * @ninval: the number of invalid bytes in @inbuf
1466 * Converts the input buffer from one charset to another using the
1467 * @cd. On completion, @outp will point to the output buffer
1468 * containing the converted text (nul-terminated), @outlenp will be
1469 * the size of the @outp buffer (note: not the strlen() of @outp) and
1470 * @ninval will contain the number of bytes which could not be
1473 * Bytes which cannot be converted from @inbuf will appear as '?'
1474 * characters in the output buffer.
1476 * If *@outp is non-NULL, then it is assumed that it points to a
1477 * pre-allocated buffer of length *@outlenp. This is done so that the
1478 * same output buffer can be reused multiple times.
1480 * Returns: the string length of the output buffer.
1483 charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size_t *outlenp, size_t *ninval)
1485 size_t outlen, outleft, rc, n = 0;
/* no caller-supplied buffer: allocate one, with one extra byte beyond
 * outlen (presumably for the terminating NUL) */
1488 if (*outp == NULL) {
1489 outleft = outlen = (inleft * 2) + 16;
1490 outbuf = out = g_malloc (outlen + 1);
/* otherwise reuse the caller's buffer and its recorded size */
1492 outleft = outlen = *outlenp;
1493 outbuf = out = *outp;
1497 rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
1498 if (rc == (size_t) -1) {
1499 if (errno == EINVAL) {
1500 /* incomplete sequence at the end of the input buffer */
1506 /* seems that GnuWin32's libiconv 1.9 does not set errno in
1507 * the E2BIG case, so we have to fake it */
1508 if (outleft <= inleft)
1512 if (errno == E2BIG) {
1513 /* need to grow the output buffer */
/* The write offset is saved before g_realloc() because the block may move;
 * the remaining space is then recomputed from it (rc is reused as scratch
 * for the offset). */
1514 outlen += (inleft * 2) + 16;
1515 rc = (size_t) (outbuf - out);
1516 out = g_realloc (out, outlen + 1);
1517 outleft = outlen - rc;
1520 /* invalid byte(-sequence) in the input buffer */
1528 } while (inleft > 0);
/* a NULL input asks iconv to emit any pending shift sequence and reset the
 * conversion state */
1530 iconv (cd, NULL, NULL, &outbuf, &outleft);
/* number of bytes written to the output buffer */
1537 return (outbuf - out);
/* Bit flags used by g_mime_utils_decode_8bit() to remember that UTF-8 or
 * the locale charset is already covered, so neither is tried twice. */
1541 #define USER_CHARSETS_INCLUDE_UTF8 (1 << 0)
1542 #define USER_CHARSETS_INCLUDE_LOCALE (1 << 1)
1546 * g_mime_utils_decode_8bit:
1547 * @text: input text in unknown 8bit/multibyte character set
1548 * @len: input text length
1550 * Attempts to convert text in an unknown 8bit/multibyte charset into
1551 * UTF-8 by finding the charset which will convert the most bytes into
1552 * valid UTF-8 characters as possible. If no exact match can be found,
1553 * it will choose the best match and convert invalid byte sequences
1554 * into question-marks (?) in the returned string buffer.
1556 * Returns: a UTF-8 string representation of @text.
1559 g_mime_utils_decode_8bit (const char *text, size_t len)
1561 const char **charsets, **user_charsets, *locale, *best;
1562 size_t outleft, outlen, min, ninval;
1563 unsigned int included = 0;
1568 g_return_val_if_fail (text != NULL, NULL);
/* when the locale charset is UTF-8 it is flagged as already included,
 * because UTF-8 gets its own slot in the candidate list anyway */
1570 locale = g_mime_locale_charset ();
1571 if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
1572 included |= USER_CHARSETS_INCLUDE_LOCALE;
/* first pass: count the user-supplied charsets */
1574 if ((user_charsets = g_mime_user_charsets ())) {
1575 while (user_charsets[i])
/* room for the user charsets plus up to three more slots: UTF-8, the locale
 * charset and (presumably) a NULL terminator */
1579 charsets = g_alloca (sizeof (char *) * (i + 3));
1582 if (user_charsets) {
1583 while (user_charsets[i]) {
1584 /* keep a record of whether or not the user-supplied
1585 * charsets include UTF-8 and/or the default fallback
1586 * charset so that we avoid doubling our efforts for
1587 * these 2 charsets. We could have used a hash table
1588 * to keep track of unique charsets, but we can
1589 * (hopefully) assume that user_charsets is a unique
1590 * list of charsets with no duplicates. */
1591 if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
1592 included |= USER_CHARSETS_INCLUDE_UTF8;
1594 if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
1595 included |= USER_CHARSETS_INCLUDE_LOCALE;
1597 charsets[i] = user_charsets[i];
1602 if (!(included & USER_CHARSETS_INCLUDE_UTF8))
1603 charsets[i++] = "UTF-8";
1605 if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
1606 charsets[i++] = locale;
/* a single output buffer is allocated once and reused for every candidate */
1613 outleft = (len * 2) + 16;
1614 out = g_malloc (outleft + 1);
1616 for (i = 0; charsets[i]; i++) {
1617 if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
1620 outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
1622 g_mime_iconv_close (cd);
/* early return with the buffer trimmed to the converted length plus one
 * byte (presumably taken when the conversion had no invalid bytes) */
1625 return g_realloc (out, outlen + 1);
1633 /* if we get here, then none of the charsets fit the 8bit text flawlessly...
1634 * try to find the one that fit the best and use that to convert what we can,
1635 * replacing any byte we can't convert with a '?' */
1637 if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
1638 /* this shouldn't happen... but if we are here, then
1639 * it did... the only thing we can do at this point
1640 * is replace the 8bit garbage and pray */
1641 register const char *inptr = text;
1642 const char *inend = inptr + len;
1645 while (inptr < inend) {
1646 if (is_ascii (*inptr))
1647 *outbuf++ = *inptr++;
1654 return g_realloc (out, (size_t) (outbuf - out));
1657 outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
1659 g_mime_iconv_close (cd);
1661 return g_realloc (out, outlen + 1);
1665 /* this decodes rfc2047's version of quoted-printable */
/* Decodes the rfc2047 "Q" encoding from in[0..len) into out and returns the
 * number of bytes written. */
1667 quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
1669 register const unsigned char *inptr;
1670 register unsigned char *outptr;
1671 const unsigned char *inend;
1672 unsigned char c, c1;
1678 while (inptr < inend) {
/* an escape needs two more characters after the marker */
1681 if (inend - inptr >= 2) {
1682 c = toupper (*inptr++);
1683 c1 = toupper (*inptr++);
/* Combine the two hex digits into one byte.  NOTE(review): the digits are
 * not validated; a non-hex character is folded into a nibble by the 0x0f
 * mask instead of being rejected. */
1684 *outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4)
1685 | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') & 0x0f);
1687 /* data was truncated */
1690 } else if (c == '_') {
1691 /* _'s are an rfc2047 shortcut for encoding spaces */
1698 return (ssize_t) (outptr - out);
/* Quick structural test: at least 7 bytes, starting with "=?" and ending
 * with "?=".  Both arguments are evaluated more than once, so they must be
 * free of side effects.  NOTE(review): is_rfc2047_token() in this file uses
 * a minimum length of 8 instead. */
1701 #define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
/* Decodes a single rfc2047 encoded-word (=?charset?enc?text?=) into a newly
 * allocated string, converting the payload to UTF-8. */
1704 rfc2047_decode_word (const char *in, size_t inlen)
1706 const unsigned char *instart = (const unsigned char *) in;
/* inptr skips the leading "=?"; inend stops short of the trailing "?=" */
1707 const register unsigned char *inptr = instart + 2;
1708 const unsigned char *inend = instart + inlen - 2;
1709 unsigned char *decoded;
1710 const char *charset;
1719 /* skip over the charset */
/* the charset must be followed by "?X?", where X is the encoding letter */
1720 if (!(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
/* NOTE(review): the scratch buffers below live on the stack and are sized
 * by the encoded payload; very long encoded-words could be a concern. */
1729 len = (size_t) (inend - inptr);
1730 decoded = g_alloca (len);
1731 declen = g_mime_encoding_base64_decode_step (inptr, len, decoded, &state, &save);
/* NOTE(review): this message follows the base64 decode yet mentions 'Q';
 * it looks copied from the quoted-printable branch. */
1734 d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
1741 len = (size_t) (inend - inptr);
1742 decoded = g_alloca (len);
1743 declen = quoted_decode (inptr, len, decoded);
1746 d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
1751 d(fprintf (stderr, "unknown encoding\n"));
/* NUL-terminated copy of the charset label that sits between the leading
 * "=?" and the encoding selector */
1755 len = (inptr - 3) - (instart + 2);
1756 charenc = g_alloca (len + 1);
1757 memcpy (charenc, in + 2, len);
1758 charenc[len] = '\0';
1761 /* rfc2231 updates rfc2047 encoded words...
1762 * The ABNF given in RFC 2047 for encoded-words is:
1763 * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1764 * This specification changes this ABNF to:
1765 * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1768 /* trim off the 'language' part if it's there... */
1769 if ((p = strchr (charset, '*')))
1772 /* slight optimization? */
/* payload already declared as UTF-8: no iconv round trip is needed */
1773 if (!g_ascii_strcasecmp (charset, "UTF-8")) {
1774 p = (char *) decoded;
1777 //while (!g_utf8_validate (p, len, (const char **) &p)) {
1778 // len = declen - (p - (char *) decoded);
1782 return g_strndup ((char *) decoded, declen);
/* empty or unsupported charset label: fall back to charset guessing */
1785 if (!charset[0] || (cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
1786 w(g_warning ("Cannot convert from %s to UTF-8, header display may "
1787 "be corrupt: %s", charset[0] ? charset : "unspecified charset",
1788 g_strerror (errno)));
1790 return g_mime_utils_decode_8bit ((char *) decoded, declen);
1794 buf = g_malloc (len + 1);
1796 charset_convert (cd, (char *) decoded, declen, &buf, &len, &ninval);
1798 g_mime_iconv_close (cd);
/* NOTE(review): the precision argument of "%.*s" must be an int; confirm
 * the type of declen or add a cast. */
1802 g_warning ("Failed to completely convert \"%.*s\" to UTF-8, display may be "
1803 "corrupt: %s", declen, decoded, g_strerror (errno));
1812 * g_mime_utils_header_decode_text:
1813 * @text: header text to decode
1815 * Decodes an rfc2047 encoded 'text' header.
1817 * Note: See g_mime_set_user_charsets() for details on how charset
1818 * conversion is handled for unencoded 8bit text and/or wrongly
1819 * specified rfc2047 encoded-word tokens.
1821 * Returns: a newly allocated UTF-8 string representing the decoded
/* Walks @text as alternating runs of linear whitespace and words.  Words
 * shaped like rfc2047 encoded-words are decoded with
 * rfc2047_decode_word(); other words are appended either as-is or after
 * g_mime_utils_decode_8bit().  Everything is accumulated in a GString. */
1825 g_mime_utils_header_decode_text (const char *text)
/* the workaround setting is read once per call */
1827 gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
1828 register const char *inptr = text;
1829 gboolean encoded = FALSE;
1830 const char *lwsp, *word;
/* early exit with a newly allocated empty string (the guard presumably
 * tests for a NULL @text -- TODO confirm) */
1837 return g_strdup ("");
/* presize the output to the input length */
1839 out = g_string_sized_new (strlen (text) + 1);
1841 while (*inptr != '\0') {
/* consume the whitespace run that precedes the next word */
1843 while (is_lwsp (*inptr))
/* number of whitespace bytes between lwsp and the word */
1846 nlwsp = (size_t) (inptr - lwsp);
1848 if (*inptr != '\0') {
/* workaround mode: tokenize on the "=?" ... "?=" delimiters themselves
 * instead of relying on surrounding whitespace */
1852 if (enable_rfc2047_workarounds) {
1853 if (!strncmp (inptr, "=?", 2)) {
1856 /* skip past the charset (if one is even declared, sigh) */
1857 while (*inptr && *inptr != '?') {
1858 ascii = ascii && is_ascii (*inptr);
1862 /* sanity check encoding type */
/* NOTE(review): strchr() also matches the terminating NUL of "BbQq", so
 * when inptr[1] is '\0' this test goes on to read inptr[2], one byte
 * past the end of the string. */
1863 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
1868 /* find the end of the rfc2047 encoded word token */
1869 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
1870 ascii = ascii && is_ascii (*inptr);
1874 if (!strncmp (inptr, "?=", 2))
1878 /* stop if we encounter a possible rfc2047 encoded
1879 * token even if it's inside another word, sigh. */
1880 while (*inptr && !is_lwsp (*inptr) &&
1881 strncmp (inptr, "=?", 2) != 0) {
1882 ascii = ascii && is_ascii (*inptr);
/* default tokenization: a word runs up to the next whitespace byte */
1887 while (*inptr && !is_lwsp (*inptr)) {
1888 ascii = ascii && is_ascii (*inptr);
/* byte length of the word just scanned */
1893 n = (size_t) (inptr - word);
1894 if (is_rfc2047_encoded_word (word, n)) {
1895 if ((decoded = rfc2047_decode_word (word, n))) {
1896 /* rfc2047 states that you must ignore all
1897 * whitespace between encoded words */
1899 g_string_append_len (out, lwsp, nlwsp);
1901 g_string_append (out, decoded);
1906 /* append lwsp and invalid rfc2047 encoded-word token */
/* a single append of nlwsp + n bytes starting at lwsp copies both the
 * whitespace run and the token that follows it */
1907 g_string_append_len (out, lwsp, nlwsp + n);
1912 g_string_append_len (out, lwsp, nlwsp);
1914 /* append word token */
1916 /* *sigh* I hate broken mailers... */
1917 decoded = g_mime_utils_decode_8bit (word, n);
1918 g_string_append (out, decoded);
1921 g_string_append_len (out, word, n);
1927 /* appending trailing lwsp */
1928 g_string_append_len (out, lwsp, nlwsp);
/* FALSE: release only the GString wrapper and keep its character data */
1934 g_string_free (out, FALSE);
1941 * g_mime_utils_header_decode_phrase:
1942 * @phrase: header to decode
1944 * Decodes an rfc2047 encoded 'phrase' header.
1946 * Note: See g_mime_set_user_charsets() for details on how charset
1947 * conversion is handled for unencoded 8bit text and/or wrongly
1948 * specified rfc2047 encoded-word tokens.
1950 * Returns: a newly allocated UTF-8 string representing the decoded
/* Walks @phrase as alternating runs of linear whitespace and tokens.  A
 * token is either a run of atom characters, which is decoded with
 * rfc2047_decode_word() when it is shaped like an encoded-word, or a run
 * of other non-whitespace bytes.  Everything is accumulated in a
 * GString. */
1954 g_mime_utils_header_decode_phrase (const char *phrase)
1956 register const char *inptr = phrase;
1957 gboolean encoded = FALSE;
1958 const char *lwsp, *text;
/* early exit with a newly allocated empty string (the guard presumably
 * tests for a NULL @phrase -- TODO confirm) */
1965 return g_strdup ("");
/* presize the output to the input length */
1967 out = g_string_sized_new (strlen (phrase) + 1);
1969 while (*inptr != '\0') {
/* consume the whitespace run that precedes the next token */
1971 while (is_lwsp (*inptr))
/* number of whitespace bytes between lwsp and the token */
1974 nlwsp = (size_t) (inptr - lwsp);
/* atom characters: only such a run is tested for the encoded-word shape */
1977 if (is_atom (*inptr)) {
1978 while (is_atom (*inptr))
/* byte length of the atom run */
1981 n = (size_t) (inptr - text);
1982 if (is_rfc2047_encoded_word (text, n)) {
1983 if ((decoded = rfc2047_decode_word (text, n))) {
1984 /* rfc2047 states that you must ignore all
1985 * whitespace between encoded words */
1987 g_string_append_len (out, lwsp, nlwsp);
1989 g_string_append (out, decoded);
1994 /* append lwsp and invalid rfc2047 encoded-word token */
1995 g_string_append_len (out, lwsp, nlwsp + n);
1999 /* append lwsp and atom token */
2000 g_string_append_len (out, lwsp, nlwsp + n);
/* not an atom: emit the whitespace, then scan up to the next whitespace */
2004 g_string_append_len (out, lwsp, nlwsp);
2007 while (*inptr && !is_lwsp (*inptr)) {
2008 ascii = ascii && is_ascii (*inptr);
2012 n = (size_t) (inptr - text);
2015 /* *sigh* I hate broken mailers... */
2016 decoded = g_mime_utils_decode_8bit (text, n);
2017 g_string_append (out, decoded);
2020 g_string_append_len (out, text, n);
/* FALSE: release only the GString wrapper and keep its character data */
2028 g_string_free (out, FALSE);
2034 /* rfc2047 version of quoted-printable */
/* Encodes @len bytes from @in into @out and returns the number of bytes
 * written.  @safemask selects which gmime_special_table flag marks a byte
 * as safe.  No terminator is written here and @out is not bounds-checked;
 * the caller in this file allocates GMIME_QP_ENCODE_LEN (len) + 1 bytes
 * and adds the NUL itself. */
2036 quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
2038 register const unsigned char *inptr = (const unsigned char *) in;
2039 const unsigned char *inend = inptr + len;
2040 register unsigned char *outptr = out;
2043 while (inptr < inend) {
/* '_' never takes this branch even when flagged safe; in the decoder a
 * '_' is the rfc2047 shortcut for a space.  The branch presumably copies
 * the byte through unchanged -- TODO confirm. */
2047 } else if (c != '_' && gmime_special_table[c] & safemask) {
/* remaining bytes are written as two hex digits, high nibble first */
2051 *outptr++ = tohex[(c >> 4) & 0xf];
2052 *outptr++ = tohex[c & 0xf];
2056 return (outptr - out);
/* Appends @len bytes of @word to @string as one rfc2047 encoded-word of
 * the form "=?charset?X?payload?=", where the payload is produced by
 * either the base64 or the quoted-printable encoder, as selected by
 * g_mime_utils_best_encoding(). */
2060 rfc2047_encode_word (GString *string, const char *word, size_t len,
2061 const char *charset, gushort safemask)
2063 register char *inptr, *outptr;
2064 iconv_t cd = (iconv_t) -1;
2065 unsigned char *encoded;
/* the input is treated as UTF-8; a converter to @charset is only opened
 * when @charset is something other than UTF-8 */
2072 if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
2073 cd = g_mime_iconv_open (charset, "UTF-8");
2075 if (cd != (iconv_t) -1) {
2076 uword = g_mime_iconv_strndup (cd, (char *) word, len);
2077 g_mime_iconv_close (cd);
/* from here on, len is the byte length of the converted text */
2081 len = strlen (uword);
2087 switch (g_mime_utils_best_encoding ((const unsigned char *) word, len)) {
2088 case GMIME_CONTENT_ENCODING_BASE64:
2089 enclen = GMIME_BASE64_ENCODE_LEN (len);
/* stack buffer; the extra byte holds the NUL stored after encoding */
2090 encoded = g_alloca (enclen + 1);
2094 pos = g_mime_encoding_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save);
2095 encoded[pos] = '\0';
2097 /* remove \n chars as headers need to be wrapped differently */
2098 if (G_UNLIKELY ((inptr = strchr ((char *) encoded, '\n')))) {
2100 while (G_LIKELY (*inptr)) {
2101 if (G_LIKELY (*inptr != '\n'))
2111 case GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE:
2112 enclen = GMIME_QP_ENCODE_LEN (len);
/* stack buffer; the extra byte holds the NUL stored after encoding */
2113 encoded = g_alloca (enclen + 1);
2117 pos = quoted_encode (word, len, encoded, safemask);
2118 encoded[pos] = '\0';
/* any other result from g_mime_utils_best_encoding() is treated as a
 * programming error */
2124 g_assert_not_reached ();
/* `encoding` is the single character placed between the two inner '?'
 * delimiters (presumably set in each case above -- TODO confirm) */
2129 g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded);
/* Node of a singly linked list of word tokens found in a header value. */
2139 typedef struct _rfc822_word {
/* next token in the list.  rfc2047_encode_get_rfc822_words() casts the
 * address of its list-head pointer to a node pointer, which presumably
 * depends on this member staying first -- TODO confirm before
 * reordering fields. */
2140 struct _rfc822_word *next;
/* bounds of the token inside the source string; users of this struct
 * compute the byte length as end - start */
2141 const char *start, *end;
/* nodes are allocated and released through the GLib slice allocator */
2146 #define rfc822_word_free(word) g_slice_free (rfc822_word, word)
2147 #define rfc822_word_new() g_slice_new (rfc822_word)
2149 /* okay, so 'unstructured text' fields don't actually contain 'word'
2150 * tokens, but we can group stuff similarly... */
/* Splits the UTF-8 string @in into a linked list of rfc822_word tokens.
 * Each token records its start, the word type seen so far, and an
 * `encoding` level that is raised to at least 1 when a code point in the
 * range 128..255 is seen.  @phrase enables the quoted-string (qstring)
 * word type.
 *
 * NOTE(review): the debug printf at the end of this function passes
 * `word->end - word->start` (a ptrdiff_t) as the "%.*s" precision, which
 * must be an int. */
2151 static rfc822_word *
2152 rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase)
2154 rfc822_word *words, *tail, *word;
2155 rfc822_word_t type = WORD_ATOM;
2156 const char *inptr, *start, *last;
2157 int count = 0, encoding = 0;
/* tail initially aliases the `words` head pointer itself, reinterpreted
 * as a node (presumably so the first append through tail->next stores
 * into `words` -- TODO confirm) */
2160 tail = (rfc822_word *) &words;
2162 last = start = inptr = in;
2163 while (inptr && *inptr) {
2164 const char *newinptr;
/* decode one UTF-8 code point and remember where the next one begins */
2167 newinptr = g_utf8_next_char (inptr);
2168 c = g_utf8_get_char (inptr);
/* NOTE(review): confirm whether g_utf8_next_char() can ever yield NULL;
 * if it cannot, only the g_unichar_validate() half of this test is
 * effective. */
2169 if (newinptr == NULL || !g_unichar_validate (c)) {
2170 w(g_warning ("Invalid UTF-8 sequence encountered"));
/* whitespace closes the current word; is_lwsp() is only consulted for
 * code points below 256 */
2177 if (c < 256 && is_lwsp (c)) {
2179 word = rfc822_word_new ();
2181 word->start = start;
2184 word->encoding = encoding;
/* classify the non-whitespace code point: ASCII (in phrase mode),
 * 128..255, or 256 and above */
2196 if (phrase && c < 128) {
2197 /* phrases can have qstring words */
2199 type = MAX (type, WORD_QSTRING);
2200 } else if (c > 127 && c < 256) {
2202 encoding = MAX (encoding, 1);
2203 } else if (c >= 256) {
/* a word whose running count reaches GMIME_FOLD_PREENCODED is cut here
 * and a new node is started */
2208 if (count >= GMIME_FOLD_PREENCODED) {
2209 word = rfc822_word_new ();
2211 word->start = start;
2214 word->encoding = encoding;
2220 /* Note: don't reset 'type' as it
2221 * needs to be preserved when breaking
2232 word = rfc822_word_new ();
2234 word->start = start;
2237 word->encoding = encoding;
2244 printf ("rfc822 word tokens:\n");
2247 printf ("\t'%.*s'; type=%d, encoding=%d\n",
2248 word->end - word->start, word->start,
2249 word->type, word->encoding);
/* True when a merged word of byte length @wlen stays under the limit for
 * its @type: GMIME_FOLD_PREENCODED for WORD_2047 words, and
 * GMIME_FOLD_LEN - 8 for every other type. */
2258 #define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8))
/* Decides whether @word and the word that follows it (@next) should be
 * merged into a single token.  The decision depends on both word types
 * and on whether the merged span, measured from word->start to
 * next->end, stays under the limit tested by MERGED_WORD_LT_FOLDLEN. */
2261 should_merge_words (rfc822_word *word, rfc822_word *next)
2263 switch (word->type) {
2265 if (next->type == WORD_2047)
/* the size limit applied here follows the type of @next */
2268 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, next->type));
2270 /* avoid merging with words that need to be rfc2047 encoded */
2271 if (next->type == WORD_2047)
2274 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_QSTRING));
2276 if (next->type == WORD_ATOM) {
2277 /* whether we merge or not is dependent upon:
2278 * 1. the number of atoms in a row after 'word'
2279 * 2. if there is another encword after the string of atoms.
2283 while (next && next->type == WORD_ATOM) {
2288 /* if all the words after the encword are atoms, don't merge */
2289 if (!next || natoms > 3)
2293 /* avoid merging with qstrings */
2294 if (next->type == WORD_QSTRING)
/* `next` has presumably been advanced past the run of atoms by the loop
 * above, so the span measured here can reach beyond the word that
 * immediately follows @word -- TODO confirm */
2297 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_2047));
/* Merges adjacent tokens of the list at *@wordsp in two passes.  A merge
 * extends the earlier node to the end of the later one, unlinks the
 * later node and frees it. */
2304 rfc2047_encode_merge_rfc822_words (rfc822_word **wordsp)
2306 rfc822_word *word, *next, *words = *wordsp;
2308 /* first pass: merge qstrings with adjacent qstrings and encwords with adjacent encwords */
2310 while (word && word->next) {
/* only non-atom words of the same type are merged, and only while the
 * merged span stays under the limit for that type */
2313 if (word->type != WORD_ATOM && word->type == next->type &&
2314 MERGED_WORD_LT_FOLDLEN (next->end - word->start, word->type)) {
2315 /* merge the words */
/* keep the higher of the two encoding levels */
2316 word->encoding = MAX (word->encoding, next->encoding);
/* extend this node over `next`, then unlink and free `next` */
2318 word->end = next->end;
2319 word->next = next->next;
2321 rfc822_word_free (next);
2329 /* second pass: now merge atoms with the other words */
2331 while (word && word->next) {
2334 if (should_merge_words (word, next)) {
2335 /* the resulting word type is the MAX of the 2 types */
2336 word->type = MAX (word->type, next->type);
/* keep the higher of the two encoding levels */
2338 word->encoding = MAX (word->encoding, next->encoding);
/* extend this node over `next`, then unlink and free `next` */
2340 word->end = next->end;
2341 word->next = next->next;
2343 rfc822_word_free (next);
/* Appends the bytes of @in to @out as a double-quoted string: an opening
 * quote, each byte with a backslash inserted before any '"' or '\\', and
 * a closing quote.  @len is presumably the number of bytes taken from
 * @in -- TODO confirm. */
2355 g_string_append_len_quoted (GString *out, const char *in, size_t len)
2357 register const char *inptr;
/* opening quote */
2360 g_string_append_c (out, '"');
2365 while (inptr < inend) {
/* quote and backslash characters get a backslash escape */
2366 if (*inptr == '"' || *inptr == '\\')
2367 g_string_append_c (out, '\\');
2369 g_string_append_c (out, *inptr);
/* closing quote */
2374 g_string_append_c (out, '"');
/* Encodes the header value @in according to rfc2047.  The input is split
 * into word tokens, adjacent tokens are merged, and each token is then
 * written out by type: copied as-is, as a quoted string, or as an
 * rfc2047 encoded-word.  @safemask is passed on to the word encoder, and
 * its IS_PSAFE bit selects phrase mode.  When no tokens are produced, a
 * copy of @in is returned. */
2378 rfc2047_encode (const char *in, gushort safemask)
2380 rfc822_word *words, *word, *prev = NULL;
2381 const char **charsets, *charset;
/* the IS_PSAFE bit of @safemask is what turns on phrase tokenization */
2389 if (!(words = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE)))
2390 return g_strdup (in);
2392 rfc2047_encode_merge_rfc822_words (&words);
/* user-configured charsets, tried in order further below */
2394 charsets = g_mime_user_charsets ();
2396 out = g_string_new ("");
2398 /* output words now with spaces between them */
2401 /* append correct number of spaces between words */
2402 if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
2403 /* one or both of the words are not encoded so we write the spaces out untouched */
2404 len = word->start - prev->end;
2405 g_string_append_len (out, prev->end, len);
2408 switch (word->type) {
/* copied through unchanged */
2410 g_string_append_len (out, word->start, (size_t) (word->end - word->start));
/* quoted strings are only expected in phrase mode */
2413 g_assert (safemask & IS_PSAFE);
2414 g_string_append_len_quoted (out, word->start, (size_t) (word->end - word->start));
2417 if (prev && prev->type == WORD_2047) {
2418 /* include the whitespace chars between these 2 words in the
2419 resulting rfc2047 encoded word. */
/* the length is measured from the end of the previous word, so it covers
 * the whitespace between the two words as well */
2420 len = word->end - prev->end;
2423 /* encoded words need to be separated by linear whitespace */
2424 g_string_append_c (out, ' ');
2426 len = word->end - word->start;
2427 start = word->start;
/* the encoding level recorded by the tokenizer selects the charset */
2430 switch (word->encoding) {
2431 case 0: /* us-ascii */
2432 rfc2047_encode_word (out, start, len, "us-ascii", safemask);
2434 case 1: /* iso-8859-1 */
2435 rfc2047_encode_word (out, start, len, "iso-8859-1", safemask);
/* other levels: build a charset mask for the text and prefer the first
 * user charset that can encode it */
2439 g_mime_charset_init (&mask);
2440 g_mime_charset_step (&mask, start, len);
2442 for (i = 0; charsets && charsets[i]; i++) {
2443 if (g_mime_charset_can_encode (&mask, charsets[i], start, len)) {
2444 charset = charsets[i];
/* presumably only used when no user charset matched -- TODO confirm */
2450 charset = g_mime_charset_best_name (&mask);
2452 rfc2047_encode_word (out, start, len, charset, safemask);
/* list nodes are released once they have been written out */
2459 rfc822_word_free (prev);
2465 rfc822_word_free (prev);
/* FALSE: release only the GString wrapper and keep its character data */
2468 g_string_free (out, FALSE);
2475 * g_mime_utils_header_encode_phrase:
2476 * @phrase: phrase to encode
2478 * Encodes a 'phrase' header according to the rules in rfc2047.
2480 * Returns: the encoded 'phrase'. Useful for encoding internet
/* Public entry point for 'phrase' headers: delegates to rfc2047_encode()
 * with the IS_PSAFE mask, which is the bit rfc2047_encode() tests to
 * enable phrase (quoted-string) handling. */
2484 g_mime_utils_header_encode_phrase (const char *phrase)
2489 return rfc2047_encode (phrase, IS_PSAFE);
2494 * g_mime_utils_header_encode_text:
2495 * @text: text to encode
2497 * Encodes a 'text' header according to the rules in rfc2047.
2499 * Returns: the encoded header. Useful for encoding
2500 * headers like "Subject".
2503 g_mime_utils_header_encode_text (const char *text)
2508 return rfc2047_encode (text, IS_ESAFE);