1 /* date.c - Date-parsing utility for the notmuch mail system.
3 * Copyright © 2000-2009 Jeffrey Stedfast
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/
19 /* This code was originally written by Jeffrey Stedfast
20 * as part of his GMime library (http://spruce.sourceforge.net/gmime/)
22 * Carl Worth <cworth@cworth.org> imported it into notmuch and removed
37 #ifdef HAVE_SYS_PARAM_H
38 #include <sys/param.h> /* for MAXHOSTNAMELEN */
40 #define MAXHOSTNAMELEN 64
42 #ifdef HAVE_UTSNAME_DOMAINNAME
43 #include <sys/utsname.h> /* for uname() */
45 #include <sys/types.h>
47 #include <unistd.h> /* Unix header for getpid() */
53 #define getpid() _getpid()
61 #include "gmime-utils.h"
62 #include "gmime-table-private.h"
63 #include "gmime-parse-utils.h"
64 #include "gmime-part.h"
65 #include "gmime-charset.h"
66 #include "gmime-iconv.h"
67 #include "gmime-iconv-utils.h"
69 #ifdef ENABLE_WARNINGS
73 #endif /* ENABLE_WARNINGS */
77 #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
79 /* date parser macros */
/* Character sets used to classify the bytes of a date token.
 * gmime_datetok_table_init() looks every byte value up in each of these
 * strings with strchr(); a byte that is absent from a set receives the
 * matching DATE_TOKEN_NON_* flag.  The letter sets are built by spelling
 * out the weekday, month and timezone names, and strchr() is
 * case-sensitive, so only the letter cases that appear here are accepted. */
80 #define NUMERIC_CHARS "1234567890"
81 #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
82 #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
83 #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
84 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
85 #define TIME_CHARS "1234567890:"
/* Per-byte classification bits stored in gmime_datetok_table and OR-ed
 * together over all bytes of a token by datetok().
 *
 * The NON_* bits are negative flags: a set bit means at least one byte of
 * the token lies outside the corresponding character set, so a token can
 * only be of a given class when that bit is still clear (see the is_*()
 * predicates).  HAS_COLON and HAS_SIGN are positive flags set by ':' and by
 * '+' / '-' respectively. */
87 #define DATE_TOKEN_NON_NUMERIC (1 << 0)
88 #define DATE_TOKEN_NON_WEEKDAY (1 << 1)
89 #define DATE_TOKEN_NON_MONTH (1 << 2)
90 #define DATE_TOKEN_NON_TIME (1 << 3)
91 #define DATE_TOKEN_HAS_COLON (1 << 4)
92 #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
93 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
94 #define DATE_TOKEN_HAS_SIGN (1 << 7)
/* Precomputed lookup table: gmime_datetok_table[c] holds the DATE_TOKEN_*
 * flags for byte value c, 16 entries per row.
 * gmime_datetok_table_init() prints a table in this same layout.
 *
 * Reading a few entries against the flag definitions:
 *   111 = all six NON_* bits (the byte belongs to no class);
 *    38 = NON_WEEKDAY | NON_MONTH | NON_TIMEZONE_ALPHA (the digits '0'-'9');
 *   119 = 111 - NON_TIME + HAS_COLON (the ':' byte);
 *   175 = 111 - NON_TIMEZONE_NUMERIC + HAS_SIGN (the '+' and '-' bytes);
 *    79 = 111 - NON_TIMEZONE_ALPHA (e.g. '(' and ')').
 * Entry 0 is 128 (HAS_SIGN only), which is what the generator yields for
 * the NUL byte because strchr() also matches a string's terminator. */
96 static unsigned char gmime_datetok_table[256] = {
97 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
98 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
99 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
100 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
101 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
102 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
103 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
104 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
105 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
106 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
107 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
108 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
109 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
110 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
112 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
115 /* hrm, is there a library for this shit? */
122 { "EST", -500 }, /* these are all US timezones. bloody yanks */
/* Three-letter English month abbreviations, in calendar order.
 * get_month() compares the first three characters of a token against these
 * entries case-insensitively and works with the array index (0 = "Jan"). */
137 static char *tm_months[] = {
138 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
139 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
/* Three-letter English weekday abbreviations starting with Sunday.
 * get_wday() compares the first three characters of a token against these
 * entries case-insensitively and works with the array index (0 = "Sun"). */
142 static char *tm_days[] = {
143 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
146 /* This is where it gets ugly... */
/* One token of a date string, kept in a singly linked list.
 * Besides the link below, the rest of this file accesses the members
 * `start` and `len` (the token's position and length inside the caller's
 * string) and `mask` (the OR of the DATE_TOKEN_* flags of its bytes). */
148 typedef struct _date_token {
/* next token in the list; the last token's link is expected to be NULL */
149 struct _date_token *next;
/* Tokens are allocated and released through GLib's slice allocator. */
155 #define date_token_free(tok) g_slice_free (date_token, tok)
156 #define date_token_new() g_slice_new (date_token)
/* datetok:
 * @date: date string to split
 *
 * Splits @date into a linked list of date_token nodes.  Tokens point into
 * @date (start pointer plus length); no text is copied.  Leading spaces and
 * tabs are skipped, and a token ends at the end of the string or at any of
 * '-', '/', ',', tab, CR, LF or space.  Each token's mask is the OR of the
 * gmime_datetok_table flags of its bytes.
 *
 * The caller frees the nodes with date_token_free(). */
159 datetok (const char *date)
/* `tail` starts out aliasing the list head pointer itself; this relies on
 * `next` being the first member of date_token, so that storing through
 * tail->next updates `tokens` for the very first node. */
161 date_token *tokens = NULL, *token, *tail = (date_token *) &tokens;
162 const char *start, *end;
167 /* kill leading whitespace */
168 while (*start == ' ' || *start == '\t')
/* NOTE(review): the first byte is folded into the mask before the scan
 * loop, presumably so that a leading '+' or '-' stays part of the token even
 * though '-' is also a delimiter - confirm against the lines not shown. */
174 mask = gmime_datetok_table[(unsigned char) *start];
176 /* find the end of this token */
178 while (*end && !strchr ("-/,\t\r\n ", *end))
179 mask |= gmime_datetok_table[(unsigned char) *end++];
182 token = date_token_new ();
184 token->start = start;
185 token->len = end - start;
/* decode_int:
 * @in: start of the text to parse (need not be NUL-terminated)
 * @inlen: number of bytes available at @in
 *
 * Parses a decimal integer with an optional leading sign.  Digits are
 * accumulated base 10 into `val`; any other character stops the parse.
 * Callers in this file treat a result of -1 as failure.
 *
 * NOTE(review): no guard against signed overflow of `val * 10` is visible
 * for very long digit strings - confirm. */
202 decode_int (const char *in, size_t inlen)
204 register const char *inptr;
205 int sign = 1, val = 0;
214 } else if (*inptr == '+')
217 for ( ; inptr < inend; inptr++) {
/* reject anything that is not an ASCII digit */
218 if (!(*inptr >= '0' && *inptr <= '9'))
221 val = (val * 10) + (*inptr - '0');
/* get_days_in_month:
 * @month: month number
 * @year: year, used for the leap-year test
 *
 * Presumably returns the number of days in @month; the only logic visible
 * here is the leap-year check, delegated to GLib's g_date_is_leap_year(). */
231 get_days_in_month (int month, int year)
248 if (g_date_is_leap_year (year))
/* get_wday:
 * @in: token text
 * @inlen: token length
 *
 * Looks @in up in tm_days by comparing its first three characters
 * case-insensitively.  Returns -1 when @in is NULL or when no weekday
 * matches; on a match the result is expected to be the index
 * 0 (Sun) .. 6 (Sat). */
259 get_wday (const char *in, size_t inlen)
263 g_return_val_if_fail (in != NULL, -1);
268 for (wday = 0; wday < 7; wday++) {
269 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
273 return -1; /* unknown week day */
/* get_mday:
 * @in: token text
 * @inlen: token length
 *
 * Decodes @in as a day of the month with decode_int().  Returns -1 when
 * @in is NULL.  Values below 0 or above 31 are rejected; note that the
 * visible range check lets 0 through. */
277 get_mday (const char *in, size_t inlen)
281 g_return_val_if_fail (in != NULL, -1);
283 mday = decode_int (in, inlen);
285 if (mday < 0 || mday > 31)
/* get_month:
 * @in: token text
 * @inlen: token length
 *
 * Looks @in up in tm_months by comparing its first three characters
 * case-insensitively.  Returns -1 when @in is NULL or when no month
 * matches; on a match the result is expected to be the index
 * 0 (Jan) .. 11 (Dec). */
292 get_month (const char *in, size_t inlen)
296 g_return_val_if_fail (in != NULL, -1);
301 for (i = 0; i < 12; i++) {
302 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
306 return -1; /* unknown month */
/* get_year:
 * @in: token text
 * @inlen: token length
 *
 * Decodes @in as a year with decode_int().  Returns -1 when @in is NULL;
 * a decode_int() result of -1 is treated as failure.  Short years are
 * widened with a pivot at 70: values below 70 map to 20xx, the others to
 * 19xx.  The condition that selects which years get widened is not
 * visible here. */
310 get_year (const char *in, size_t inlen)
314 g_return_val_if_fail (in != NULL, -1);
316 if ((year = decode_int (in, inlen)) == -1)
320 year += (year < 70) ? 2000 : 1900;
/* get_time:
 * @in: token text, expected in a colon-separated form such as HH:MM:SS
 * @inlen: token length
 * @hour: (out) hours
 * @min: (out) minutes
 * @sec: (out) seconds
 *
 * Parses a clock time.  All three outputs are zeroed first, so they are
 * well defined even if parsing stops early.  The scan accumulates decimal
 * digits into whichever field `val` currently points at; `colons` counts
 * the separators seen.  Callers treat a zero return as failure. */
329 get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
331 register const char *inptr;
332 int *val, colons = 0;
335 *hour = *min = *sec = 0;
339 for (inptr = in; inptr < inend; inptr++) {
/* anything that is neither a separator nor a digit is an error */
352 } else if (!(*inptr >= '0' && *inptr <= '9'))
355 *val = (*val * 10) + (*inptr - '0');
/* get_tzone:
 * @token: (in/out) address of the current token pointer
 *
 * Tries to read a timezone from at most two consecutive tokens starting at
 * *@token.  *@token is advanced as tokens are examined, so the caller's
 * pointer moves.
 *
 * A token that starts with '+' or '-' is decoded as a signed numeric
 * offset in +-HHMM form.  Otherwise the token is compared against the
 * names in tz_offsets and the matching entry's offset is returned.
 * Callers treat -1 as "no timezone found".
 *
 * NOTE(review): a numeric zone of "-0001" decodes to -1 and is therefore
 * indistinguishable from the failure value - confirm whether that matters. */
362 get_tzone (date_token **token)
364 const char *inptr, *inend;
368 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
369 inptr = (*token)->start;
370 inlen = (*token)->len;
371 inend = inptr + inlen;
373 if (*inptr == '+' || *inptr == '-') {
374 return decode_int (inptr, inlen);
/* a trailing ')' suggests a parenthesised zone name such as "(EST)" */
378 if (*(inend - 1) == ')')
/* NOTE(review): 15 is hard-coded and must equal the number of entries in
 * tz_offsets (table not fully visible here) - confirm. */
384 for (t = 0; t < 15; t++) {
385 size_t len = strlen (tz_offsets[t].name);
/* case-sensitive comparison against the table name */
390 if (!strncmp (inptr, tz_offsets[t].name, len))
391 return tz_offsets[t].offset;
/* mktime_utc:
 * @tm: broken-down time
 *
 * Converts @tm to a time_t; callers describe the result as GMT.  The
 * preprocessor branches below select where the local timezone correction
 * comes from on each platform: a Windows-specific path, the tm_gmtoff
 * field, the `timezone` global (with `altzone`, or a fixed one-hour
 * adjustment, while DST is in effect), or `_timezone`.  The build fails if
 * none of these is configured. */
400 mktime_utc (struct tm *tm)
408 #if defined (G_OS_WIN32)
410 if (tm->tm_isdst > 0) {
416 #elif defined (HAVE_TM_GMTOFF)
418 #elif defined (HAVE_TIMEZONE)
419 if (tm->tm_isdst > 0) {
420 #if defined (HAVE_ALTZONE)
422 #else /* !defined (HAVE_ALTZONE) */
/* without altzone, assume DST is exactly one hour ahead of standard time */
423 tz = (timezone - 3600);
428 #elif defined (HAVE__TIMEZONE)
431 #error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
/* parse_rfc822_date:
 * @tokens: token list produced by datetok()
 * @tzone: timezone offset output (its use is not visible in these lines)
 *
 * Strict parser: expects the tokens in the fixed order
 * [weekday] day month year time [timezone].  Returns (time_t) 0 when
 * @tokens is NULL; the public entry point treats a zero result as a
 * failure and falls back to parse_broken_date(). */
438 parse_rfc822_date (date_token *tokens, int *tzone)
440 int hour, min, sec, offset, n;
445 g_return_val_if_fail (tokens != NULL, (time_t) 0);
449 memset ((void *) &tm, 0, sizeof (struct tm));
451 if ((n = get_wday (token->start, token->len)) != -1) {
452 /* not all dates may have this... */
/* day of month is mandatory */
458 if (!token || (n = get_mday (token->start, token->len)) == -1)
/* month name is mandatory */
465 if (!token || (n = get_month (token->start, token->len)) == -1)
/* year is mandatory */
472 if (!token || (n = get_year (token->start, token->len)) == -1)
/* struct tm counts years from 1900 */
475 tm.tm_year = n - 1900;
478 /* get the hour/min/sec */
479 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
487 /* get the timezone */
488 if (!token || (n = get_tzone (&token)) == -1) {
489 /* I guess we assume tz is GMT? */
495 t = mktime_utc (&tm);
497 /* t is now GMT of the time we want, but not offset by the timezone ... */
499 /* this should convert the time to the GMT equiv time */
/* offset is in +-HHMM form: hundreds are hours, the remainder minutes */
500 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* Token class predicates built on the mask computed by datetok().
 * A token may belong to a class only if no byte set the matching NON_* bit.
 * is_time() additionally requires a ':' somewhere in the token, and
 * is_tzone_numeric() additionally requires a '+' or '-'.
 * These tests look only at which characters occur, not at their order, so
 * a positive answer is a cheap pre-filter before the real get_*() parser. */
509 #define date_token_mask(t) (((date_token *) t)->mask)
510 #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
511 #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
512 #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
513 #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
514 #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
515 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
516 #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
/* parse_broken_date:
 * @tokens: token list produced by datetok()
 * @tzone: timezone offset output (its use is not visible in these lines)
 *
 * Lenient fallback parser for dates that do not follow the strict field
 * order.  Each token is classified by its character mask and tried, in
 * turn, as weekday, month, time, timezone and finally as a number (year,
 * month or day).  The got_* flags and the still-zero tm fields ensure
 * each component is taken from the first token that supplies it. */
519 parse_broken_date (date_token *tokens, int *tzone)
521 gboolean got_wday, got_month, got_tzone;
522 int hour, min, sec, offset, n;
527 memset ((void *) &tm, 0, sizeof (struct tm));
528 got_wday = got_month = got_tzone = FALSE;
533 if (is_weekday (token) && !got_wday) {
534 if ((n = get_wday (token->start, token->len)) != -1) {
535 d(printf ("weekday; "));
542 if (is_month (token) && !got_month) {
543 if ((n = get_month (token->start, token->len)) != -1) {
544 d(printf ("month; "));
/* only accept a time while hour, minute and second are all still zero */
551 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
552 if (get_time (token->start, token->len, &hour, &min, &sec)) {
553 d(printf ("time; "));
561 if (is_tzone (token) && !got_tzone) {
/* work on a copy: get_tzone() advances the pointer it is given */
562 date_token *t = token;
564 if ((n = get_tzone (&t)) != -1) {
565 d(printf ("tzone; "));
572 if (is_numeric (token)) {
/* a four-character number is taken as a full year */
573 if (token->len == 4 && !tm.tm_year) {
574 if ((n = get_year (token->start, token->len)) != -1) {
575 d(printf ("year; "));
576 tm.tm_year = n - 1900;
580 /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
/* NOTE(review): the test below is `> 12`, so 12 itself is presumably still
 * accepted as a month; the '< 12' in the note above looks off by one -
 * confirm against the branch bodies not shown here. */
581 if (!got_month && token->next && is_numeric (token->next)) {
582 if ((n = decode_int (token->start, token->len)) > 12) {
590 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
592 d(printf ("mday; "));
595 } else if (!tm.tm_year) {
596 if ((n = get_year (token->start, token->len)) != -1) {
597 d(printf ("2-digit year; "));
598 tm.tm_year = n - 1900;
614 t = mktime_utc (&tm);
616 /* t is now GMT of the time we want, but not offset by the timezone ... */
618 /* this should convert the time to the GMT equiv time */
/* offset is in +-HHMM form: hundreds are hours, the remainder minutes */
619 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* gmime_datetok_table_init:
 *
 * Generator for gmime_datetok_table.  Clears the table, then sets for
 * every byte value the NON_* flag of each character set that does not
 * contain it, plus HAS_COLON for ':' and HAS_SIGN for '+' / '-'.  The
 * result is printed to stdout as a C array initialiser.
 *
 * For i == 0 every strchr() call succeeds, because strchr() also matches
 * the terminating NUL; entry 0 therefore ends up with only HAS_SIGN set. */
629 gmime_datetok_table_init (void)
633 memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
635 for (i = 0; i < 256; i++) {
636 if (!strchr (NUMERIC_CHARS, i))
637 gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
639 if (!strchr (WEEKDAY_CHARS, i))
640 gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
642 if (!strchr (MONTH_CHARS, i))
643 gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
645 if (!strchr (TIME_CHARS, i))
646 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
648 if (!strchr (TIMEZONE_ALPHA_CHARS, i))
649 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
651 if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
652 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
654 if (((char) i) == ':')
655 gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
657 if (strchr ("+-", i))
658 gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
/* dump the table in the same layout as the static initialiser */
661 printf ("static unsigned char gmime_datetok_table[256] = {");
662 for (i = 0; i < 256; i++) {
665 printf ("%3d,", gmime_datetok_table[i]);
673 * g_mime_utils_header_decode_date:
674 * @str: input date string
675 * @tz_offset: timezone offset
677 * Decodes the rfc822 date string and saves the GMT offset into
678 * @tz_offset if non-NULL.
680 * Returns: the time_t representation of the date string specified by
681 * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
682 * of the timezone offset will be stored.
685 g_mime_utils_header_decode_date (const char *str, int *tz_offset)
687 date_token *token, *tokens;
/* tokenise first; an empty token list is handled as an early exit */
690 if (!(tokens = datetok (str))) {
/* Try the strict RFC 822 field order first and fall back to the lenient
 * parser when it yields 0.  A date that legitimately decodes to time_t 0
 * therefore also takes the fallback path. */
697 if (!(date = parse_rfc822_date (tokens, tz_offset)))
698 date = parse_broken_date (tokens, tz_offset);
/* release the token list; the tokens only point into @str, so nothing
 * else needs freeing */
703 tokens = tokens->next;
704 date_token_free (token);