1 /* date.c - Date-parsing utility for the notmuch mail system.
3 * Copyright © 2000-2009 Jeffrey Stedfast
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/
19 /* This code was originally written by Jeffrey Stedfast
20 * as part of his GMime library (http://spruce.sourceforge.net/gmime/)
22 * Carl Worth <cworth@cworth.org> imported it into notmuch and removed
26 #include "notmuch-private.h"
/* Half of GMIME_FOLD_LEN; GMIME_FOLD_LEN itself is defined outside the lines
 * visible here. */
40 #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
42 /* date parser macros */
/* Character classes consulted by gmime_datetok_table_init(): a byte that
 * strchr() does not find in one of these strings receives the corresponding
 * DATE_TOKEN_NON_* bit in gmime_datetok_table.  The lookup is a plain strchr(),
 * so membership is case-sensitive. */
/* Decimal digits. */
43 #define NUMERIC_CHARS "1234567890"
/* Every letter that occurs in a full English weekday name. */
44 #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
/* Every letter that occurs in a full English month name. */
45 #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
/* Letters of the listed zone abbreviations, plus '(' and ')'. */
46 #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
/* Sign characters and digits, i.e. what a numeric zone offset is made of. */
47 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
/* Digits and the ':' separator. */
48 #define TIME_CHARS "1234567890:"
/* Classification bits held in gmime_datetok_table, one byte of flags per
 * input byte.  datetok() ORs together the table entries of the bytes of a
 * token, so a DATE_TOKEN_NON_* bit ends up set as soon as one byte lies
 * outside that class; the is_*() macros test for the bit being clear.
 * The two HAS_* bits are positive markers instead: they are set for ':' and
 * for '+' / '-' respectively. */
/* Some byte is not in NUMERIC_CHARS. */
50 #define DATE_TOKEN_NON_NUMERIC (1 << 0)
/* Some byte is not in WEEKDAY_CHARS. */
51 #define DATE_TOKEN_NON_WEEKDAY (1 << 1)
/* Some byte is not in MONTH_CHARS. */
52 #define DATE_TOKEN_NON_MONTH (1 << 2)
/* Some byte is not in TIME_CHARS. */
53 #define DATE_TOKEN_NON_TIME (1 << 3)
/* A ':' was seen. */
54 #define DATE_TOKEN_HAS_COLON (1 << 4)
/* Some byte is not in TIMEZONE_ALPHA_CHARS. */
55 #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
/* Some byte is not in TIMEZONE_NUMERIC_CHARS. */
56 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
/* A '+' or '-' was seen. */
57 #define DATE_TOKEN_HAS_SIGN (1 << 7)
59 static unsigned char gmime_datetok_table[256] = {
60 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
61 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
62 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
63 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
64 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
65 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
66 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
67 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
68 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
69 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
70 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
71 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
72 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
73 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
74 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
75 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
78 /* hrm, is there a library for this shit? */
85 { "EST", -500 }, /* these are all US timezones. bloody yanks */
100 static char *tm_months[] = {
101 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
102 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
105 static char *tm_days[] = {
106 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
109 /* This is where it gets ugly... */
111 typedef struct _date_token {
112 struct _date_token *next;
/* Release one date_token through g_slice_free(). */
118 #define date_token_free(tok) g_slice_free (date_token, tok)
/* Obtain one date_token through g_slice_new().
 * NOTE(review): g_slice_new() presumably does not zero the memory (there is a
 * separate g_slice_new0()) -- confirm every field is assigned by the caller. */
119 #define date_token_new() g_slice_new (date_token)
122 datetok (const char *date)
124 date_token *tokens = NULL, *token, *tail = (date_token *) &tokens;
125 const char *start, *end;
130 /* kill leading whitespace */
131 while (*start == ' ' || *start == '\t')
137 mask = gmime_datetok_table[(unsigned char) *start];
139 /* find the end of this token */
141 while (*end && !strchr ("-/,\t\r\n ", *end))
142 mask |= gmime_datetok_table[(unsigned char) *end++];
145 token = date_token_new ();
147 token->start = start;
148 token->len = end - start;
165 decode_int (const char *in, size_t inlen)
167 register const char *inptr;
168 int sign = 1, val = 0;
177 } else if (*inptr == '+')
180 for ( ; inptr < inend; inptr++) {
181 if (!(*inptr >= '0' && *inptr <= '9'))
184 val = (val * 10) + (*inptr - '0');
194 get_days_in_month (int month, int year)
211 if (g_date_is_leap_year (year))
222 get_wday (const char *in, size_t inlen)
226 g_return_val_if_fail (in != NULL, -1);
231 for (wday = 0; wday < 7; wday++) {
232 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
236 return -1; /* unknown week day */
240 get_mday (const char *in, size_t inlen)
244 g_return_val_if_fail (in != NULL, -1);
246 mday = decode_int (in, inlen);
248 if (mday < 0 || mday > 31)
255 get_month (const char *in, size_t inlen)
259 g_return_val_if_fail (in != NULL, -1);
264 for (i = 0; i < 12; i++) {
265 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
269 return -1; /* unknown month */
273 get_year (const char *in, size_t inlen)
277 g_return_val_if_fail (in != NULL, -1);
279 if ((year = decode_int (in, inlen)) == -1)
283 year += (year < 70) ? 2000 : 1900;
292 get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
294 register const char *inptr;
295 int *val, colons = 0;
298 *hour = *min = *sec = 0;
302 for (inptr = in; inptr < inend; inptr++) {
315 } else if (!(*inptr >= '0' && *inptr <= '9'))
318 *val = (*val * 10) + (*inptr - '0');
325 get_tzone (date_token **token)
327 const char *inptr, *inend;
331 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
332 inptr = (*token)->start;
333 inlen = (*token)->len;
334 inend = inptr + inlen;
336 if (*inptr == '+' || *inptr == '-') {
337 return decode_int (inptr, inlen);
341 if (*(inend - 1) == ')')
347 for (t = 0; t < 15; t++) {
348 size_t len = strlen (tz_offsets[t].name);
353 if (!strncmp (inptr, tz_offsets[t].name, len))
354 return tz_offsets[t].offset;
363 mktime_utc (struct tm *tm)
371 #if defined (G_OS_WIN32)
373 if (tm->tm_isdst > 0) {
379 #elif defined (HAVE_TM_GMTOFF)
381 #elif defined (HAVE_TIMEZONE)
382 if (tm->tm_isdst > 0) {
383 #if defined (HAVE_ALTZONE)
385 #else /* !defined (HAVE_ALTZONE) */
386 tz = (timezone - 3600);
391 #elif defined (HAVE__TIMEZONE)
394 #error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
401 parse_rfc822_date (date_token *tokens, int *tzone)
403 int hour, min, sec, offset, n;
408 g_return_val_if_fail (tokens != NULL, (time_t) 0);
412 memset ((void *) &tm, 0, sizeof (struct tm));
414 if ((n = get_wday (token->start, token->len)) != -1) {
415 /* not all dates may have this... */
421 if (!token || (n = get_mday (token->start, token->len)) == -1)
428 if (!token || (n = get_month (token->start, token->len)) == -1)
435 if (!token || (n = get_year (token->start, token->len)) == -1)
438 tm.tm_year = n - 1900;
441 /* get the hour/min/sec */
442 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
450 /* get the timezone */
451 if (!token || (n = get_tzone (&token)) == -1) {
452 /* I guess we assume tz is GMT? */
458 t = mktime_utc (&tm);
460 /* t is now GMT of the time we want, but not offset by the timezone ... */
462 /* this should convert the time to the GMT equiv time */
463 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* Token classification predicates used by parse_broken_date().  Each one
 * inspects the DATE_TOKEN_* bits in the token's mask. */
/* Read the mask of token t (t is cast to date_token *).
 * NOTE(review): the argument is not parenthesized inside the cast; the uses
 * visible here pass plain expressions such as token or token->next, which
 * expand as intended. */
472 #define date_token_mask(t) (((date_token *) t)->mask)
/* True when no byte of the token fell outside NUMERIC_CHARS. */
473 #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
/* True when no byte of the token fell outside WEEKDAY_CHARS. */
474 #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
/* True when no byte of the token fell outside MONTH_CHARS. */
475 #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
/* True when the token is made only of TIME_CHARS and contains a ':'. */
476 #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
/* True when no byte of the token fell outside TIMEZONE_ALPHA_CHARS. */
477 #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
/* True when the token is made only of TIMEZONE_NUMERIC_CHARS and carries a
 * '+' or '-' sign. */
478 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
/* True for either flavour of timezone token. */
479 #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
482 parse_broken_date (date_token *tokens, int *tzone)
484 gboolean got_wday, got_month, got_tzone;
485 int hour, min, sec, offset, n;
490 memset ((void *) &tm, 0, sizeof (struct tm));
491 got_wday = got_month = got_tzone = FALSE;
496 if (is_weekday (token) && !got_wday) {
497 if ((n = get_wday (token->start, token->len)) != -1) {
498 d(printf ("weekday; "));
505 if (is_month (token) && !got_month) {
506 if ((n = get_month (token->start, token->len)) != -1) {
507 d(printf ("month; "));
514 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
515 if (get_time (token->start, token->len, &hour, &min, &sec)) {
516 d(printf ("time; "));
524 if (is_tzone (token) && !got_tzone) {
525 date_token *t = token;
527 if ((n = get_tzone (&t)) != -1) {
528 d(printf ("tzone; "));
535 if (is_numeric (token)) {
536 if (token->len == 4 && !tm.tm_year) {
537 if ((n = get_year (token->start, token->len)) != -1) {
538 d(printf ("year; "));
539 tm.tm_year = n - 1900;
543 /* Note: assumes MM-DD-YY ordering if '0 < MM <= 12' holds true */
544 if (!got_month && token->next && is_numeric (token->next)) {
545 if ((n = decode_int (token->start, token->len)) > 12) {
553 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
555 d(printf ("mday; "));
558 } else if (!tm.tm_year) {
559 if ((n = get_year (token->start, token->len)) != -1) {
560 d(printf ("2-digit year; "));
561 tm.tm_year = n - 1900;
577 t = mktime_utc (&tm);
579 /* t is now GMT of the time we want, but not offset by the timezone ... */
581 /* this should convert the time to the GMT equiv time */
582 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
592 gmime_datetok_table_init (void)
596 memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
598 for (i = 0; i < 256; i++) {
599 if (!strchr (NUMERIC_CHARS, i))
600 gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
602 if (!strchr (WEEKDAY_CHARS, i))
603 gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
605 if (!strchr (MONTH_CHARS, i))
606 gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
608 if (!strchr (TIME_CHARS, i))
609 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
611 if (!strchr (TIMEZONE_ALPHA_CHARS, i))
612 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
614 if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
615 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
617 if (((char) i) == ':')
618 gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
620 if (strchr ("+-", i))
621 gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
624 printf ("static unsigned char gmime_datetok_table[256] = {");
625 for (i = 0; i < 256; i++) {
628 printf ("%3d,", gmime_datetok_table[i]);
636 * g_mime_utils_header_decode_date:
637 * @str: input date string
638 * @tz_offset: timezone offset
640 * Decodes the rfc822 date string and saves the GMT offset into
641 * @tz_offset if non-NULL.
643 * Returns: the time_t representation of the date string specified by
644 * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
645 * of the timezone offset will be stored.
648 g_mime_utils_header_decode_date (const char *str, int *tz_offset)
650 date_token *token, *tokens;
653 if (!(tokens = datetok (str))) {
660 if (!(date = parse_rfc822_date (tokens, tz_offset)))
661 date = parse_broken_date (tokens, tz_offset);
666 tokens = tokens->next;
667 date_token_free (token);