Add support for simplified ISO 8601/RFC 3339 dates
[mmh] / sbr / dtimep.lex
1 /*
2 ** dtimep.lex exceeds the default table capacities for some old versions
3 ** of lex (and the minimum defaults as specified by POSIX).  The following
4 ** choices meet or exceed the lex defaults for older SunOS4.x, Solaris,
5 ** HPUX, and AIX.
6 */
7 %e4000
8 %p7000
9 %n2500
10 %a5000
11 %{
12 #include <time.h>
13 #include <ctype.h>
14 #include <h/tws.h>
15
16 /*
17 ** Since we're looking at a string at a time, don't worry about
18 ** wrapping to the next buffer.
19 */
20 #define yywrap() 1
21 #define YY_SKIP_YYWRAP
22
23 #define YY_NO_INPUT
24
25 /*
26 ** This is the tricky thing that makes this function cool.  We
27 ** replace the traditional int yylex(void) declaration with our
28 ** dparsetime() declaration, essentially piggy-backing off the
29 ** utility of the yylex() function and adding what we need to make
30 ** the parsing function useful to us.
31 */
32 #define YY_DECL struct tws *dparsetime(char *lexstr)
33
34 /*
35 ** yyterminate() is called after the input string is matched to
36 ** completion (actually, when the lexer reaches an EOF).  The only
37 ** thing that really needs to be in this macro function is the
38 ** return call, which must be substituted inline into dparsetime.
39 */
40
41 #define yyterminate() (void)yy_delete_buffer(lexhandle); \
42         if(!(tw.tw_flags & TW_SUCC)) { \
43                 return (struct tws *)NULL; \
44         } \
45         if(tw.tw_year < 1970) \
46                 tw.tw_year += 1900; \
47         if(tw.tw_year < 1970) \
48                 tw.tw_year += 100; \
49         return(&tw)
50
51 /*
52 ** Patchable flag that says how to interpret NN/NN/NN dates. When
53 ** true, we do it European style: DD/MM/YY. When false, we do it
54 ** American style: MM/DD/YY.  Of course, these are all non-RFC822
55 ** compliant.
56 */
57 int europeandate = 0;
58
59 static char *monthnames[] = {
60         "Jan", "Feb", "Mar", "Apr", "May", "Jun",
61         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
62         NULL
63 };
64
65 static char *daynames[] = {
66         "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
67 };
68
/*
** Look up `name' in the NULL-terminated table `names' and return the
** index of the first entry that is a case-insensitive prefix of
** `name'.  Only strlen(entry) characters are compared, so any text
** following the abbreviation (e.g. "January", "Mon,") is ignored.
** Returns 0 when nothing matches, which is indistinguishable from a
** match on the first table entry.
*/
static int
name2num(char *name, char *names[])
{
        char **entry = names;

        while (*entry != NULL) {
                size_t len = strlen(*entry);

                if (strncasecmp(name, *entry, len) == 0) {
                        return (int)(entry - names);
                }
                entry++;
        }
        return 0;
}
81
82 /*
83 ** The SET* macros will parse for the appropriate field, and leave the
84 ** cp pointer at the first character after the desired field. Be
85 ** careful with variable-length fields or alpha-num mixes.
86 **
87 ** The SKIP* macros skip over characters of a particular class and
88 ** leave cp at the position of the first character that doesn't match
89 ** that class. Correspondingly, SKIPTO* skips until it reaches a
90 ** character of a particular class.
91 */
92
93 #define INIT() { cp = yytext;}
94 #define SETWDAY()  { tw.tw_wday = name2num(cp, daynames); \
95         tw.tw_flags &= ~TW_SDAY; tw.tw_flags |= TW_SEXP; SKIPA(); }
96 #define SETMON()  { tw.tw_mon = name2num(cp, monthnames); SKIPA(); }
97 #define SETMON_NUM()  { tw.tw_mon = atoi(cp)-1; SKIPD(); }
98 #define SETYEAR()  { tw.tw_year = atoi(cp); SKIPD(); }
99 #define SETDAY()  { tw.tw_mday = atoi(cp); tw.tw_flags |= TW_YES; SKIPD(); }
100 #define SETTIME()  { tw.tw_hour = atoi(cp); cp += 2; SKIPTOD(); \
101         tw.tw_min = atoi(cp); cp += 2; if(*cp == ':') { \
102         tw.tw_sec = atoi(++cp); SKIPD(); } }
103 #define SETZONE(x)  { tw.tw_zone = ((x)/100)*60+(x)%100; \
104         tw.tw_flags |= TW_SZEXP; SKIPD(); }
105 #define SETZONEC(h, m)  { tw.tw_zone = (h)*60+(m); \
106         tw.tw_flags |= TW_SZEXP; SKIPD(); }
107 #define SETDST()  { tw.tw_flags |= TW_DST; }
108 #define SKIPD()  { while ( isdigit(*cp++) ) ; --cp; }
109 #define SKIPTOD()  { while ( !isdigit(*cp++) ) ; --cp; }
110 #define SKIPA()  { while ( isalpha(*cp++) ) ; --cp; }
111 #define SKIPTOA()  { while ( !isalpha(*cp++) ) ; --cp; }
112 #define SKIPSP()  { while ( isspace(*cp++) ) ; --cp; }
113 #define SKIPTOSP()  { while ( !isspace(*cp++) ) ; --cp; }
114
115 #ifdef ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST
116 # ifdef HAVE_SYS_TIME_H
117 #  include <sys/time.h>
118 # endif
119 #include <time.h>
120
121 static void
122 zonehack (struct tws *tw)
123 {
124         struct tm *tm;
125
126         if (dmktime (tw) == (time_t) -1)
127                 return;
128
129         tm = localtime (&tw->tw_clock);
130         if (tm->tm_isdst) {
131                 tw->tw_flags |= TW_DST;
132                 tw->tw_zone -= 60;
133         }
134 }
135 #endif /* ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST */
136 %}
137
138 sun     ([Ss]un(day)?)
139 mon     ([Mm]on(day)?)
140 tue     ([Tt]ue(sday)?)
141 wed     ([Ww]ed(nesday)?)
142 thu     ([Tt]hu(rsday)?)
143 fri     ([Ff]ri(day)?)
144 sat     ([Ss]at(urday)?)
145
146 DAY     ({sun}|{mon}|{tue}|{wed}|{thu}|{fri}|{sat})
147
148 jan     ([Jj]an(uary)?)
149 feb     ([Ff]eb(ruary)?)
150 mar     ([Mm]ar(ch)?)
151 apr     ([Aa]pr(il)?)
152 may     ([Mm]ay)
153 jun     ([Jj]un(e)?)
154 jul     ([Jj]ul(y)?)
155 aug     ([Aa]ug(ust)?)
156 sep     ([Ss]ep(tember)?)
157 oct     ([Oo]ct(ober)?)
158 nov     ([Nn]ov(ember)?)
159 dec     ([Dd]ec(ember)?)
160
161 MONTH   ({jan}|{feb}|{mar}|{apr}|{may}|{jun}|{jul}|{aug}|{sep}|{oct}|{nov}|{dec})
162
163 TIME    ({D}:{d}{d}(:{d}{d})?)
164
165 /*
166 ** The year can either be 2 digits, or 4. However, after
167 ** Y2K, we found that some MUA were reporting the year 100, hence
168 ** the middle term here. yyterminate() resolves the actual
169 ** issues with 2-digit years.
170 */
171
172 YEAR    (({d}{d})|(1{d}{d})|({d}{4}))
173
174 w       ([ \t]*)
175 W       ([ \t]+)
176 D       ([0-9]?[0-9])
177 d       [0-9]
178 nl      [ \t\n()]
179
180 %%
181 %{
182         /*
183         ** This section begins the definition of dparsetime().
184         ** Put here any local variable definitions and initializations
185         */
186         YY_BUFFER_STATE lexhandle;
187
188         unsigned char *cp;
189         static struct tws tw;
190
191         memset(&tw,0,sizeof(struct tws));
192
193         lexhandle = yy_scan_string(lexstr);
194 %}
195
196 {DAY}","?{W}{MONTH}{W}{D}{W}{TIME}{W}{YEAR}  {
197         INIT();
198         SETWDAY();
199         SKIPTOA();
200         SETMON();
201         SKIPTOD();
202         SETDAY();
203         SKIPTOD();
204         SETTIME();
205         SKIPTOD();
206         SETYEAR();
207 }
208
209 {DAY}","?{W}{D}{W}{MONTH}{W}{YEAR}{W}{TIME}  {
210         INIT();
211         SETWDAY();
212         SKIPTOD();
213         SETDAY();
214         SKIPTOA();
215         SETMON();
216         SKIPTOD();
217         SETYEAR();
218         SKIPTOD();
219         SETTIME();
220 }
221 {D}{W}{MONTH}{W}{YEAR}{W}{TIME}  {
222         INIT();
223         SETDAY();
224         SKIPTOA();
225         SETMON();
226         SKIPTOD();
227         SETYEAR();
228         SKIPTOD();
229         SETTIME();
230 }
231 {DAY}","?{W}{MONTH}{W}{D}","?{W}{YEAR}","?{W}{TIME}  {
232         INIT();
233         SETWDAY();
234         SKIPTOA();
235         SETMON();
236         SKIPTOD();
237         SETDAY();
238         SKIPTOD();
239         SETYEAR();
240         SKIPTOD();
241         SETTIME();
242 }
243 {DAY}","?{W}{MONTH}{W}{D}","?{W}{YEAR}  {
244         INIT();
245         SETWDAY();
246         SKIPTOA();
247         SETMON();
248         SKIPTOD();
249         SETDAY();
250         SKIPTOD();
251         SETYEAR();
252 }
253 {MONTH}{W}{D}","?{W}{YEAR}","?{W}{DAY}  {
254         INIT();
255         SETMON();
256         SKIPTOD();
257         SETDAY();
258         SKIPTOD();
259         SETYEAR();
260         SKIPTOA();
261         SETWDAY();
262 }
263 {MONTH}{W}{D}","?{W}{YEAR}  {
264         INIT();
265         SETMON();
266         SKIPTOD();
267         SETDAY();
268         SKIPTOD();
269         SETYEAR();
270 }
271 {d}{4}"-"{d}{2}"-"{d}{2}(" "|"T"){TIME}  {
272         INIT();
273         SETYEAR();
274         SKIPTOD();
275         SETMON_NUM();
276         SKIPTOD();
277         SETDAY();
278         SKIPTOD();
279         SETTIME();
280 }
281 {d}{4}"-"{d}{2}"-"{d}{2}  {
282         INIT();
283         SETYEAR();
284         SKIPTOD();
285         SETMON_NUM();
286         SKIPTOD();
287         SETDAY();
288 }
289 {d}{2}"-"{d}{2}"-"{d}{2}  {
290         fprintf(stderr, "the highly ambiguous date format XX-XX-XX..."
291                         " is no longer supported\n");
292 }
293 {D}"/"{D}"/"{YEAR}{W}{TIME}  {
294         INIT();
295         if(europeandate) {
296                 /* DD/MM/YY */
297                 SETDAY();
298                 SKIPTOD();
299                 SETMON_NUM();
300         } else {
301                 /* MM/DD/YY */
302                 SETMON_NUM();
303                 SKIPTOD();
304                 SETDAY();
305         }
306         SKIPTOD();
307         SETYEAR();
308         SKIPTOD();
309         SETTIME();
310 }
311 {D}"/"{D}"/"{YEAR}  {
312         INIT();
313         if(europeandate) {
314                 /* DD/MM/YY */
315                 SETDAY();
316                 SKIPTOD();
317                 SETMON_NUM();
318         } else {
319                 /* MM/DD/YY */
320                 SETMON_NUM();
321                 SKIPTOD();
322                 SETDAY();
323         }
324         SKIPTOD();
325         SETYEAR();
326 }
327
[Aa][Mm]  {
        /*
        ** AM marker of a 12-hour clock time.  The pattern is
        ** deliberately not quoted: inside double quotes lex takes
        ** the brackets literally, so the rule would never match a
        ** real "am" or "AM".  12 AM is midnight, i.e. hour 0.
        */
        if (tw.tw_hour == 12)
                tw.tw_hour = 0;
}
[Pp][Mm]  {
        /*
        ** PM marker: shift 1..11 PM into the afternoon.  12 PM is
        ** noon and stays 12; the guard also keeps an hour that is
        ** already in 24-hour form from running past 23.
        */
        if (tw.tw_hour < 12)
                tw.tw_hour += 12;
}
330
331 "+"{D}{d}{d}  {
332         INIT();
333         SKIPTOD();
334         SETZONE(atoi(cp));
335 #ifdef ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST
336         zonehack (&tw);
337 #endif /* ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST */
338         yyterminate();
339 }
340 "-"{D}{d}{d}  {
341         INIT();
342         SKIPTOD();
343         SETZONE(-atoi(cp));
344 #ifdef ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST
345         zonehack (&tw);
346 #endif /* ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST */
347         yyterminate();
348
349 }
350 "+"{d}{d}":"{d}{d}  {
351         INIT();
352         SKIPTOD();
353         SETZONEC(atoi(cp), atoi(cp+3));
354 #ifdef ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST
355         zonehack (&tw);
356 #endif /* ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST */
357         yyterminate();
358 }
359 "-"{d}{d}":"{d}{d}  {
360         INIT();
361         SKIPTOD();
362         SETZONEC(-atoi(cp), -atoi(cp+3));
363 #ifdef ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST
364         zonehack (&tw);
365 #endif /* ADJUST_NUMERIC_ONLY_TZ_OFFSETS_WRT_DST */
366         yyterminate();
367
368 }
369 {nl}("ut"|"UT")         INIT(); SETZONE(0); yyterminate();
370 {nl}("gmt"|"GMT")       INIT(); SETZONE(0); yyterminate();
371 {nl}("est"|"EST")       INIT(); SETZONE(-500); yyterminate();
372 {nl}("edt"|"EDT")       { INIT(); SETDST(); SETZONE(-500); yyterminate(); }
373 {nl}("cst"|"CST")       INIT(); SETZONE(-600); yyterminate();
374 {nl}("cdt"|"CDT")       { INIT(); SETDST(); SETZONE(-600); yyterminate(); }
375 {nl}("mst"|"MST")       INIT(); SETZONE(-700); yyterminate();
376 {nl}("mdt"|"MDT")       { INIT(); SETDST(); SETZONE(-700); yyterminate(); }
377 {nl}("pst"|"PST")       INIT(); SETZONE(-800); yyterminate();
378 {nl}("pdt"|"PDT")       { INIT(); SETDST(); SETZONE(-800); yyterminate(); }
379 {nl}("nst"|"NST")       INIT(); SETZONE(-330); yyterminate();
380 {nl}("ast"|"AST")       INIT(); SETZONE(-400); yyterminate();
381 {nl}("adt"|"ADT")       { INIT(); SETDST(); SETZONE(-400); yyterminate(); }
382 {nl}("hst"|"HST")       INIT(); SETZONE(-1000); yyterminate();
383 {nl}("hdt"|"HDT")       { INIT(); SETDST(); SETZONE(-1000); yyterminate(); }
384 .|\n
385
386 %%
/*
** Dummy wrapper whose only purpose is to reference unput(), which
** portably squashes the warning about the yyunput() function being
** static but never used.  It costs a tiny amount of extra code in
** the binary; the alternatives would be the flex-specific
** "%option nounput", or makefile hackery just to compile dtimep.c
** with different flags.
*/
void
dtimep_yyunput(int c)
{
        unput(c);
}