3 * fmt_compile.c -- "compile" format strings for fmt_scan
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
9 * This code compiles the format strings (documented in mh-format(5)) into
10 * an internal form to be later processed by fmt_scan.c.
12 * What happens here is that the format strings are parsed and an array
13 * of struct format structures are returned. Each format structure is
14 * a single operation interpreted by the routines in fmt_scan.c.
16 * There is NOT a one-to-one correspondence between format strings and
17 * format instructions; some functions have side effects that can result
18 * in multiple instructions being generated. The exact list of instructions
19 * generated by a format string can be seen with the nmh fmtdump utility.
21 * A list of format instructions can be found in fmt_compile.h.
23 * If you wish to add a new function, you will need to do the following
26 * - Add a new instruction to the list of instructions in fmt_compile.h.
27 * Note that test instructions (starting with FT_IF_S_NULL) have special
28 * handling, so if you are NOT writing a test function then you need
29 * to insert it into the list before that _and_ bump all of the
30 * following instruction numbers.
32 * - Add the function name to the functable[] array below, and write any
33 * special code that your function may require in terms of parsing
34 * (it very well may not need anything).
36 * - Add the code in fmt_scan.c to handle your new function.
38 * - Add code to fmtdump.c to display your new function.
40 * - Document the new function in the mh-format(5) man page.
45 #include <h/addrsbr.h>
47 #include <h/fmt_scan.h>
48 #include <h/fmt_compile.h>
51 #ifdef HAVE_SYS_TIME_H
52 # include <sys/time.h>
57 * hash table for deciding if a component is "interesting"
59 struct comp *wantcomp[128];
61 static struct format *formatvec; /* array to hold formats */
62 static struct format *next_fp; /* next free format slot */
63 static struct format *fp; /* current format slot */
64 static struct comp *cm; /* most recent comp ref */
65 static struct ftable *ftbl; /* most recent func ref */
67 static int infunction; /* function nesting cnt */
69 extern struct mailname fmt_mnull;
71 /* ftable->type (argument type) */
72 #define TF_COMP 0 /* component expected */
73 #define TF_NUM 1 /* number expected */
74 #define TF_STR 2 /* string expected */
75 #define TF_EXPR 3 /* component or func. expected */
76 #define TF_NONE 4 /* no argument */
77 #define TF_MYBOX 5 /* special - get current user's mbox */
78 #define TF_NOW 6 /* special - get current unix time */
79 #define TF_EXPR_SV 7 /* like expr but save current str reg */
80 #define TF_NOP 8 /* like expr but no result */
81 #define TF_MYNAME 9 /* special - get current name of user */
82 #define TF_MYHOST 10 /* special - get "local" hostname */
83 #define TF_LMBOX 11 /* special - get full local mailbox */
86 /* NB that TFL_PUTS is also used to decide whether the test
87 * in a "%<(function)..." should be a string or numeric one.
89 #define TFL_PUTS 1 /* implicit putstr if top level */
90 #define TFL_PUTN 2 /* implicit putnum if top level */
93 * The functable array maps between the text names of format functions and
94 * the format instructions interpreted by the engine in fmt_scan.c.
96 * The elements of this structure are as follows:
98 * name - The name of the function as seen in the format string. This is
99 * what maps a particular function name into a format instruction.
100 * type - The type of argument this function expects. Those types are
101 * listed above (with the TF_ prefix). This affects what gets
102 * placed in the format instruction (the f_un union).
103 * f_type - The instruction corresponding to this function (from the list
105 * extra - Used by some functions to provide extra data to the compiler.
107 * - Providing an alternate instruction to combine a load
108 * and test operation (see do_if()).
109 * - Passed in f_value in the format instruction to provide
110 * extra information for the engine (see FT_LV_DAT handling
112 * - Provide a hint as to preprocessing that is required for
113 * this instruction (see do_name()).
114 * flags - See the definitions for TFL_PUTS & TFL_PUTN above.
118 char *name; /* function name */
119 char type; /* argument type */
120 char f_type; /* fmt type */
121 char extra; /* arg. type dependent extra info */
125 static struct ftable functable[] = {
126 { "nonzero", TF_EXPR, FT_V_NE, FT_IF_V_NE, 0 },
127 { "zero", TF_EXPR, FT_V_EQ, FT_IF_V_EQ, 0 },
128 { "eq", TF_NUM, FT_V_EQ, FT_IF_V_EQ, 0 },
129 { "ne", TF_NUM, FT_V_NE, FT_IF_V_NE, 0 },
130 { "gt", TF_NUM, FT_V_GT, FT_IF_V_GT, 0 },
131 { "null", TF_EXPR, FT_S_NULL, FT_IF_S_NULL, 0 },
132 { "nonnull", TF_EXPR, FT_S_NONNULL, FT_IF_S, 0 },
133 { "match", TF_STR, FT_V_MATCH, FT_IF_MATCH, 0 },
134 { "amatch", TF_STR, FT_V_AMATCH, FT_IF_AMATCH, 0 },
136 { "putstr", TF_EXPR, FT_STR, 0, 0 },
137 { "putstrf", TF_EXPR, FT_STRF, 0, 0 },
138 { "putnum", TF_EXPR, FT_NUM, 0, 0 },
139 { "putnumf", TF_EXPR, FT_NUMF, 0, 0 },
140 { "putaddr", TF_STR, FT_PUTADDR, 0, 0 },
141 { "putlit", TF_EXPR, FT_STRLIT, 0, 0 },
142 { "zputlit", TF_EXPR, FT_STRLITZ, 0, 0 },
143 { "void", TF_NOP, 0, 0, 0 },
145 { "comp", TF_COMP, FT_LS_COMP, 0, TFL_PUTS },
146 { "lit", TF_STR, FT_LS_LIT, 0, TFL_PUTS },
147 { "getenv", TF_STR, FT_LS_GETENV, 0, TFL_PUTS },
148 { "profile", TF_STR, FT_LS_CFIND, 0, TFL_PUTS },
149 { "decodecomp", TF_COMP, FT_LS_DECODECOMP, 0, TFL_PUTS },
150 { "decode", TF_EXPR, FT_LS_DECODE, 0, TFL_PUTS },
151 { "trim", TF_EXPR, FT_LS_TRIM, 0, 0 },
152 { "compval", TF_COMP, FT_LV_COMP, 0, TFL_PUTN },
153 { "compflag", TF_COMP, FT_LV_COMPFLAG, 0, TFL_PUTN },
154 { "num", TF_NUM, FT_LV_LIT, 0, TFL_PUTN },
155 { "msg", TF_NONE, FT_LV_DAT, 0, TFL_PUTN },
156 { "cur", TF_NONE, FT_LV_DAT, 1, TFL_PUTN },
157 { "size", TF_NONE, FT_LV_DAT, 2, TFL_PUTN },
158 { "width", TF_NONE, FT_LV_DAT, 3, TFL_PUTN },
159 { "unseen", TF_NONE, FT_LV_DAT, 4, TFL_PUTN },
160 { "dat", TF_NUM, FT_LV_DAT, 0, TFL_PUTN },
161 { "strlen", TF_NONE, FT_LV_STRLEN, 0, TFL_PUTN },
162 { "me", TF_MYBOX, FT_LS_LIT, 0, TFL_PUTS },
163 { "myname", TF_MYNAME, FT_LS_LIT, 0, TFL_PUTS },
164 { "myhost", TF_MYHOST, FT_LS_LIT, 0, TFL_PUTS },
165 { "localmbox", TF_LMBOX, FT_LS_LIT, 0, TFL_PUTS },
166 { "plus", TF_NUM, FT_LV_PLUS_L, 0, TFL_PUTN },
167 { "minus", TF_NUM, FT_LV_MINUS_L, 0, TFL_PUTN },
168 { "divide", TF_NUM, FT_LV_DIVIDE_L, 0, TFL_PUTN },
169 { "modulo", TF_NUM, FT_LV_MODULO_L, 0, TFL_PUTN },
170 { "charleft", TF_NONE, FT_LV_CHAR_LEFT, 0, TFL_PUTN },
171 { "timenow", TF_NOW, FT_LV_LIT, 0, TFL_PUTN },
173 { "month", TF_COMP, FT_LS_MONTH, FT_PARSEDATE, TFL_PUTS },
174 { "lmonth", TF_COMP, FT_LS_LMONTH, FT_PARSEDATE, TFL_PUTS },
175 { "tzone", TF_COMP, FT_LS_ZONE, FT_PARSEDATE, TFL_PUTS },
176 { "day", TF_COMP, FT_LS_DAY, FT_PARSEDATE, TFL_PUTS },
177 { "weekday", TF_COMP, FT_LS_WEEKDAY, FT_PARSEDATE, TFL_PUTS },
178 { "tws", TF_COMP, FT_LS_822DATE, FT_PARSEDATE, TFL_PUTS },
179 { "sec", TF_COMP, FT_LV_SEC, FT_PARSEDATE, TFL_PUTN },
180 { "min", TF_COMP, FT_LV_MIN, FT_PARSEDATE, TFL_PUTN },
181 { "hour", TF_COMP, FT_LV_HOUR, FT_PARSEDATE, TFL_PUTN },
182 { "mday", TF_COMP, FT_LV_MDAY, FT_PARSEDATE, TFL_PUTN },
183 { "mon", TF_COMP, FT_LV_MON, FT_PARSEDATE, TFL_PUTN },
184 { "year", TF_COMP, FT_LV_YEAR, FT_PARSEDATE, TFL_PUTN },
185 { "yday", TF_COMP, FT_LV_YDAY, FT_PARSEDATE, TFL_PUTN },
186 { "wday", TF_COMP, FT_LV_WDAY, FT_PARSEDATE, TFL_PUTN },
187 { "zone", TF_COMP, FT_LV_ZONE, FT_PARSEDATE, TFL_PUTN },
188 { "clock", TF_COMP, FT_LV_CLOCK, FT_PARSEDATE, TFL_PUTN },
189 { "rclock", TF_COMP, FT_LV_RCLOCK, FT_PARSEDATE, TFL_PUTN },
190 { "sday", TF_COMP, FT_LV_DAYF, FT_PARSEDATE, TFL_PUTN },
191 { "szone", TF_COMP, FT_LV_ZONEF, FT_PARSEDATE, TFL_PUTN },
192 { "dst", TF_COMP, FT_LV_DST, FT_PARSEDATE, TFL_PUTN },
193 { "pretty", TF_COMP, FT_LS_PRETTY, FT_PARSEDATE, TFL_PUTS },
194 { "nodate", TF_COMP, FT_LV_COMPFLAG, FT_PARSEDATE, TFL_PUTN },
195 { "date2local", TF_COMP, FT_LOCALDATE, FT_PARSEDATE, 0 },
196 { "date2gmt", TF_COMP, FT_GMTDATE, FT_PARSEDATE, 0 },
198 { "pers", TF_COMP, FT_LS_PERS, FT_PARSEADDR, TFL_PUTS },
199 { "mbox", TF_COMP, FT_LS_MBOX, FT_PARSEADDR, TFL_PUTS },
200 { "host", TF_COMP, FT_LS_HOST, FT_PARSEADDR, TFL_PUTS },
201 { "path", TF_COMP, FT_LS_PATH, FT_PARSEADDR, TFL_PUTS },
202 { "gname", TF_COMP, FT_LS_GNAME, FT_PARSEADDR, TFL_PUTS },
203 { "note", TF_COMP, FT_LS_NOTE, FT_PARSEADDR, TFL_PUTS },
204 { "addr", TF_COMP, FT_LS_ADDR, FT_PARSEADDR, TFL_PUTS },
205 { "proper", TF_COMP, FT_LS_822ADDR, FT_PARSEADDR, TFL_PUTS },
206 { "type", TF_COMP, FT_LV_HOSTTYPE, FT_PARSEADDR, TFL_PUTN },
207 { "ingrp", TF_COMP, FT_LV_INGRPF, FT_PARSEADDR, TFL_PUTN },
208 { "nohost", TF_COMP, FT_LV_NOHOSTF, FT_PARSEADDR, TFL_PUTN },
209 { "formataddr", TF_EXPR_SV,FT_FORMATADDR, FT_FORMATADDR, 0 },
210 { "concataddr", TF_EXPR_SV,FT_CONCATADDR, FT_FORMATADDR, 0 },
211 { "friendly", TF_COMP, FT_LS_FRIENDLY, FT_PARSEADDR, TFL_PUTS },
213 { "mymbox", TF_COMP, FT_LV_COMPFLAG, FT_MYMBOX, TFL_PUTN },
214 { "addtoseq", TF_STR, FT_ADDTOSEQ, 0, 0 },
216 { "unquote", TF_EXPR, FT_LS_UNQUOTE, 0, TFL_PUTS},
221 /* Add new component to the hash table */
222 #define NEWCOMP(cm,name) do { \
223 cm = ((struct comp *) calloc(1, sizeof (struct comp)));\
227 cm->c_next = wantcomp[i];\
231 #define NEWFMT (next_fp++)
232 #define NEW(type,fill,wid) do {\
233 fp=NEWFMT; fp->f_type=(type); fp->f_fill=(fill); fp->f_width=(wid); \
236 /* Add (possibly new) component to the hash table */
237 #define ADDC(name) do { \
245 #define LV(type, value) do { NEW(type,0,0); fp->f_value = (value); } while (0)
246 #define LS(type, str) do { NEW(type,0,0); fp->f_text = (str); } while (0)
248 #define PUTCOMP(comp) do { NEW(FT_COMP,0,0); ADDC(comp); } while (0)
249 #define PUTLIT(str) do { NEW(FT_LIT,0,0); fp->f_text = (str); } while (0)
250 #define PUTC(c) do { NEW(FT_CHAR,0,0); fp->f_char = (c); } while (0)
253 static unsigned char *usr_fstring; /* for CERROR */
255 #define CERROR(str) compile_error (str, cp)
260 static struct ftable *lookup(char *);
261 static void compile_error(char *, char *);
262 static char *compile (char *);
263 static char *do_spec(char *);
264 static char *do_name(char *, int);
265 static char *do_func(char *);
266 static char *do_expr (char *, int);
267 static char *do_loop(char *);
268 static char *do_if(char *);
272 * Lookup a function name in the functable
274 static struct ftable *
277 register struct ftable *t = functable;
279 register char c = *name;
281 while ((nm = t->name)) {
282 if (*nm == c && strcmp (nm, name) == 0)
287 return (struct ftable *) 0;
292 compile_error(char *str, char *cp)
294 int i, errpos, errctx;
296 errpos = cp - format_string;
297 errctx = errpos > 20 ? 20 : errpos;
298 usr_fstring[errpos] = '\0';
300 for (i = errpos-errctx; i < errpos; i++) {
302 if (iscntrl(usr_fstring[i]))
304 if (usr_fstring[i] < 32)
306 usr_fstring[i] = '_';
309 advise(NULL, "\"%s\": format compile error - %s",
310 &usr_fstring[errpos-errctx], str);
311 adios (NULL, "%*s", errctx+1, "^");
315 * Compile format string "fstring" into format list "fmt".
316 * Return the number of header components found in the format
321 fmt_compile(char *fstring, struct format **fmt)
327 free (format_string);
328 format_string = getcpy (fstring);
329 usr_fstring = fstring;
331 /* init the component hash table. */
332 for (i = 0; i < sizeof(wantcomp)/sizeof(wantcomp[0]); i++)
335 memset((char *) &fmt_mnull, 0, sizeof(fmt_mnull));
337 /* It takes at least 4 characters to generate one format, so we
338 * allocate a worst-case format array using 1/4 the length
339 * of the format string. We actually need twice this much
340 * to handle both pre-processing (e.g., address parsing) and
343 i = strlen(fstring)/2 + 1;
345 next_fp = formatvec = (struct format *)calloc ((size_t) i,
346 sizeof(struct format));
348 adios (NULL, "unable to allocate format storage");
353 cp = compile(format_string);
355 CERROR("extra '%>', '%|' or '%?'");
357 LV(FT_DONE, 0); /* really done */
366 register char *cp = sp;
371 while ((c = *cp) && c != '%')
407 case ';': /* comment line */
409 while ((c = *cp++) && c != '\n')
422 * Process functions & components (handle field width here as well
427 register char *cp = sp;
430 register int ljust = 0;
431 #endif /* not lint */
432 register int wid = 0;
433 register char fill = ' ';
445 wid = wid*10 + (c - '0');
451 fp->f_type = wid? FT_COMPF : FT_COMP;
456 if (ftbl->flags & TFL_PUTS) {
457 LV( wid? FT_STRF : FT_STR, ftbl->extra);
459 else if (ftbl->flags & TFL_PUTN) {
460 LV( wid? FT_NUMF : FT_NUM, ftbl->extra);
465 CERROR("component or function name expected");
476 * Process a component name. Normally this involves generating an FT_COMP
477 * instruction for the specified component. If preprocess is set, then we
478 * do some extra processing.
481 do_name(char *sp, int preprocess)
483 register char *cp = sp;
486 static int primed = 0;
488 while (isalnum(c = *cp++) || c == '-' || c == '_')
491 CERROR("'}' expected");
495 switch (preprocess) {
498 if (cm->c_type & CT_ADDR) {
499 CERROR("component used as both date and address");
501 cm->c_tws = (struct tws *)
502 calloc((size_t) 1, sizeof(*cm->c_tws));
503 fp->f_type = preprocess;
505 cm->c_type |= CT_DATE;
510 ismymbox ((struct mailname *) 0);
515 if (cm->c_type & CT_DATE) {
516 CERROR("component used as both date and address");
518 cm->c_mn = &fmt_mnull;
519 fp->f_type = preprocess;
521 cm->c_type |= CT_ADDR;
525 if (cm->c_type & CT_DATE) {
526 CERROR("component used as both date and address");
528 cm->c_type |= CT_ADDR;
535 * Generate one or more instructions corresponding to the named function.
536 * The different types of function arguments are handled here.
541 register char *cp = sp;
543 register struct ftable *t;
545 int mflag; /* minus sign in NUM */
549 while (isalnum(c = *cp++))
551 if (c != '(' && c != '{' && c != ' ' && c != ')') {
552 CERROR("'(', '{', ' ' or ')' expected");
555 if ((t = lookup (sp)) == 0) {
556 CERROR("unknown function");
565 CERROR("component name expected");
567 cp = do_name(cp, t->extra);
568 fp->f_type = t->f_type;
573 if ((mflag = (c == '-')))
577 n = n*10 + (c - '0');
587 while (c && c != ')')
594 LV(t->f_type,t->extra);
598 LS(t->f_type, getusername());
602 LS(t->f_type, getfullname());
606 LS(t->f_type, LocalName(0));
610 LS(t->f_type, getlocalmbox());
614 LV(t->f_type, time((time_t *) 0));
622 cp = do_expr(cp, t->extra);
630 cp = do_expr(cp, t->extra);
636 CERROR("')' expected");
643 * Handle an expression as an argument. Basically we call one of do_name(),
644 * do_func(), or do_if()
647 do_expr (char *sp, int preprocess)
649 register char *cp = sp;
652 if ((c = *cp++) == '{') {
653 cp = do_name (cp, preprocess);
654 fp->f_type = FT_LS_COMP;
655 } else if (c == '(') {
657 } else if (c == ')') {
659 } else if (c == '%' && *cp == '<') {
662 CERROR ("'(', '{', '%<' or ')' expected");
668 * I am guessing this was for some kind of loop statement, which would have
669 * looked like %[ .... %]. It looks like the way this would have worked
670 * is that the format engine would have seen that FT_DONE had a 1 in the
671 * f_un.f_un_value and then decided whether or not to continue the loop.
672 * There is no support for this in the format engine, so right now if
673 * you try using it you will reach the FT_DONE and simply stop. I'm leaving
674 * this here in case someone wants to continue the work.
679 register char *cp = sp;
680 struct format *floop;
685 CERROR ("']' expected");
687 LV(FT_DONE, 1); /* not yet done */
689 fp->f_skip = floop - fp; /* skip backwards */
695 * Handle an if-elsif-endif statement. Note here that the branching
696 * is handled by the f_skip member of the struct format (which is really
697 * just f_width overloaded). This number controls how far to move forward
698 * (or back) in the format instruction array.
703 register char *cp = sp;
704 register struct format *fexpr,
705 *fif = (struct format *)NULL;
706 register int c = '<';
709 if (c == '<') { /* doing an IF */
710 if ((c = *cp++) == '{') /*}*/{
712 fp->f_type = FT_LS_COMP;
717 /* see if we can merge the load and the "if" */
718 if (ftbl->f_type >= IF_FUNCS)
719 fp->f_type = ftbl->extra;
721 /* Put out a string test or a value test depending
722 * on what this function's return type is.
724 if (ftbl->flags & TFL_PUTS) {
732 CERROR("'(' or '{' expected"); /*}*/
736 fexpr = fp; /* loc of [ELS]IF */
737 cp = compile (cp); /* compile IF TRUE stmts */
739 fif->f_skip = next_fp - fif;
741 if ((c = *cp++) == '|') { /* the last ELSE */
743 fif = fp; /* loc of GOTO */
744 fexpr->f_skip = next_fp - fexpr;
746 fexpr = (struct format *)NULL;/* no extra ENDIF */
748 cp = compile (cp); /* compile ELSE stmts */
749 fif->f_skip = next_fp - fif;
752 else if (c == '?') { /* another ELSIF */
754 fif = fp; /* loc of GOTO */
755 fexpr->f_skip = next_fp - fexpr;
757 c = '<'; /* impersonate an IF */
764 CERROR("'>' expected.");
767 if (fexpr) /* IF ... [ELSIF ...] ENDIF */
768 fexpr->f_skip = next_fp - fexpr;