X-Git-Url: http://git.marmaro.de/?a=blobdiff_plain;f=sbr%2Ffmt_compile.c;h=014733427dcd6261cded324b744e0cac125cdc7b;hb=e69044f7624abe5cb2cb796d528c0cc5f29515f7;hp=d699916a7f9743b1dcc8cc97d7bdfeee971e2267;hpb=13f84dd50ca5754391dbd3296a5c7425f9363600;p=mmh diff --git a/sbr/fmt_compile.c b/sbr/fmt_compile.c index d699916..0147334 100644 --- a/sbr/fmt_compile.c +++ b/sbr/fmt_compile.c @@ -2,11 +2,43 @@ /* * fmt_compile.c -- "compile" format strings for fmt_scan * - * $Id$ - * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for * complete copyright information. + * + * This code compiles the format strings (documented in mh-format(5)) into + * an internal form to be later processed by fmt_scan.c. + * + * What happens here is that the format strings are parsed and an array + * of struct format structures is returned. Each format structure is + * a single operation interpreted by the routines in fmt_scan.c. + * + * There is NOT a one-to-one correspondence between format strings and + * format instructions; some functions have side effects that can result + * in multiple instructions being generated. The exact list of instructions + * generated by a format string can be seen with the nmh fmtdump utility. + * + * A list of format instructions can be found in fmt_compile.h. + * + * If you wish to add a new function, you will need to do the following + * things: + * + * - Add a new instruction to the list of instructions in fmt_compile.h. + * Note that test instructions (starting with FT_IF_S_NULL) have special + * handling, so if you are NOT writing a test function then you need + * to insert it into the list before that _and_ bump all of the + * following instruction numbers. + * + * - Add the function name to the functable[] array below, and write any + * special code that your function may require in terms of parsing + * (it very well may not need anything). 
+ * + * - Add the code in fmt_scan.c to handle your new function. + * + * - Add code to fmtdump.c to display your new function. + * + * - Document the new function in the mh-format(5) man page. + * */ #include @@ -14,22 +46,18 @@ #include #include #include +#include +#include -#ifdef TIME_WITH_SYS_TIME +#ifdef HAVE_SYS_TIME_H # include -# include -#else -# ifdef TM_IN_SYS_TIME -# include -# else -# include -# endif #endif +#include /* * hash table for deciding if a component is "interesting" */ -struct comp *wantcomp[128]; +static struct comp *wantcomp[128]; static struct format *formatvec; /* array to hold formats */ static struct format *next_fp; /* next free format slot */ @@ -51,11 +79,42 @@ extern struct mailname fmt_mnull; #define TF_NOW 6 /* special - get current unix time */ #define TF_EXPR_SV 7 /* like expr but save current str reg */ #define TF_NOP 8 /* like expr but no result */ +#define TF_MYNAME 9 /* special - get current name of user */ +#define TF_MYHOST 10 /* special - get "local" hostname */ +#define TF_LMBOX 11 /* special - get full local mailbox */ /* ftable->flags */ +/* NB that TFL_PUTS is also used to decide whether the test + * in a "%<(function)..." should be a string or numeric one. + */ #define TFL_PUTS 1 /* implicit putstr if top level */ #define TFL_PUTN 2 /* implicit putnum if top level */ +/* + * The functable array maps between the text names of format functions and + * the format instructions interpreted by the engine in fmt_scan.c. + * + * The elements of this structure are as follows: + * + * name - The name of the function as seen in the format string. This is + * what maps a particular function name into a format instruction. + * type - The type of argument this function expects. Those types are + * listed above (with the TF_ prefix). This affects what gets + * placed in the format instruction (the f_un union). + * f_type - The instruction corresponding to this function (from the list + * in fmt_compile.h). 
+ * extra - Used by some functions to provide extra data to the compiler. + * Uses include: + * - Providing an alternate instruction to combine a load + * and test operation (see do_if()). + * - Passed in f_value in the format instruction to provide + * extra information for the engine (see FT_LV_DAT handling + * in fmt_scan.c). + * - Provide a hint as to preprocessing that is required for + * this instruction (see do_name()). + * flags - See the definitions for TFL_PUTS & TFL_PUTN above. + */ + struct ftable { char *name; /* function name */ char type; /* argument type */ @@ -80,6 +139,8 @@ static struct ftable functable[] = { { "putnum", TF_EXPR, FT_NUM, 0, 0 }, { "putnumf", TF_EXPR, FT_NUMF, 0, 0 }, { "putaddr", TF_STR, FT_PUTADDR, 0, 0 }, + { "putlit", TF_EXPR, FT_STRLIT, 0, 0 }, + { "zputlit", TF_EXPR, FT_STRLITZ, 0, 0 }, { "void", TF_NOP, 0, 0, 0 }, { "comp", TF_COMP, FT_LS_COMP, 0, TFL_PUTS }, @@ -100,6 +161,9 @@ static struct ftable functable[] = { { "dat", TF_NUM, FT_LV_DAT, 0, TFL_PUTN }, { "strlen", TF_NONE, FT_LV_STRLEN, 0, TFL_PUTN }, { "me", TF_MYBOX, FT_LS_LIT, 0, TFL_PUTS }, + { "myname", TF_MYNAME, FT_LS_LIT, 0, TFL_PUTS }, + { "myhost", TF_MYHOST, FT_LS_LIT, 0, TFL_PUTS }, + { "localmbox", TF_LMBOX, FT_LS_LIT, 0, TFL_PUTS }, { "plus", TF_NUM, FT_LV_PLUS_L, 0, TFL_PUTN }, { "minus", TF_NUM, FT_LV_MINUS_L, 0, TFL_PUTN }, { "divide", TF_NUM, FT_LV_DIVIDE_L, 0, TFL_PUTN }, @@ -144,6 +208,7 @@ static struct ftable functable[] = { { "ingrp", TF_COMP, FT_LV_INGRPF, FT_PARSEADDR, TFL_PUTN }, { "nohost", TF_COMP, FT_LV_NOHOSTF, FT_PARSEADDR, TFL_PUTN }, { "formataddr", TF_EXPR_SV,FT_FORMATADDR, FT_FORMATADDR, 0 }, + { "concataddr", TF_EXPR_SV,FT_CONCATADDR, FT_FORMATADDR, 0 }, { "friendly", TF_COMP, FT_LS_FRIENDLY, FT_PARSEADDR, TFL_PUTS }, { "mymbox", TF_COMP, FT_LV_COMPFLAG, FT_MYMBOX, TFL_PUTN }, @@ -154,33 +219,56 @@ static struct ftable functable[] = { { NULL, 0, 0, 0, 0 } }; +/* + * Hash function for component name. 
The function should be + * case independent and probably shouldn't involve a routine + * call. This function is pretty good but will not work on + * single character component names. + */ +#define CHASH(nm) (((((nm)[0]) - ((nm)[1])) & 0x1f) + (((nm)[2]) & 0x5f)) + +/* + * Find a component in the hash table. + */ +#define FINDCOMP(comp,name) \ + for (comp = wantcomp[CHASH(name)]; \ + comp && strcmp(comp->c_name,name); \ + comp = comp->c_next) \ + ; + /* Add new component to the hash table */ -#define NEWCOMP(cm,name)\ +#define NEWCOMP(cm,name) do { \ cm = ((struct comp *) calloc(1, sizeof (struct comp)));\ - cm->c_name = name;\ + cm->c_name = getcpy(name);\ + cm->c_refcount++;\ ncomp++;\ i = CHASH(name);\ cm->c_next = wantcomp[i];\ - wantcomp[i] = cm; + wantcomp[i] = cm; \ + } while (0) #define NEWFMT (next_fp++) -#define NEW(type,fill,wid)\ - fp=NEWFMT; fp->f_type=(type); fp->f_fill=(fill); fp->f_width=(wid); +#define NEW(type,fill,wid) do {\ + fp=NEWFMT; fp->f_type=(type); fp->f_fill=(fill); fp->f_width=(wid); \ + } while (0) /* Add (possibly new) component to the hash table */ -#define ADDC(name)\ +#define ADDC(name) do { \ FINDCOMP(cm, name);\ if (!cm) {\ NEWCOMP(cm,name);\ }\ - fp->f_comp = cm; + fp->f_comp = cm; \ + fp->f_flags |= FF_COMPREF; \ + cm->c_refcount++; \ + } while (0) -#define LV(type, value) NEW(type,0,0); fp->f_value = (value); -#define LS(type, str) NEW(type,0,0); fp->f_text = (str); +#define LV(type, value) do { NEW(type,0,0); fp->f_value = (value); } while (0) +#define LS(type, str) do { NEW(type,0,0); fp->f_text = getcpy(str); fp->f_flags |= FF_STRALLOC; } while (0) -#define PUTCOMP(comp) NEW(FT_COMP,0,0); ADDC(comp); -#define PUTLIT(str) NEW(FT_LIT,0,0); fp->f_text = (str); -#define PUTC(c) NEW(FT_CHAR,0,0); fp->f_char = (c); +#define PUTCOMP(comp) do { NEW(FT_COMP,0,0); ADDC(comp); } while (0) +#define PUTLIT(str) do { NEW(FT_LIT,0,0); fp->f_text = getcpy(str); fp->f_flags |= FF_STRALLOC; } while (0) +#define PUTC(c) do { NEW(FT_CHAR,0,0); 
fp->f_char = (c); } while (0) static char *format_string; static unsigned char *usr_fstring; /* for CERROR */ @@ -188,11 +276,6 @@ static unsigned char *usr_fstring; /* for CERROR */ #define CERROR(str) compile_error (str, cp) /* - * external prototypes - */ -extern char *getusername(void); - -/* * static prototypes */ static struct ftable *lookup(char *); @@ -204,8 +287,13 @@ static char *do_func(char *); static char *do_expr (char *, int); static char *do_loop(char *); static char *do_if(char *); +static void free_component(struct comp *); +static void free_comptable(void); +/* + * Lookup a function name in the functable + */ static struct ftable * lookup(char *name) { @@ -253,19 +341,19 @@ compile_error(char *str, char *cp) */ int -fmt_compile(char *fstring, struct format **fmt) +fmt_compile(char *fstring, struct format **fmt, int reset_comptable) { register char *cp; - int i; + size_t i; + static int comptable_initialized = 0; - if (format_string) - free (format_string); format_string = getcpy (fstring); usr_fstring = fstring; - /* init the component hash table. */ - for (i = 0; i < sizeof(wantcomp)/sizeof(wantcomp[0]); i++) - wantcomp[i] = 0; + if (reset_comptable || !comptable_initialized) { + free_comptable(); + comptable_initialized = 1; + } memset((char *) &fmt_mnull, 0, sizeof(fmt_mnull)); @@ -282,7 +370,6 @@ fmt_compile(char *fstring, struct format **fmt) if (next_fp == NULL) adios (NULL, "unable to allocate format storage"); - ncomp = 0; infunction = 0; cp = compile(format_string); @@ -292,6 +379,7 @@ fmt_compile(char *fstring, struct format **fmt) LV(FT_DONE, 0); /* really done */ *fmt = formatvec; + free(format_string); return (ncomp); } @@ -353,6 +441,9 @@ compile (char *sp) } +/* + * Process functions & components (handle field width here as well) + */ static char * do_spec(char *sp) { @@ -404,6 +495,11 @@ do_spec(char *sp) return (cp); } +/* + * Process a component name. 
Normally this involves generating an FT_COMP + * instruction for the specified component. If preprocess is set, then we + * do some extra processing. + */ static char * do_name(char *sp, int preprocess) { @@ -458,6 +554,10 @@ do_name(char *sp, int preprocess) return (cp); } +/* + * Generate one or more instructions corresponding to the named function. + * The different types of function arguments are handled here. + */ static char * do_func(char *sp) { @@ -521,6 +621,18 @@ do_func(char *sp) LS(t->f_type, getusername()); break; + case TF_MYNAME: + LS(t->f_type, getfullname()); + break; + + case TF_MYHOST: + LS(t->f_type, LocalName(0)); + break; + + case TF_LMBOX: + LS(t->f_type, getlocalmbox()); + break; + case TF_NOW: LV(t->f_type, time((time_t *) 0)); break; @@ -550,6 +662,10 @@ do_func(char *sp) return (cp); } +/* + * Handle an expression as an argument. Basically we call one of do_name(), + * do_func(), or do_if() + */ static char * do_expr (char *sp, int preprocess) { @@ -571,6 +687,46 @@ do_expr (char *sp, int preprocess) return (cp); } +/* + * I am guessing this was for some kind of loop statement, which would have + * looked like %[ .... %]. It looks like the way this would have worked + * is that the format engine would have seen that FT_DONE had a 1 in the + * f_un.f_un_value and then decided whether or not to continue the loop. + * There is no support for this in the format engine, so right now if + * you try using it you will reach the FT_DONE and simply stop. I'm leaving + * this here in case someone wants to continue the work. + * + * Okay, got some more information on this from John L. Romine! From an + * email he sent to the nmh-workers mailing list on December 2, 2010, he + * explains it thusly: + * + * In this case (scan, formatsbr) it has to do with an extension to + * the mh-format syntax to allow for looping. + * + * The scan format is processed once for each message. 
Those #ifdef + * JLR changes allowed for the top part of the format file to be + * processed once, then a second, looping part to be processed + * once per message. As I recall, there were new mh-format escape + * sequences to delimit the loop. This would have allowed for things + * like per-format column headings in the scan output. + * + * Since existing format files didn't include the scan listing + * header (it was hard-coded in scan.c) it would not have been + * backward-compatible. All existing format files (including any + * local ones) would have needed to be changed to include the format + * codes for a header. The practice at the time was not to introduce + * incompatible changes in a minor release, and I never managed to + * put out a newer major release. + * + * I can see how this would work, and I suspect part of the motivation was + * because the format compiler routines (at the time) couldn't really be + * called multiple times on the same message because the memory management + * was so lousy. That's been reworked and things are now a lot cleaner, + * so I suspect if we're going to allow a format string to be used for the + * scan header it might be simpler to have a separate format string just + * for the header. But I'll leave this code in for now just in case we + * decide that we want some kind of looping support. + */ static char * do_loop(char *sp) { @@ -589,6 +745,12 @@ do_loop(char *sp) return cp; } +/* + * Handle an if-elsif-endif statement. Note here that the branching + * is handled by the f_skip member of the struct format (which is really + * just f_width overloaded). This number controls how far to move forward + * (or back) in the format instruction array. + */ static char * do_if(char *sp) { @@ -610,7 +772,14 @@ do_if(char *sp) if (ftbl->f_type >= IF_FUNCS) fp->f_type = ftbl->extra; else { - LV (FT_IF_V_NE, 0); + /* Put out a string test or a value test depending + * on what this function's return type is. 
+ */ + if (ftbl->flags & TFL_PUTS) { + LV (FT_IF_S, 0); + } else { + LV (FT_IF_V_NE, 0); + } } } else { @@ -654,3 +823,198 @@ do_if(char *sp) return (cp); } + +/* + * Free a set of format instructions. + * + * What we do here is: + * + * - Iterate through the list of format instructions, freeing any references + * to allocated memory in each instruction. + * - Free component references. + * - If requested, reset the component hash table; that will also free any + * references to components stored there. + * + */ + +void +fmt_free(struct format *fmt, int reset_comptable) +{ + struct format *fp = fmt; + + if (fp) { + while (! (fp->f_type == FT_DONE && fp->f_value == 0)) { + if (fp->f_flags & FF_STRALLOC) + free(fp->f_text); + if (fp->f_flags & FF_COMPREF) + free_component(fp->f_comp); + fp++; + } + free(fmt); + } + + if (reset_comptable) + free_comptable(); +} + +/* + * Find a component in our hash table. This is just a public interface to + * the FINDCOMP macro, so we don't have to expose our hash table. + */ + +struct comp * +fmt_findcomp(char *component) +{ + struct comp *cm; + + FINDCOMP(cm, component); + + return cm; +} + +/* + * Like fmt_findcomp, but case-insensitive. + */ + +struct comp * +fmt_findcasecomp(char *component) +{ + struct comp *cm; + + for (cm = wantcomp[CHASH(component)]; cm; cm = cm->c_next) + if (mh_strcasecmp(component, cm->c_name) == 0) + break; + + return cm; +} + +/* + * Add an entry to the component hash table + * + * Returns true if the component was added, 0 if it already existed. + * + */ + +int +fmt_addcompentry(char *component) +{ + struct comp *cm; + int i; + + FINDCOMP(cm, component); + + if (cm) + return 0; + + NEWCOMP(cm, component); + + /* + * ncomp is really meant for fmt_compile() and this function is + * meant to be used outside of it. So decrement it just to be safe + * (internal callers should be using NEWCOMP()). + */ + + ncomp--; + + return 1; +} + +/* + * Add a string to a component hash table entry. 
+ * + * Note the special handling for components marked with CT_ADDR. The comments + * in fmt_scan.h explain this in more detail. + */ + +int +fmt_addcomptext(char *component, char *text) +{ + int i, found = 0, bucket = CHASH(component); + struct comp *cptr = wantcomp[bucket]; + char *cp; + + while (cptr) { + if (mh_strcasecmp(component, cptr->c_name) == 0) { + found++; + if (! cptr->c_text) { + cptr->c_text = getcpy(text); + } else { + i = strlen(cp = cptr->c_text) - 1; + if (cp[i] == '\n') { + if (cptr->c_type & CT_ADDR) { + cp[i] = '\0'; + cp = add(",\n\t", cp); + } else { + cp = add("\t", cp); + } + } + cptr->c_text = add(text, cp); + } + } + cptr = cptr->c_next; + } + + return found ? bucket : -1; +} + +/* + * Append text to a component we've already found. See notes in fmt_scan.h + * for more information. + */ + +void +fmt_appendcomp(int bucket, char *component, char *text) +{ + struct comp *cptr; + + if (bucket != -1) { + for (cptr = wantcomp[bucket]; cptr; cptr = cptr->c_next) + if (mh_strcasecmp(component, cptr->c_name) == 0) + cptr->c_text = add(text, cptr->c_text); + } +} + +/* + * Free and reset our component hash table + */ + +static void +free_comptable(void) +{ + unsigned int i; + struct comp *cm, *cm2; + + for (i = 0; i < sizeof(wantcomp)/sizeof(wantcomp[0]); i++) { + cm = wantcomp[i]; + while (cm != NULL) { + cm2 = cm->c_next; + free_component(cm); + cm = cm2; + } + wantcomp[i] = 0; + } + + ncomp = 0; +} + +/* + * Decrement the reference count of a component structure. If it reaches + * zero, free it + */ + +static void +free_component(struct comp *cm) +{ + if (--cm->c_refcount <= 0) { + /* Shouldn't ever be NULL, but just in case ... */ + if (cm->c_name) + free(cm->c_name); + if (cm->c_text) + free(cm->c_text); + if (cm->c_type & CT_DATE) + free(cm->c_tws); + if (cm->c_type & CT_ADDR && cm->c_mn && cm->c_mn != &fmt_mnull) + mnfree(cm->c_mn); + free(cm); + } +}