/*
* fmt_compile.c -- "compile" format strings for fmt_scan
*
- * $Id$
+ * This code is Copyright (c) 2002, by the authors of nmh. See the
+ * COPYRIGHT file in the root directory of the nmh distribution for
+ * complete copyright information.
+ *
+ * This code compiles the format strings (documented in mh-format(5)) into
+ * an internal form to be later processed by fmt_scan.c.
+ *
+ * What happens here is that the format strings are parsed and an array
+ * of struct format structures are returned. Each format structure is
+ * a single operation interpreted by the routines in fmt_scan.c.
+ *
+ * There is NOT a one-to-one correspondence between format strings and
+ * format instructions; some functions have side effects that can result
+ * in multiple instructions being generated. The exact list of instructions
+ * generated by a format string can be seen with the nmh fmtdump utility.
+ *
+ * A list of format instructions can be found in fmt_compile.h.
+ *
+ * If you wish to add a new function, you will need to do the following
+ * things:
+ *
+ * - Add a new instruction to the list of instructions in fmt_compile.h.
+ * Note that test instructions (starting with FT_IF_S_NULL) have special
+ * handling, so if you are NOT writing a test function then you need
+ * to insert it into the list before that _and_ bump all of the
+ * following instruction numbers.
+ *
+ * - Add the function name to the functable[] array below, and write any
+ * special code that your function may require in terms of parsing
+ * (it very well may not need anything).
+ *
+ * - Add the code in fmt_scan.c to handle your new function.
+ *
+ * - Add code to fmtdump.c to display your new function.
+ *
+ * - Document the new function in the mh-format(5) man page.
+ *
*/
#include <h/mh.h>
#include <h/addrsbr.h>
-#include <zotnet/tws/tws.h>
+#include <h/tws.h>
#include <h/fmt_scan.h>
#include <h/fmt_compile.h>
+#include <h/mts.h>
+#include <h/utils.h>
-#ifdef TIME_WITH_SYS_TIME
+#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
-# include <time.h>
-#else
-# ifdef TM_IN_SYS_TIME
-# include <sys/time.h>
-# else
-# include <time.h>
-# endif
#endif
+#include <time.h>
/*
* hash table for deciding if a component is "interesting"
*/
-struct comp *wantcomp[128];
+static struct comp *wantcomp[128];
static struct format *formatvec; /* array to hold formats */
static struct format *next_fp; /* next free format slot */
#define TF_NOW 6 /* special - get current unix time */
#define TF_EXPR_SV 7 /* like expr but save current str reg */
#define TF_NOP 8 /* like expr but no result */
+#define TF_MYNAME 9 /* special - get current name of user */
+#define TF_MYHOST 10 /* special - get "local" hostname */
+#define TF_LMBOX 11 /* special - get full local mailbox */
/* ftable->flags */
+/* NB that TFL_PUTS is also used to decide whether the test
+ * in a "%<(function)..." should be a string or numeric one.
+ */
#define TFL_PUTS 1 /* implicit putstr if top level */
#define TFL_PUTN 2 /* implicit putnum if top level */
+/*
+ * The functable array maps between the text names of format functions and
+ * the format instructions interpreted by the engine in fmt_scan.c.
+ *
+ * The elements of this structure are as follows:
+ *
+ * name - The name of the function as seen in the format string. This is
+ * what maps a particular function name into a format instruction.
+ * type - The type of argument this function expects. Those types are
+ * listed above (with the TF_ prefix). This affects what gets
+ * placed in the format instruction (the f_un union).
+ * f_type - The instruction corresponding to this function (from the list
+ * in fmt_compile.h).
+ * extra - Used by some functions to provide extra data to the compiler.
+ * Uses include:
+ * - Providing an alternate instruction to combine a load
+ * and test operation (see do_if()).
+ * - Passed in f_value in the format instruction to provide
+ * extra information for the engine (see FT_LV_DAT handling
+ * in fmt_scan.c).
+ * - Provide a hint as to preprocessing that is required for
+ * this instruction (see do_name()).
+ * flags - See the definitions for TFL_PUTS & TFL_PUTN above.
+ */
+
struct ftable {
char *name; /* function name */
char type; /* argument type */
{ "putnum", TF_EXPR, FT_NUM, 0, 0 },
{ "putnumf", TF_EXPR, FT_NUMF, 0, 0 },
{ "putaddr", TF_STR, FT_PUTADDR, 0, 0 },
+ { "putlit", TF_EXPR, FT_STRLIT, 0, 0 },
+ { "zputlit", TF_EXPR, FT_STRLITZ, 0, 0 },
{ "void", TF_NOP, 0, 0, 0 },
{ "comp", TF_COMP, FT_LS_COMP, 0, TFL_PUTS },
{ "dat", TF_NUM, FT_LV_DAT, 0, TFL_PUTN },
{ "strlen", TF_NONE, FT_LV_STRLEN, 0, TFL_PUTN },
{ "me", TF_MYBOX, FT_LS_LIT, 0, TFL_PUTS },
+ { "myname", TF_MYNAME, FT_LS_LIT, 0, TFL_PUTS },
+ { "myhost", TF_MYHOST, FT_LS_LIT, 0, TFL_PUTS },
+ { "localmbox", TF_LMBOX, FT_LS_LIT, 0, TFL_PUTS },
{ "plus", TF_NUM, FT_LV_PLUS_L, 0, TFL_PUTN },
{ "minus", TF_NUM, FT_LV_MINUS_L, 0, TFL_PUTN },
{ "divide", TF_NUM, FT_LV_DIVIDE_L, 0, TFL_PUTN },
{ "ingrp", TF_COMP, FT_LV_INGRPF, FT_PARSEADDR, TFL_PUTN },
{ "nohost", TF_COMP, FT_LV_NOHOSTF, FT_PARSEADDR, TFL_PUTN },
{ "formataddr", TF_EXPR_SV,FT_FORMATADDR, FT_FORMATADDR, 0 },
+ { "concataddr", TF_EXPR_SV,FT_CONCATADDR, FT_FORMATADDR, 0 },
{ "friendly", TF_COMP, FT_LS_FRIENDLY, FT_PARSEADDR, TFL_PUTS },
{ "mymbox", TF_COMP, FT_LV_COMPFLAG, FT_MYMBOX, TFL_PUTN },
{ "addtoseq", TF_STR, FT_ADDTOSEQ, 0, 0 },
+ { "unquote", TF_EXPR, FT_LS_UNQUOTE, 0, TFL_PUTS},
+
{ NULL, 0, 0, 0, 0 }
};
+/*
+ * Hash function for component name. The function should be
+ * case independent and probably shouldn't involve a routine
+ * call. This function is pretty good but will not work on
+ * single character component names.
+ *
+ * The macro reads nm[0], nm[1] and nm[2]. For a one-character name,
+ * nm[2] lies beyond the terminating NUL, which is why such names are
+ * not supported. The result is at most 0x1f + 0x5f = 126, so it is
+ * always a valid index into wantcomp[128].
+ */
+#define CHASH(nm) (((((nm)[0]) - ((nm)[1])) & 0x1f) + (((nm)[2]) & 0x5f))
+
+/*
+ * Find a component in the hash table.
+ *
+ * Walks the bucket selected by CHASH(name) and leaves comp pointing at
+ * the first entry whose c_name matches name exactly, or NULL if there is
+ * none. Note that although the hash is case independent, the comparison
+ * here is a case-sensitive strcmp(). The expansion is a complete for
+ * statement with an empty body, so it must be used as a statement.
+ */
+#define FINDCOMP(comp,name) \
+ for (comp = wantcomp[CHASH(name)]; \
+ comp && strcmp(comp->c_name,name); \
+ comp = comp->c_next) \
+ ;
+
/* Add new component to the hash table */
-#define NEWCOMP(cm,name)\
+/*
+ * NEWCOMP allocates a zeroed struct comp, stores a private copy of the
+ * name, takes the first reference on it, bumps ncomp and pushes the entry
+ * on the front of its hash bucket. It assigns to the variables "i" and
+ * "ncomp", which must be visible wherever the macro is expanded.
+ * NOTE(review): the calloc() result is used without a NULL check.
+ */
+#define NEWCOMP(cm,name) do { \
cm = ((struct comp *) calloc(1, sizeof (struct comp)));\
- cm->c_name = name;\
+ cm->c_name = getcpy(name);\
+ cm->c_refcount++;\
ncomp++;\
i = CHASH(name);\
cm->c_next = wantcomp[i];\
- wantcomp[i] = cm;
+ wantcomp[i] = cm; \
+ } while (0)
#define NEWFMT (next_fp++)
-#define NEW(type,fill,wid)\
- fp=NEWFMT; fp->f_type=(type); fp->f_fill=(fill); fp->f_width=(wid);
+/*
+ * NEW claims the next free instruction slot, leaves "fp" pointing at it
+ * and fills in its type, fill character and width.
+ */
+#define NEW(type,fill,wid) do {\
+ fp=NEWFMT; fp->f_type=(type); fp->f_fill=(fill); fp->f_width=(wid); \
+ } while (0)
/* Add (possibly new) component to the hash table */
-#define ADDC(name)\
+/*
+ * ADDC looks the component up (creating it if absent), attaches it to the
+ * current instruction "fp", flags the instruction with FF_COMPREF and takes
+ * an additional reference, which fmt_free() later drops.
+ */
+#define ADDC(name) do { \
FINDCOMP(cm, name);\
if (!cm) {\
NEWCOMP(cm,name);\
}\
- fp->f_comp = cm;
+ fp->f_comp = cm; \
+ fp->f_flags |= FF_COMPREF; \
+ cm->c_refcount++; \
+ } while (0)
-#define LV(type, value) NEW(type,0,0); fp->f_value = (value);
-#define LS(type, str) NEW(type,0,0); fp->f_text = (str);
+/*
+ * LV emits an instruction carrying a numeric value. LS emits one carrying
+ * a private copy of a string and flags it FF_STRALLOC so that fmt_free()
+ * releases the copy.
+ */
+#define LV(type, value) do { NEW(type,0,0); fp->f_value = (value); } while (0)
+#define LS(type, str) do { NEW(type,0,0); fp->f_text = getcpy(str); fp->f_flags |= FF_STRALLOC; } while (0)
-#define PUTCOMP(comp) NEW(FT_COMP,0,0); ADDC(comp);
-#define PUTLIT(str) NEW(FT_LIT,0,0); fp->f_text = (str);
-#define PUTC(c) NEW(FT_CHAR,0,0); fp->f_char = (c);
+/*
+ * Output helpers: PUTCOMP emits FT_COMP for a named component, PUTLIT emits
+ * FT_LIT with a private (FF_STRALLOC) copy of the string, and PUTC emits
+ * FT_CHAR for a single character.
+ */
+#define PUTCOMP(comp) do { NEW(FT_COMP,0,0); ADDC(comp); } while (0)
+#define PUTLIT(str) do { NEW(FT_LIT,0,0); fp->f_text = getcpy(str); fp->f_flags |= FF_STRALLOC; } while (0)
+#define PUTC(c) do { NEW(FT_CHAR,0,0); fp->f_char = (c); } while (0)
static char *format_string;
-static char *usr_fstring; /* for CERROR */
+static unsigned char *usr_fstring; /* for CERROR */
#define CERROR(str) compile_error (str, cp)
/*
- * external prototypes
- */
-extern char *getusername(void);
-
-/*
* static prototypes
*/
static struct ftable *lookup(char *);
static char *do_expr (char *, int);
static char *do_loop(char *);
static char *do_if(char *);
+static void free_component(struct comp *);
+static void free_comptable(void);
+/*
+ * Lookup a function name in the functable
+ */
static struct ftable *
lookup(char *name)
{
*/
int
-fmt_compile(char *fstring, struct format **fmt)
+fmt_compile(char *fstring, struct format **fmt, int reset_comptable)
{
register char *cp;
- int i;
+ size_t i;
+ static int comptable_initialized = 0;
- if (format_string)
- free (format_string);
format_string = getcpy (fstring);
usr_fstring = fstring;
- /* init the component hash table. */
- for (i = 0; i < sizeof(wantcomp)/sizeof(wantcomp[0]); i++)
- wantcomp[i] = 0;
+ if (reset_comptable || !comptable_initialized) {
+ free_comptable();
+ comptable_initialized = 1;
+ }
memset((char *) &fmt_mnull, 0, sizeof(fmt_mnull));
* normal processing.
*/
i = strlen(fstring)/2 + 1;
+ if (i==1) i++;
next_fp = formatvec = (struct format *)calloc ((size_t) i,
sizeof(struct format));
if (next_fp == NULL)
adios (NULL, "unable to allocate format storage");
- ncomp = 0;
infunction = 0;
cp = compile(format_string);
LV(FT_DONE, 0); /* really done */
*fmt = formatvec;
+ free(format_string);
return (ncomp);
}
}
+/*
+ * Process functions & components (handle field width here as well)
+ */
static char *
do_spec(char *sp)
{
return (cp);
}
+/*
+ * Process a component name. Normally this involves generating an FT_COMP
+ * instruction for the specified component. If preprocess is set, then we
+ * do some extra processing.
+ */
static char *
do_name(char *sp, int preprocess)
{
if (cm->c_type & CT_ADDR) {
CERROR("component used as both date and address");
}
- if (! (cm->c_type & CT_DATE)) {
- cm->c_tws = (struct tws *)
- calloc((size_t) 1, sizeof(*cm->c_tws));
- fp->f_type = preprocess;
- PUTCOMP(sp);
- cm->c_type |= CT_DATE;
- }
+ cm->c_tws = (struct tws *)
+ calloc((size_t) 1, sizeof(*cm->c_tws));
+ fp->f_type = preprocess;
+ PUTCOMP(sp);
+ cm->c_type |= CT_DATE;
break;
case FT_MYMBOX:
ismymbox ((struct mailname *) 0);
primed++;
}
- cm->c_type |= CT_MYMBOX;
/* fall through */
case FT_PARSEADDR:
if (cm->c_type & CT_DATE) {
CERROR("component used as both date and address");
}
- if (! (cm->c_type & CT_ADDRPARSE)) {
- cm->c_mn = &fmt_mnull;
- fp->f_type = preprocess;
- PUTCOMP(sp);
- cm->c_type |= (CT_ADDR | CT_ADDRPARSE);
- }
+ cm->c_mn = &fmt_mnull;
+ fp->f_type = preprocess;
+ PUTCOMP(sp);
+ cm->c_type |= CT_ADDR;
break;
case FT_FORMATADDR:
return (cp);
}
+/*
+ * Generate one or more instructions corresponding to the named function.
+ * The different types of function arguments are handled here.
+ */
static char *
do_func(char *sp)
{
LS(t->f_type, getusername());
break;
+ case TF_MYNAME:
+ LS(t->f_type, getfullname());
+ break;
+
+ case TF_MYHOST:
+ LS(t->f_type, LocalName(0));
+ break;
+
+ case TF_LMBOX:
+ LS(t->f_type, getlocalmbox());
+ break;
+
case TF_NOW:
LV(t->f_type, time((time_t *) 0));
break;
return (cp);
}
+/*
+ * Handle an expression as an argument. Basically we call one of do_name(),
+ * do_func(), or do_if()
+ */
static char *
do_expr (char *sp, int preprocess)
{
return (cp);
}
+/*
+ * I am guessing this was for some kind of loop statement, which would have
+ * looked like %[ .... %]. It looks like the way this would have worked
+ * is that the format engine would have seen that FT_DONE had a 1 in the
+ * f_un.f_un_value and then decided whether or not to continue the loop.
+ * There is no support for this in the format engine, so right now if
+ * you try using it you will reach the FT_DONE and simply stop. I'm leaving
+ * this here in case someone wants to continue the work.
+ *
+ * Okay, got some more information on this from John L. Romine! From an
+ * email he sent to the nmh-workers mailing list on December 2, 2010, he
+ * explains it thusly:
+ *
+ * In this case (scan, formatsbr) it has to do with an extension to
+ * the mh-format syntax to allow for looping.
+ *
+ * The scan format is processed once for each message. Those #ifdef
+ * JLR changes allowed for the top part of the format file to be
+ * processed once, then a second, looping part to be processed
+ * once per message. As I recall, there were new mh-format escape
+ * sequences to delimit the loop. This would have allowed for things
+ * like per-format column headings in the scan output.
+ *
+ * Since existing format files didn't include the scan listing
+ * header (it was hard-coded in scan.c) it would not have been
+ * backward-compatible. All existing format files (including any
+ * local ones) would have needed to be changed to include the format
+ * codes for a header. The practice at the time was not to introduce
+ * incompatible changes in a minor release, and I never managed to
+ * put out a newer major release.
+ *
+ * I can see how this would work, and I suspect part of the motivation was
+ * because the format compiler routines (at the time) couldn't really be
+ * called multiple times on the same message because the memory management
+ * was so lousy. That's been reworked and things are now a lot cleaner,
+ * so I suspect if we're going to allow a format string to be used for the
+ * scan header it might be simpler to have a separate format string just
+ * for the header. But I'll leave this code in for now just in case we
+ * decide that we want some kind of looping support.
+ */
static char *
do_loop(char *sp)
{
return cp;
}
+/*
+ * Handle an if-elsif-endif statement. Note here that the branching
+ * is handled by the f_skip member of the struct format (which is really
+ * just f_width overloaded). This number controls how far to move forward
+ * (or back) in the format instruction array.
+ */
static char *
do_if(char *sp)
{
if (ftbl->f_type >= IF_FUNCS)
fp->f_type = ftbl->extra;
else {
- LV (FT_IF_V_NE, 0);
+ /* Put out a string test or a value test depending
+ * on what this function's return type is.
+ */
+ if (ftbl->flags & TFL_PUTS) {
+ LV (FT_IF_S, 0);
+ } else {
+ LV (FT_IF_V_NE, 0);
+ }
}
}
else {
return (cp);
}
+
+/*
+ * Release an instruction array handed out by fmt_compile().
+ *
+ * The array is walked up to (but not including) its terminator, which is
+ * an FT_DONE instruction whose f_value is 0. For every instruction on the
+ * way:
+ *
+ * - text flagged FF_STRALLOC is freed;
+ * - a component flagged FF_COMPREF has its reference dropped.
+ *
+ * The array itself is freed afterwards. A NULL fmt is accepted and skips
+ * all of the above. Independently of fmt, a non-zero reset_comptable
+ * empties the component hash table, dropping the references held there.
+ */
+
+void
+fmt_free(struct format *fmt, int reset_comptable)
+{
+    struct format *insn;
+
+    if (fmt != NULL) {
+        for (insn = fmt;
+             insn->f_type != FT_DONE || insn->f_value != 0;
+             insn++) {
+            if (insn->f_flags & FF_STRALLOC)
+                free(insn->f_text);
+            if (insn->f_flags & FF_COMPREF)
+                free_component(insn->f_comp);
+        }
+        free(fmt);
+    }
+
+    if (reset_comptable)
+        free_comptable();
+}
+
+/*
+ * Public lookup of a component by name, so that callers do not need
+ * access to the hash table itself. The match is an exact, case-sensitive
+ * strcmp() comparison within the bucket chosen by CHASH().
+ *
+ * Returns the matching entry, or NULL when there is none.
+ */
+
+struct comp *
+fmt_findcomp(char *component)
+{
+    struct comp *entry = wantcomp[CHASH(component)];
+
+    while (entry != NULL && strcmp(entry->c_name, component) != 0)
+        entry = entry->c_next;
+
+    return entry;
+}
+
+/*
+ * Case-insensitive counterpart of fmt_findcomp(): names are compared with
+ * mh_strcasecmp() instead of strcmp().
+ *
+ * Returns the first matching entry in the bucket, or NULL when there is
+ * none.
+ */
+
+struct comp *
+fmt_findcasecomp(char *component)
+{
+    struct comp *entry = wantcomp[CHASH(component)];
+
+    while (entry != NULL && mh_strcasecmp(component, entry->c_name) != 0)
+        entry = entry->c_next;
+
+    return entry;
+}
+
+/*
+ * Make sure the component hash table has an entry for the given name.
+ *
+ * Returns 1 if a new entry was created, 0 if an entry with exactly this
+ * name (case-sensitive) was already present.
+ */
+
+int
+fmt_addcompentry(char *component)
+{
+    struct comp *cm;
+    int i;      /* scratch variable assigned by NEWCOMP() */
+
+    FINDCOMP(cm, component);
+
+    if (cm == NULL) {
+        NEWCOMP(cm, component);
+
+        /*
+         * NEWCOMP() bumped ncomp, but that counter is really meant for
+         * fmt_compile() and this function is meant to be used outside
+         * of it, so undo the increment (internal callers should be
+         * using NEWCOMP() directly).
+         */
+        ncomp--;
+
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Add a string to a component hash table entry.
+ *
+ * Every entry in the component's bucket whose name matches (ignoring
+ * case) is updated:
+ *
+ * - if the entry has no text yet, it receives a copy of "text";
+ * - otherwise "text" is appended to what is already there. When the
+ *   existing text ends in a newline, a continuation is inserted first:
+ *   for CT_ADDR components the newline is replaced by ",\n\t", for all
+ *   others a "\t" is appended after it. The comments in fmt_scan.h
+ *   explain the CT_ADDR handling in more detail.
+ *
+ * Returns the hash bucket number if at least one entry matched, or -1 if
+ * none did.
+ *
+ * NOTE(review): add(extra, base) is assumed to return base with extra
+ * appended, taking over (and releasing) the base buffer -- confirm
+ * against its definition.
+ */
+
+int
+fmt_addcomptext(char *component, char *text)
+{
+    int found = 0, bucket = CHASH(component);
+    struct comp *cptr;
+
+    for (cptr = wantcomp[bucket]; cptr != NULL; cptr = cptr->c_next) {
+        char *cp;
+        size_t len;
+
+        if (mh_strcasecmp(component, cptr->c_name) != 0)
+            continue;
+
+        found++;
+
+        if (cptr->c_text == NULL) {
+            cptr->c_text = getcpy(text);
+            continue;
+        }
+
+        cp = cptr->c_text;
+        len = strlen(cp);
+
+        /*
+         * Only look at the last character when there is one; an empty
+         * string would otherwise make us read cp[-1].
+         */
+        if (len > 0 && cp[len - 1] == '\n') {
+            if (cptr->c_type & CT_ADDR) {
+                cp[len - 1] = '\0';
+                cp = add(",\n\t", cp);
+            } else {
+                cp = add("\t", cp);
+            }
+        }
+
+        cptr->c_text = add(text, cp);
+    }
+
+    return found ? bucket : -1;
+}
+
+/*
+ * Append text to a component we've already found. "bucket" is a hash
+ * bucket number such as the one returned by fmt_addcomptext(); the value
+ * -1 means "no such component" and makes this a no-op. Every entry in the
+ * bucket whose name matches (ignoring case) has "text" added to its
+ * c_text. See notes in fmt_scan.h for more information.
+ */
+
+void
+fmt_appendcomp(int bucket, char *component, char *text)
+{
+    struct comp *entry;
+
+    if (bucket == -1)
+        return;
+
+    entry = wantcomp[bucket];
+    while (entry != NULL) {
+        if (mh_strcasecmp(component, entry->c_name) == 0)
+            entry->c_text = add(text, entry->c_text);
+        entry = entry->c_next;
+    }
+}
+
+/*
+ * Empty the component hash table: drop the reference held on every entry
+ * in every bucket, clear the bucket heads and reset ncomp to zero.
+ */
+
+static void
+free_comptable(void)
+{
+    const unsigned int nbuckets = sizeof(wantcomp)/sizeof(wantcomp[0]);
+    unsigned int slot;
+
+    for (slot = 0; slot < nbuckets; slot++) {
+        struct comp *entry, *following;
+
+        for (entry = wantcomp[slot]; entry != NULL; entry = following) {
+            /* fetch the link first: entry may be freed by the call */
+            following = entry->c_next;
+            free_component(entry);
+        }
+        wantcomp[slot] = 0;
+    }
+
+    ncomp = 0;
+}
+
+/*
+ * Drop one reference to a component structure. Once the count is no
+ * longer positive, the structure is destroyed: its name and text, its
+ * c_tws if it is a CT_DATE component, and its c_mn if it is a CT_ADDR
+ * component whose c_mn is set and is not the shared fmt_mnull
+ * placeholder.
+ */
+
+static void
+free_component(struct comp *cm)
+{
+    if (--cm->c_refcount > 0)
+        return;         /* still referenced elsewhere */
+
+    /* free(NULL) is a no-op, so unset members need no special casing */
+    free(cm->c_name);
+    free(cm->c_text);
+
+    if (cm->c_type & CT_DATE)
+        free(cm->c_tws);
+
+    if ((cm->c_type & CT_ADDR) && cm->c_mn != NULL && cm->c_mn != &fmt_mnull)
+        mnfree(cm->c_mn);
+
+    free(cm);
+}