2 * mf.c -- mail filter subroutines
4 * This code is Copyright (c) 2002, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
17 static char *getcpy (char *);
18 static void compress (char *, unsigned char *);
19 static int isat (char *);
20 static int parse_address (void);
21 static int phrase (char *);
22 static int route_addr (char *);
23 static int local_part (char *);
24 static int domain (char *);
25 static int route (char *);
26 static int my_lex (char *);
35 /* causes compiles to blow up because the symbol _cleanup is undefined
36 * where did this ever come from? */
42 p = mh_xmalloc ((size_t) (strlen (s) + 2));
51 return (strncmp (string, "From ", 5) == 0
52 || strncmp (string, ">From ", 6) == 0);
57 lequal (unsigned char *a, unsigned char *b)
63 char c1 = islower (*a) ? toupper (*a) : *a;
64 char c2 = islower (*b) ? toupper (*b) : *b;
74 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
75 * addresses, so for each list of addresses we see if we can find some
76 * character to give us a hint.
80 #define CHKADR 0 /* undetermined address style; initial value of seekadrx()'s static state */
81 #define UNIXDR 1 /* UNIX-style address; seekadrx() starts its scan of the list with this value */
82 #define ARPADR 2 /* ARPAnet-style address */
85 static char *punctuators = ";<>.()[]"; /* characters seekadrx() tests for, via strchr(), while scanning an address list */
86 static char *vp = NULL; /* set together with tp to the getcpy() copy made in uucpadrx(); also the target handed to compress() */
87 static char *tp = NULL; /* where uucpadrx() begins skipping whitespace; presumably the scan position between calls -- verify */
89 static struct adrx adrxs1; /* the record uucpadrx() points adrxp at and fills in */
93 seekadrx (char *addrs)
95 static int state = CHKADR;
97 register struct adrx *adrxp;
100 for (state = UNIXDR, cp = addrs; *cp; cp++)
101 if (strchr(punctuators, *cp)) {
108 adrxp = uucpadrx (addrs);
113 adrxp = getadrx (addrs);
125 * uucpadrx() implements a partial UUCP-style address parser. It's based
126 * on the UUCP notion that addresses are separated by spaces or commas.
131 uucpadrx (char *addrs)
133 register unsigned char *cp, *wp, *xp, *yp;
135 register struct adrx *adrxp = &adrxs1;
138 vp = tp = getcpy (addrs);
139 compress (addrs, vp);
140 } else if (tp == NULL) {
146 for (cp = tp; isspace (*cp); cp++)
154 if ((wp = strchr(cp, ',')) == NULL) {
155 if ((wp = strchr(cp, ' ')) != NULL) {
157 while (isspace (*xp))
159 if (*xp != 0 && isat (--xp)) {
161 while (isspace (*yp))
164 if ((zp = strchr(yp, ' ')) != NULL)
179 adrxp->text = getcpy (cp);
181 adrxp->host = adrxp->path = NULL;
182 if ((wp = strrchr(cp, '@')) != NULL) {
184 adrxp->host = *wp ? wp : NULL;
186 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
189 adrxp->host = wp + 3;
192 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
200 compress (char *fp, unsigned char *tp)
203 register unsigned char *cp;
205 for (c = ' ', cp = tp; (*tp = *fp++) != 0;)
212 if (c == ' ' && cp < tp)
220 return (strncmp (p, " AT ", 4)
221 && strncmp (p, " At ", 4)
222 && strncmp (p, " aT ", 4)
223 && strncmp (p, " at ", 4) ? FALSE : TRUE);
229 * getadrx() implements a partial 822-style address parser. The parser
230 * is neither complete nor correct. It does however recognize nearly all
231 * of the 822 address syntax. In addition it handles the majority of the
232 * 733 syntax as well. Most problems arise from trying to accommodate both.
234 * In terms of 822, the route-specification in
236 * "<" [route] local-part "@" domain ">"
238 * is parsed and returned unchanged. Multiple at-signs are compressed
239 * via source-routing. Recursive groups are not allowed as per the
242 * In terms of 733, " at " is recognized as equivalent to "@".
244 * In terms of both, the parser will not complain about missing hosts.
248 * We should not allow addresses like
250 * Marshall T. Rose <MRose@UCI>
252 * but should insist on
254 * "Marshall T. Rose" <MRose@UCI>
256 * Unfortunately, a lot of mailers stupidly let people get away with this.
260 * We should not allow addresses like
264 * but should insist on
268 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
273 * We should not allow addresses like
275 * @UCI:MRose@UCI-750a
277 * but should insist on
279 * Marshall Rose <@UCI:MRose@UCI-750a>
281 * Unfortunately, a lot of mailers stupidly do this.
305 static struct specials special[] = {
322 static int glevel = 0; /* tested and decremented when a semi-colon is the last token; a value <= 0 there is reported as "extraneous semi-colon" (presumably group depth -- verify) */
323 static int ingrp = 0; /* copied into adrxp->ingrp by getadrx() */
324 static int last_lex = LX_END; /* my_lex() stores every token code it returns here */
326 static char *dp = NULL; /* set together with cp to the getcpy() copy made in getadrx() */
327 static unsigned char *cp = NULL; /* read position; my_lex() consumes characters with *cp++ */
328 static unsigned char *ap = NULL; /* start of the span (ap up to cp) that getadrx() copies into adr */
329 static char *pers = NULL; /* phrase text, built up with add(); reset to NULL by getadrx() */
330 static char *mbox = NULL; /* local-part text, built up with add(); reset to NULL by getadrx() */
331 static char *host = NULL; /* domain text, built up with add(); reset to NULL by getadrx() */
332 static char *path = NULL; /* route text, built up with add(); reset to NULL by getadrx() */
333 static char *grp = NULL; /* built as add (": ", pers) when a group is recognized; reset to NULL by getadrx() */
334 static char *note = NULL; /* text appended by my_lex() with add(), space-separated; reset to NULL by getadrx() */
335 static char err[BUFSIZ]; /* error message text; getadrx() exposes it as adrxp->err only when err[0] is non-zero */
336 static char adr[BUFSIZ]; /* receives the address text between ap and cp via sprintf() */
338 static struct adrx adrxs2; /* the record getadrx() points adrxp at and fills in */
342 getadrx (char *addrs)
345 register struct adrx *adrxp = &adrxs2;
359 pers = mbox = host = path = grp = note = NULL;
363 dp = cp = getcpy (addrs ? addrs : "");
365 } else if (cp == NULL) {
371 switch (parse_address ()) {
383 default: /* catch trailing comments */
408 while (isspace (*ap))
411 sprintf (adr, "%.*s", (int)(cp - ap), ap);
414 bp = adr + strlen (adr) - 1;
415 if (*bp == ',' || *bp == ';' || *bp == '\n')
424 adrxp->ingrp = ingrp;
426 adrxp->err = err[0] ? err : NULL;
439 switch (my_lex (buffer)) {
442 pers = getcpy (buffer);
447 strcpy (err, "extraneous semi-colon");
460 case LX_LBRK: /* sigh (2) */
463 case LX_AT: /* sigh (3) */
465 if (route_addr (buffer) == NOTOK)
467 return OK; /* why be choosy? */
470 sprintf (err, "illegal address construct (%s)", buffer);
474 switch (my_lex (buffer)) {
477 pers = add (buffer, add (" ", pers));
478 more_phrase: ; /* sigh (1) */
479 if (phrase (buffer) == NOTOK)
485 if (route_addr (buffer) == NOTOK)
487 if (last_lex == LX_RBRK)
489 sprintf (err, "missing right-bracket (%s)", buffer);
495 sprintf (err, "nested groups not allowed (%s)", pers);
498 grp = add (": ", pers);
504 switch (my_lex (buffer)) {
506 case LX_END: /* tsk, tsk */
515 return parse_address ();
519 case LX_DOT: /* sigh (1) */
520 pers = add (".", pers);
524 sprintf (err, "no mailbox in address, only a phrase (%s%s)", pers, buffer);
535 mbox = add (buffer, pers);
537 if (route_addr (buffer) == NOTOK)
545 if (domain (buffer) == NOTOK)
551 strcpy (err, "extraneous semi-colon");
559 sprintf (err, "junk after local@domain (%s)", buffer);
563 case LX_SEMI: /* no host */
567 if (last_lex == LX_SEMI && glevel-- <= 0) {
568 strcpy (err, "extraneous semi-colon");
576 sprintf (err, "missing mailbox (%s)", buffer);
583 phrase (char *buffer)
586 switch (my_lex (buffer)) {
589 pers = add (buffer, add (" ", pers));
599 route_addr (char *buffer)
601 register char *pp = cp;
603 if (my_lex (buffer) == LX_AT) {
604 if (route (buffer) == NOTOK)
610 if (local_part (buffer) == NOTOK)
615 return domain (buffer);
617 case LX_SEMI: /* if in group */
618 case LX_RBRK: /* no host */
624 sprintf (err, "no at-sign after local-part (%s)", buffer);
631 local_part (char *buffer)
636 switch (my_lex (buffer)) {
639 mbox = add (buffer, mbox);
643 sprintf (err, "no mailbox in local-part (%s)", buffer);
647 switch (my_lex (buffer)) {
649 mbox = add (buffer, mbox);
660 domain (char *buffer)
663 switch (my_lex (buffer)) {
666 host = add (buffer, host);
670 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
674 switch (my_lex (buffer)) {
676 host = add (buffer, host);
679 case LX_AT: /* sigh (0) */
680 mbox = add (host, add ("%", mbox));
698 switch (my_lex (buffer)) {
701 path = add (buffer, path);
705 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
708 switch (my_lex (buffer)) {
710 path = add (buffer, path);
712 switch (my_lex (buffer)) {
717 path = add (buffer, path);
721 sprintf (err, "no at-sign found for next domain in route (%s)",
728 case LX_AT: /* XXX */
730 path = add (buffer, path);
734 path = add (buffer, path);
738 sprintf (err, "no colon found to terminate route (%s)", buffer);
746 my_lex (char *buffer)
748 /* buffer should be at least BUFSIZ bytes long */
750 register unsigned char c;
753 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
754 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
759 return (last_lex = LX_END);
767 return (last_lex = LX_END);
776 return (last_lex = LX_ERR);
779 if ((c = *cp++) == 0) {
781 return (last_lex = LX_ERR);
794 note = note ? add (buffer, add (" ", note))
796 return my_lex (buffer);
807 return (last_lex = LX_ERR);
810 if ((c = *cp++) == 0) {
812 return (last_lex = LX_ERR);
820 return (last_lex = LX_QSTR);
830 return (last_lex = LX_ERR);
833 if ((c = *cp++) == 0) {
835 return (last_lex = LX_ERR);
843 return (last_lex = LX_DLIT);
849 for (i = 0; special[i].lx_chr != 0; i++)
850 if (c == special[i].lx_chr)
851 return (last_lex = special[i].lx_val);
854 return (last_lex = LX_ERR);
857 if ((c = *cp++) == 0)
859 for (i = 0; special[i].lx_chr != 0; i++)
860 if (c == special[i].lx_chr)
862 if (iscntrl (c) || isspace (c))
872 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
877 /* Out of buffer space. *bp is the last byte in the buffer */
879 return (last_lex = LX_ERR);
884 legal_person (char *p)
888 static char buffer[BUFSIZ];
892 for (cp = p; *cp; cp++)
893 for (i = 0; special[i].lx_chr; i++)
894 if (*cp == special[i].lx_chr) {
895 sprintf (buffer, "\"%s\"", p);
904 mfgets (FILE *in, char **bp)
907 register char *cp, *dp, *ep;
909 static char *pp = NULL;
912 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
914 for (ep = (cp = pp) + len - 2;;) {
915 switch (i = getc (in)) {
933 if (cp == pp) /* end of headers, gobble it */
935 switch (i = getc (in)) {
936 default: /* end of line */
937 case '\n': /* end of headers, save for next call */
941 case ' ': /* continue headers */
945 } /* fall into default case */
952 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
953 cp += dp - pp, ep = (pp = cp) + len - 2;