3 * mf.c -- mail filter subroutines
7 * This code is Copyright (c) 2002, by the authors of nmh. See the
8 * COPYRIGHT file in the root directory of the nmh distribution for
9 * complete copyright information.
20 static char *getcpy (char *); /* forward declarations of helpers private to this file */
21 static char *add (char *, char *); /* result is built as second argument followed by first (see the "%s%s" in add) */
22 static void compress (char *, char *);
23 static int isat (char *);
24 static int parse_address (void); /* called from getadrx(); the routines below all take the token buffer that my_lex() fills */
25 static int phrase (char *);
26 static int route_addr (char *);
27 static int local_part (char *);
28 static int domain (char *);
29 static int route (char *);
30 static int my_lex (char *); /* tokenizer: returns an LX_* code and records it in last_lex */
39 /* causes compiles to blow up because the symbol _cleanup is undefined
40 where did this ever come from? */
46 p = mh_xmalloc ((size_t) (strlen (s) + 2));
53 add (char *s1, char *s2)
60 p = mh_xmalloc ((size_t) (strlen (s1) + strlen (s2) + 2));
61 sprintf (p, "%s%s", s2, s1);
69 return (strncmp (string, "From ", 5) == 0
70 || strncmp (string, ">From ", 6) == 0);
75 lequal (char *a, char *b)
81 char c1 = islower (*a) ? toupper (*a) : *a;
82 char c2 = islower (*b) ? toupper (*b) : *b;
92 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
93 * addresses, so for each list of addresses we see if we can find some
94 * character to give us a hint.
98 #define CHKADR 0 /* undetermined address style */
99 #define UNIXDR 1 /* UNIX-style address */
100 #define ARPADR 2 /* ARPAnet-style address */
103 static char *punctuators = ";<>.()[]"; /* seekadrx() checks each character of the address list against this set */
104 static char *vp = NULL; /* set by uucpadrx() to its getcpy() copy of the address list */
105 static char *tp = NULL; /* initially the same copy as vp; uucpadrx() begins scanning at tp */
107 static struct adrx adrxs1; /* record that uucpadrx() fills in (text, host, path, ...) */
111 seekadrx (char *addrs)
113 static int state = CHKADR;
115 register struct adrx *adrxp;
118 for (state = UNIXDR, cp = addrs; *cp; cp++)
119 if (strchr(punctuators, *cp)) {
126 adrxp = uucpadrx (addrs);
131 adrxp = getadrx (addrs);
143 * uucpadrx() implements a partial UUCP-style address parser. It's based
144 * on the UUCP notion that addresses are separated by spaces or commas.
149 uucpadrx (char *addrs)
151 register char *cp, *wp, *xp, *yp, *zp;
152 register struct adrx *adrxp = &adrxs1;
155 vp = tp = getcpy (addrs);
156 compress (addrs, vp);
165 for (cp = tp; isspace (*cp); cp++)
173 if ((wp = strchr(cp, ',')) == NULL) {
174 if ((wp = strchr(cp, ' ')) != NULL) {
176 while (isspace (*xp))
178 if (*xp != 0 && isat (--xp)) {
180 while (isspace (*yp))
183 if ((zp = strchr(yp, ' ')) != NULL)
202 adrxp->text = getcpy (cp);
204 adrxp->host = adrxp->path = NULL;
205 if ((wp = strrchr(cp, '@')) != NULL) {
207 adrxp->host = *wp ? wp : NULL;
210 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
213 adrxp->host = wp + 3;
216 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
224 compress (char *fp, char *tp)
226 register char c, *cp;
228 for (c = ' ', cp = tp; (*tp = *fp++) != 0;)
236 if (c == ' ' && cp < tp)
244 return (strncmp (p, " AT ", 4)
245 && strncmp (p, " At ", 4)
246 && strncmp (p, " aT ", 4)
247 && strncmp (p, " at ", 4) ? FALSE : TRUE);
253 * getadrx() implements a partial 822-style address parser. The parser
254 * is neither complete nor correct. It does however recognize nearly all
255 * of the 822 address syntax. In addition it handles the majority of the
256 * 733 syntax as well. Most problems arise from trying to accommodate both.
258 * In terms of 822, the route-specification in
260 * "<" [route] local-part "@" domain ">"
262 * is parsed and returned unchanged. Multiple at-signs are compressed
263 * via source-routing. Recursive groups are not allowed as per the
266 * In terms of 733, " at " is recognized as equivalent to "@".
268 * In terms of both, the parser will not complain about missing hosts.
272 * We should not allow addresses like
274 * Marshall T. Rose <MRose@UCI>
276 * but should insist on
278 * "Marshall T. Rose" <MRose@UCI>
280 * Unfortunately, a lot of mailers stupidly let people get away with this.
284 * We should not allow addresses like
288 * but should insist on
292 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
297 * We should not allow addresses like
299 * @UCI:MRose@UCI-750a
301 * but should insist on
303 * Marshall Rose <@UCI:MRose@UCI-750a>
305 * Unfortunately, a lot of mailers stupidly do this.
329 static struct specials special[] = {
346 static int glevel = 0; /* post-decremented when a semi-colon ends an address; a value <= 0 there yields "extraneous semi-colon" (presumably the open-group depth -- confirm) */
347 static int ingrp = 0; /* copied into adrxp->ingrp by getadrx() */
348 static int last_lex = LX_END; /* most recent LX_* code returned by my_lex() */
350 static char *dp = NULL; /* getadrx(): the getcpy() copy of the address list */
351 static char *cp = NULL; /* read position in that copy; my_lex() consumes characters through it */
352 static char *ap = NULL; /* start of the span (ap .. cp) that getadrx() copies into adr */
353 static char *pers = NULL; /* phrase words accumulated by parse_address()/phrase() */
354 static char *mbox = NULL; /* text accumulated by local_part() */
355 static char *host = NULL; /* text accumulated by domain() */
356 static char *path = NULL; /* text accumulated while parsing a route */
357 static char *grp = NULL; /* built from pers followed by ": " */
358 static char *note = NULL; /* space-joined text appended inside my_lex() (presumably parenthesized comments -- confirm) */
359 static char err[BUFSIZ]; /* parse error message; err[0] == 0 means no error was recorded */
360 static char adr[BUFSIZ]; /* text of the address just scanned, copied from ap */
362 static struct adrx adrxs2; /* record that getadrx() fills in */
366 getadrx (char *addrs)
369 register struct adrx *adrxp = &adrxs2;
383 pers = mbox = host = path = grp = note = NULL;
387 dp = cp = getcpy (addrs ? addrs : "");
397 switch (parse_address ()) {
409 default: /* catch trailing comments */
434 while (isspace (*ap))
437 sprintf (adr, "%.*s", cp - ap, ap);
440 bp = adr + strlen (adr) - 1;
441 if (*bp == ',' || *bp == ';' || *bp == '\n')
450 adrxp->ingrp = ingrp;
452 adrxp->err = err[0] ? err : NULL;
465 switch (my_lex (buffer)) {
468 pers = getcpy (buffer);
473 strcpy (err, "extraneous semi-colon");
486 case LX_LBRK: /* sigh (2) */
489 case LX_AT: /* sigh (3) */
491 if (route_addr (buffer) == NOTOK)
493 return OK; /* why be choosy? */
496 sprintf (err, "illegal address construct (%s)", buffer);
500 switch (my_lex (buffer)) {
503 pers = add (buffer, add (" ", pers));
504 more_phrase: ; /* sigh (1) */
505 if (phrase (buffer) == NOTOK)
511 if (route_addr (buffer) == NOTOK)
513 if (last_lex == LX_RBRK)
515 sprintf (err, "missing right-bracket (%s)", buffer);
521 sprintf (err, "nested groups not allowed (%s)", pers);
524 grp = add (": ", pers);
530 switch (my_lex (buffer)) {
532 case LX_END: /* tsk, tsk */
541 return parse_address ();
545 case LX_DOT: /* sigh (1) */
546 pers = add (".", pers);
550 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
562 mbox = add (buffer, pers);
564 if (route_addr (buffer) == NOTOK)
572 if (domain (buffer) == NOTOK)
578 strcpy (err, "extraneous semi-colon");
586 sprintf (err, "junk after local@domain (%s)", buffer);
590 case LX_SEMI: /* no host */
594 if (last_lex == LX_SEMI && glevel-- <= 0) {
595 strcpy (err, "extraneous semi-colon");
603 sprintf (err, "missing mailbox (%s)", buffer);
610 phrase (char *buffer)
613 switch (my_lex (buffer)) {
616 pers = add (buffer, add (" ", pers));
626 route_addr (char *buffer)
628 register char *pp = cp;
630 if (my_lex (buffer) == LX_AT) {
631 if (route (buffer) == NOTOK)
637 if (local_part (buffer) == NOTOK)
642 return domain (buffer);
644 case LX_SEMI: /* if in group */
645 case LX_RBRK: /* no host */
651 sprintf (err, "no at-sign after local-part (%s)", buffer);
658 local_part (char *buffer)
663 switch (my_lex (buffer)) {
666 mbox = add (buffer, mbox);
670 sprintf (err, "no mailbox in local-part (%s)", buffer);
674 switch (my_lex (buffer)) {
676 mbox = add (buffer, mbox);
687 domain (char *buffer)
690 switch (my_lex (buffer)) {
693 host = add (buffer, host);
697 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
701 switch (my_lex (buffer)) {
703 host = add (buffer, host);
706 case LX_AT: /* sigh (0) */
707 mbox = add (host, add ("%", mbox));
725 switch (my_lex (buffer)) {
728 path = add (buffer, path);
732 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
735 switch (my_lex (buffer)) {
737 path = add (buffer, path);
739 switch (my_lex (buffer)) {
744 path = add (buffer, path);
748 sprintf (err, "no at-sign found for next domain in route (%s)",
755 case LX_AT: /* XXX */
757 path = add (buffer, path);
761 path = add (buffer, path);
765 sprintf (err, "no colon found to terminate route (%s)", buffer);
773 my_lex (char *buffer)
775 /* buffer should be at least BUFSIZ bytes long */
777 register char c, *bp;
779 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
780 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
785 return (last_lex = LX_END);
793 return (last_lex = LX_END);
802 return (last_lex = LX_ERR);
805 if ((c = *cp++) == 0) {
807 return (last_lex = LX_ERR);
820 note = note ? add (buffer, add (" ", note))
822 return my_lex (buffer);
833 return (last_lex = LX_ERR);
836 if ((c = *cp++) == 0) {
838 return (last_lex = LX_ERR);
846 return (last_lex = LX_QSTR);
856 return (last_lex = LX_ERR);
859 if ((c = *cp++) == 0) {
861 return (last_lex = LX_ERR);
869 return (last_lex = LX_DLIT);
875 for (i = 0; special[i].lx_chr != 0; i++)
876 if (c == special[i].lx_chr)
877 return (last_lex = special[i].lx_val);
880 return (last_lex = LX_ERR);
883 if ((c = *cp++) == 0)
885 for (i = 0; special[i].lx_chr != 0; i++)
886 if (c == special[i].lx_chr)
888 if (iscntrl (c) || isspace (c))
898 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
903 /* Out of buffer space. *bp is the last byte in the buffer */
905 return (last_lex = LX_ERR);
910 legal_person (char *p)
914 static char buffer[BUFSIZ];
918 for (cp = p; *cp; cp++)
919 for (i = 0; special[i].lx_chr; i++)
920 if (*cp == special[i].lx_chr) {
921 sprintf (buffer, "\"%s\"", p);
930 mfgets (FILE *in, char **bp)
933 register char *cp, *dp, *ep;
935 static char *pp = NULL;
938 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
940 for (ep = (cp = pp) + len - 2;;) {
941 switch (i = getc (in)) {
959 if (cp == pp) /* end of headers, gobble it */
961 switch (i = getc (in)) {
962 default: /* end of line */
963 case '\n': /* end of headers, save for next call */
967 case ' ': /* continue headers */
971 } /* fall into default case */
978 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
984 cp += dp - pp, ep = (pp = cp) + len - 2;