3 * mf.c -- mail filter subroutines
15 static char *getcpy (char *);
16 static char *add (char *, char *);
17 static void compress (char *, char *);
18 static int isat (char *);
19 static int parse_address (void);
20 static int phrase (char *);
21 static int route_addr (char *);
22 static int local_part (char *);
23 static int domain (char *);
24 static int route (char *);
25 static int my_lex (char *);
39 if ((p = malloc ((size_t) (strlen (s) + 2))))
46 add (char *s1, char *s2)
53 if ((p = malloc ((size_t) (strlen (s1) + strlen (s2) + 2))))
54 sprintf (p, "%s%s", s2, s1);
62 return (strncmp (string, "From ", 5) == 0
63 || strncmp (string, ">From ", 6) == 0);
68 lequal (char *a, char *b)
74 char c1 = islower (*a) ? toupper (*a) : *a;
75 char c2 = islower (*b) ? toupper (*b) : *b;
85 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
86 * addresses, so for each list of addresses we see if we can find some
87 * character to give us a hint.
91 #define CHKADR 0 /* undetermined address style */
92 #define UNIXDR 1 /* UNIX-style address */
93 #define ARPADR 2 /* ARPAnet-style address */
96 static char *punctuators = ";<>.()[]";
97 static char *vp = NULL;
98 static char *tp = NULL;
100 static struct adrx adrxs1;
104 seekadrx (char *addrs)
106 static int state = CHKADR;
108 register struct adrx *adrxp;
111 for (state = UNIXDR, cp = addrs; *cp; cp++)
112 if (strchr(punctuators, *cp)) {
119 adrxp = uucpadrx (addrs);
124 adrxp = getadrx (addrs);
136 * uucpadrx() implements a partial UUCP-style address parser. It's based
137 * on the UUCP notion that addresses are separated by spaces or commas.
142 uucpadrx (char *addrs)
144 register char *cp, *wp, *xp, *yp, *zp;
145 register struct adrx *adrxp = &adrxs1;
148 vp = tp = getcpy (addrs);
149 compress (addrs, vp);
158 for (cp = tp; isspace (*cp); cp++)
166 if ((wp = strchr(cp, ',')) == NULL) {
167 if ((wp = strchr(cp, ' ')) != NULL) {
169 while (isspace (*xp))
171 if (*xp != 0 && isat (--xp)) {
173 while (isspace (*yp))
176 if ((zp = strchr(yp, ' ')) != NULL)
195 adrxp->text = getcpy (cp);
197 adrxp->host = adrxp->path = NULL;
198 if ((wp = strrchr(cp, '@')) != NULL) {
200 adrxp->host = *wp ? wp : NULL;
203 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
206 adrxp->host = wp + 3;
209 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
217 compress (char *fp, char *tp)
219 register char c, *cp;
221 for (c = ' ', cp = tp; (*tp = *fp++) != 0;)
229 if (c == ' ' && cp < tp)
237 return (strncmp (p, " AT ", 4)
238 && strncmp (p, " At ", 4)
239 && strncmp (p, " aT ", 4)
240 && strncmp (p, " at ", 4) ? FALSE : TRUE);
246 * getadrx() implements a partial 822-style address parser. The parser
247 * is neither complete nor correct. It does however recognize nearly all
248 * of the 822 address syntax. In addition it handles the majority of the
249 * 733 syntax as well. Most problems arise from trying to accommodate both.
251 * In terms of 822, the route-specification in
253 * "<" [route] local-part "@" domain ">"
255 * is parsed and returned unchanged. Multiple at-signs are compressed
256 * via source-routing. Recursive groups are not allowed as per the
259 * In terms of 733, " at " is recognized as equivalent to "@".
261 * In terms of both the parser will not complain about missing hosts.
265 * We should not allow addresses like
267 * Marshall T. Rose <MRose@UCI>
269 * but should insist on
271 * "Marshall T. Rose" <MRose@UCI>
273 * Unfortunately, a lot of mailers stupidly let people get away with this.
277 * We should not allow addresses like
281 * but should insist on
285 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
290 * We should not allow addresses like
292 * @UCI:MRose@UCI-750a
294 * but should insist on
296 * Marshall Rose <@UCI:MRose@UCI-750a>
298 * Unfortunately, a lot of mailers stupidly do this.
322 static struct specials special[] = {
339 static int glevel = 0;
340 static int ingrp = 0;
341 static int last_lex = LX_END;
343 static char *dp = NULL;
344 static char *cp = NULL;
345 static char *ap = NULL;
346 static char *pers = NULL;
347 static char *mbox = NULL;
348 static char *host = NULL;
349 static char *path = NULL;
350 static char *grp = NULL;
351 static char *note = NULL;
352 static char err[BUFSIZ];
353 static char adr[BUFSIZ];
355 static struct adrx adrxs2;
359 getadrx (char *addrs)
362 register struct adrx *adrxp = &adrxs2;
376 pers = mbox = host = path = grp = note = NULL;
380 dp = cp = getcpy (addrs ? addrs : "");
390 switch (parse_address ()) {
402 default: /* catch trailing comments */
427 while (isspace (*ap))
430 sprintf (adr, "%.*s", cp - ap, ap);
433 bp = adr + strlen (adr) - 1;
434 if (*bp == ',' || *bp == ';' || *bp == '\n')
443 adrxp->ingrp = ingrp;
445 adrxp->err = err[0] ? err : NULL;
458 switch (my_lex (buffer)) {
461 pers = getcpy (buffer);
466 strcpy (err, "extraneous semi-colon");
479 case LX_LBRK: /* sigh (2) */
482 case LX_AT: /* sigh (3) */
484 if (route_addr (buffer) == NOTOK)
486 return OK; /* why be choosy? */
489 sprintf (err, "illegal address construct (%s)", buffer);
493 switch (my_lex (buffer)) {
496 pers = add (buffer, add (" ", pers));
497 more_phrase: ; /* sigh (1) */
498 if (phrase (buffer) == NOTOK)
504 if (route_addr (buffer) == NOTOK)
506 if (last_lex == LX_RBRK)
508 sprintf (err, "missing right-bracket (%s)", buffer);
514 sprintf (err, "nested groups not allowed (%s)", pers);
517 grp = add (": ", pers);
523 switch (my_lex (buffer)) {
525 case LX_END: /* tsk, tsk */
534 return parse_address ();
538 case LX_DOT: /* sigh (1) */
539 pers = add (".", pers);
543 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
555 mbox = add (buffer, pers);
557 if (route_addr (buffer) == NOTOK)
565 if (domain (buffer) == NOTOK)
571 strcpy (err, "extraneous semi-colon");
579 sprintf (err, "junk after local@domain (%s)", buffer);
583 case LX_SEMI: /* no host */
587 if (last_lex == LX_SEMI && glevel-- <= 0) {
588 strcpy (err, "extraneous semi-colon");
596 sprintf (err, "missing mailbox (%s)", buffer);
603 phrase (char *buffer)
606 switch (my_lex (buffer)) {
609 pers = add (buffer, add (" ", pers));
619 route_addr (char *buffer)
621 register char *pp = cp;
623 if (my_lex (buffer) == LX_AT) {
624 if (route (buffer) == NOTOK)
630 if (local_part (buffer) == NOTOK)
635 return domain (buffer);
637 case LX_SEMI: /* if in group */
638 case LX_RBRK: /* no host */
644 sprintf (err, "no at-sign after local-part (%s)", buffer);
651 local_part (char *buffer)
656 switch (my_lex (buffer)) {
659 mbox = add (buffer, mbox);
663 sprintf (err, "no mailbox in local-part (%s)", buffer);
667 switch (my_lex (buffer)) {
669 mbox = add (buffer, mbox);
680 domain (char *buffer)
683 switch (my_lex (buffer)) {
686 host = add (buffer, host);
690 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
694 switch (my_lex (buffer)) {
696 host = add (buffer, host);
699 case LX_AT: /* sigh (0) */
700 mbox = add (host, add ("%", mbox));
718 switch (my_lex (buffer)) {
721 path = add (buffer, path);
725 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
728 switch (my_lex (buffer)) {
730 path = add (buffer, path);
732 switch (my_lex (buffer)) {
737 path = add (buffer, path);
741 sprintf (err, "no at-sign found for next domain in route (%s)",
748 case LX_AT: /* XXX */
750 path = add (buffer, path);
754 path = add (buffer, path);
758 sprintf (err, "no colon found to terminate route (%s)", buffer);
766 my_lex (char *buffer)
769 register char c, *bp;
774 return (last_lex = LX_END);
782 return (last_lex = LX_END);
786 for (*bp++ = c, i = 0;;)
790 return (last_lex = LX_ERR);
793 if ((c = *cp++) == 0) {
795 return (last_lex = LX_ERR);
808 note = note ? add (buffer, add (" ", note))
810 return my_lex (buffer);
819 return (last_lex = LX_ERR);
822 if ((c = *cp++) == 0) {
824 return (last_lex = LX_ERR);
832 return (last_lex = LX_QSTR);
840 return (last_lex = LX_ERR);
843 if ((c = *cp++) == 0) {
845 return (last_lex = LX_ERR);
853 return (last_lex = LX_DLIT);
858 for (i = 0; special[i].lx_chr != 0; i++)
859 if (c == special[i].lx_chr)
860 return (last_lex = special[i].lx_val);
863 return (last_lex = LX_ERR);
866 if ((c = *cp++) == 0)
868 for (i = 0; special[i].lx_chr != 0; i++)
869 if (c == special[i].lx_chr)
871 if (iscntrl (c) || isspace (c))
881 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
888 legal_person (char *p)
892 static char buffer[BUFSIZ];
896 for (cp = p; *cp; cp++)
897 for (i = 0; special[i].lx_chr; i++)
898 if (*cp == special[i].lx_chr) {
899 sprintf (buffer, "\"%s\"", p);
908 mfgets (FILE *in, char **bp)
911 register char *cp, *dp, *ep;
913 static char *pp = NULL;
916 if (!(pp = malloc ((size_t) (len = BUFSIZ))))
919 for (ep = (cp = pp) + len - 2;;) {
920 switch (i = getc (in)) {
938 if (cp == pp) /* end of headers, gobble it */
940 switch (i = getc (in)) {
941 default: /* end of line */
942 case '\n': /* end of headers, save for next call */
946 case ' ': /* continue headers */
950 } /* fall into default case */
957 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
963 cp += dp - pp, ep = (pp = cp) + len - 2;