1 /* mf.c - mail filter subroutines */
3 static char ident[] = "@(#)$Id: mf.c,v 1.10 1992/12/15 00:20:22 jromine Exp $";
10 static int isat(), parse_address(), phrase();
11 static int route_addr(), local_part(), domain(), route();
13 static void compress();
16 static char *getcpy (s)
21 if (!s) { _cleanup(); abort(); for(;;) pause();}
22 if ((p = malloc ((unsigned) (strlen (s) + 2))) != NULL)
28 static char *add (s1, s2)
37 if ((p = malloc ((unsigned) (strlen (s1) + strlen (s2) + 2))) != NULL)
38 (void) sprintf (p, "%s%s", s2, s1);
46 register char *string;
48 return (strncmp (string, "From ", 5) == 0
49 || strncmp (string, ">From ", 6) == 0);
61 char c1 = islower (*a) ? toupper (*a) : *a;
62 char c2 = islower (*b) ? toupper (*b) : *b;
74 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
75 * addresses, so for each list of addresses we see if we can find some
76 * character to give us a hint.
81 #define CHKADR 0 /* undetermined address style */
82 #define UNIXDR 1 /* UNIX-style address */
83 #define ARPADR 2 /* ARPAnet-style address */
86 static char *punctuators = ";<>.()[]";
87 static char *vp = NULL;
88 static char *tp = NULL;
90 static struct adrx adrxs1;
94 struct adrx *seekadrx (addrs)
97 static int state = CHKADR;
99 register struct adrx *adrxp;
102 for (state = UNIXDR, cp = addrs; *cp; cp++)
103 if (index (punctuators, *cp)) {
110 adrxp = uucpadrx (addrs);
115 adrxp = getadrx (addrs);
129 * uucpadrx() implements a partial UUCP-style address parser. It's based
130 * on the UUCP notion that addresses are separated by spaces or commas.
135 struct adrx *uucpadrx (addrs)
136 register char *addrs;
143 register struct adrx *adrxp = &adrxs1;
146 vp = tp = getcpy (addrs);
147 compress (addrs, vp);
156 for (cp = tp; isspace (*cp); cp++)
166 if ((wp = index (cp, ',')) == NULL)
167 if ((wp = index (cp, ' ')) != NULL) {
169 while (isspace (*xp))
171 if (*xp != 0 && isat (--xp)) {
173 while (isspace (*yp))
176 if ((zp = index (yp, ' ')) != NULL)
192 free (adrxp -> text);
193 adrxp -> text = getcpy (cp);
195 adrxp -> host = adrxp -> path = NULL;
196 if ((wp = rindex (cp, '@')) != NULL) {
198 adrxp -> host = *wp ? wp : NULL;
201 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
204 adrxp -> host = wp + 3;
207 adrxp -> pers = adrxp -> grp = adrxp -> note = adrxp -> err = NULL;
215 static void compress (fp, tp)
222 for (c = ' ', cp = tp; (*tp = *fp++) != 0;)
230 if (c == ' ' && cp < tp)
238 return (strncmp (p, " AT ", 4)
239 && strncmp (p, " At ", 4)
240 && strncmp (p, " aT ", 4)
241 && strncmp (p, " at ", 4) ? FALSE : TRUE);
248 * getadrx() implements a partial 822-style address parser. The parser
249 * is neither complete nor correct. It does however recognize nearly all
250 * of the 822 address syntax. In addition it handles the majority of the
251 * 733 syntax as well. Most problems arise from trying to accommodate both.
253 * In terms of 822, the route-specification in
255 * "<" [route] local-part "@" domain ">"
257 * is parsed and returned unchanged. Multiple at-signs are compressed
258 * via source-routing. Recursive groups are not allowed as per the
261 * In terms of 733, " at " is recognized as equivalent to "@".
263 * In terms of both the parser will not complain about missing hosts.
267 * We should not allow addresses like
269 * Marshall T. Rose <MRose@UCI>
271 * but should insist on
273 * "Marshall T. Rose" <MRose@UCI>
275 * Unfortunately, a lot of mailers stupidly let people get away with this.
279 * We should not allow addresses like
283 * but should insist on
287 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
292 * We should not allow addresses like
294 * @UCI:MRose@UCI-750a
296 * but should insist on
298 * Marshall Rose <@UCI:MRose@UCI-750a>
300 * Unfortunately, a lot of mailers stupidly do this.
343 static int glevel = 0;
344 static int ingrp = 0;
345 static int last_lex = LX_END;
347 static char *dp = NULL;
348 static char *cp = NULL;
349 static char *ap = NULL;
350 static char *pers = NULL;
351 static char *mbox = NULL;
352 static char *host = NULL;
353 static char *path = NULL;
354 static char *grp = NULL;
355 static char *note = NULL;
356 static char err[BUFSIZ];
357 static char adr[BUFSIZ];
359 static struct adrx adrxs2;
363 struct adrx *getadrx (addrs)
364 register char *addrs;
367 register struct adrx *adrxp = &adrxs2;
381 pers = mbox = host = path = grp = note = NULL;
385 dp = cp = getcpy (addrs ? addrs : "");
395 switch (parse_address ()) {
407 default: /* catch trailing comments */
432 while (isspace (*ap))
435 (void) sprintf (adr, "%.*s", cp - ap, ap);
437 (void) strcpy (adr, ap);
438 bp = adr + strlen (adr) - 1;
439 if (*bp == ',' || *bp == ';' || *bp == '\n')
445 adrxp -> pers = pers;
446 adrxp -> mbox = mbox;
447 adrxp -> host = host;
448 adrxp -> path = path;
450 adrxp -> ingrp = ingrp;
451 adrxp -> note = note;
452 adrxp -> err = err[0] ? err : NULL;
459 static int parse_address () {
464 switch (my_lex (buffer)) {
467 pers = getcpy (buffer);
472 (void) strcpy (err, "extraneous semi-colon");
485 case LX_LBRK: /* sigh (2) */
488 case LX_AT: /* sigh (3) */
490 if (route_addr (buffer) == NOTOK)
492 return OK; /* why be choosy? */
495 (void) sprintf (err, "illegal address construct (%s)", buffer);
501 switch (my_lex (buffer)) {
504 pers = add (buffer, add (" ", pers));
505 more_phrase: ; /* sigh (1) */
506 if (phrase (buffer) == NOTOK)
512 if (route_addr (buffer) == NOTOK)
514 if (last_lex == LX_RBRK)
516 (void) sprintf (err, "missing right-bracket (%s)", buffer);
522 (void) sprintf (err, "nested groups not allowed (%s)",
526 grp = add (": ", pers);
532 switch (my_lex (buffer)) {
534 case LX_END: /* tsk, tsk */
543 return parse_address ();
547 case LX_DOT: /* sigh (1) */
548 pers = add (".", pers);
553 "no mailbox in address, only a phrase (%s%s)",
567 mbox = add (buffer, pers);
569 if (route_addr (buffer) == NOTOK)
577 if (domain (buffer) == NOTOK)
583 (void) strcpy (err, "extraneous semi-colon");
591 (void) sprintf (err, "junk after local@domain (%s)",
596 case LX_SEMI: /* no host */
600 if (last_lex == LX_SEMI && glevel-- <= 0) {
601 (void) strcpy (err, "extraneous semi-colon");
609 (void) sprintf (err, "missing mailbox (%s)", buffer);
616 static int phrase (buffer)
617 register char *buffer;
620 switch (my_lex (buffer)) {
623 pers = add (buffer, add (" ", pers));
633 static int route_addr (buffer)
634 register char *buffer;
636 register char *pp = cp;
638 if (my_lex (buffer) == LX_AT) {
639 if (route (buffer) == NOTOK)
645 if (local_part (buffer) == NOTOK)
650 return domain (buffer);
652 case LX_SEMI: /* if in group */
653 case LX_RBRK: /* no host */
659 (void) sprintf (err, "no at-sign after local-part (%s)", buffer);
666 static int local_part (buffer)
667 register char *buffer;
672 switch (my_lex (buffer)) {
675 mbox = add (buffer, mbox);
679 (void) sprintf (err, "no mailbox in local-part (%s)", buffer);
683 switch (my_lex (buffer)) {
685 mbox = add (buffer, mbox);
696 static int domain (buffer)
697 register char *buffer;
700 switch (my_lex (buffer)) {
703 host = add (buffer, host);
708 "no sub-domain in domain-part of address (%s)",
713 switch (my_lex (buffer)) {
715 host = add (buffer, host);
718 case LX_AT: /* sigh (0) */
719 mbox = add (host, add ("%", mbox));
732 static int route (buffer)
733 register char *buffer;
738 switch (my_lex (buffer)) {
741 path = add (buffer, path);
746 "no sub-domain in domain-part of address (%s)",
750 switch (my_lex (buffer)) {
752 path = add (buffer, path);
754 switch (my_lex (buffer)) {
759 path = add (buffer, path);
764 "no at-sign found for next domain in route (%s)",
771 case LX_AT: /* XXX */
773 path = add (buffer, path);
777 path = add (buffer, path);
782 "no colon found to terminate route (%s)", buffer);
790 static int my_lex (buffer)
791 register char *buffer;
800 return (last_lex = LX_END);
808 return (last_lex = LX_END);
812 for (*bp++ = c, i = 0;;)
816 return (last_lex = LX_ERR);
819 if ((c = *cp++) == 0) {
821 return (last_lex = LX_ERR);
834 note = note ? add (buffer, add (" ", note))
836 return my_lex (buffer);
847 return (last_lex = LX_ERR);
850 if ((c = *cp++) == 0) {
852 return (last_lex = LX_ERR);
860 return (last_lex = LX_QSTR);
868 return (last_lex = LX_ERR);
871 if ((c = *cp++) == 0) {
873 return (last_lex = LX_ERR);
881 return (last_lex = LX_DLIT);
888 for (i = 0; special[i].lx_chr != 0; i++)
889 if (c == special[i].lx_chr)
890 return (last_lex = special[i].lx_val);
893 return (last_lex = LX_ERR);
896 if ((c = *cp++) == 0)
898 for (i = 0; special[i].lx_chr != 0; i++)
899 if (c == special[i].lx_chr)
901 if (iscntrl (c) || isspace (c))
911 last_lex = !gotat || cp == NULL || index (cp, '<') != NULL
918 char *legal_person (p)
923 static char buffer[BUFSIZ];
927 for (cp = p; *cp; cp++)
928 for (i = 0; special[i].lx_chr; i++)
929 if (*cp == special[i].lx_chr) {
930 (void) sprintf (buffer, "\"%s\"", p);
948 static char *pp = NULL;
951 if ((pp = malloc ((unsigned) (len = BUFSIZ))) == NULL)
954 for (ep = (cp = pp) + len - 2;;) {
955 switch (i = getc (in)) {
973 if (cp == pp) /* end of headers, gobble it */
975 switch (i = getc (in)) {
976 default: /* end of line */
977 case '\n': /* end of headers, save for next call */
978 (void) ungetc (i, in);
981 case ' ': /* continue headers */
985 } /* fall into default case */
992 if ((dp = realloc (pp, (unsigned) (len += BUFSIZ))) == NULL) {
998 cp += dp - pp, ep = (pp = cp) + len - 2;