2 ** mf.c -- mail filter subroutines
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
17 static char *getcpy(char *);
18 static int isat(char *);
19 static int parse_address(void);
20 static int phrase(char *);
21 static int route_addr(char *);
22 static int local_part(char *);
23 static int domain(char *);
24 static int route(char *);
25 static int my_lex(char *);
35 ** causes compiles to blow up because the symbol _cleanup
36 ** is undefined; where did this ever come from?
43 p = mh_xmalloc((size_t) (strlen(s) + 2));
49 #define CHKADR 0 /* undetermined address style */
50 #define UNIXDR 1 /* UNIX-style address */
51 #define ARPADR 2 /* ARPAnet-style address */
57 return (strncmp(p, " AT ", 4)!=0 && strncmp(p, " At ", 4)!=0 &&
58 strncmp(p, " aT ", 4)!=0 && strncmp(p, " at ", 4)!=0 ?
65 ** getadrx() implements a partial 822-style address parser. The parser
66 ** is neither complete nor correct. It does however recognize nearly all
67 ** of the 822 address syntax. In addition it handles the majority of the
68 ** 733 syntax as well. Most problems arise from trying to accommodate both.
70 ** In terms of 822, the route-specification in
72 ** "<" [route] local-part "@" domain ">"
74 ** is parsed and returned unchanged. Multiple at-signs are compressed
75 ** via source-routing. Recursive groups are not allowed as per the
78 ** In terms of 733, " at " is recognized as equivalent to "@".
80 ** In terms of both, the parser will not complain about missing hosts.
84 ** We should not allow addresses like
86 ** Marshall T. Rose <MRose@UCI>
88 ** but should insist on
90 ** "Marshall T. Rose" <MRose@UCI>
92 ** Unfortunately, a lot of mailers stupidly let people get away with this.
96 ** We should not allow addresses like
100 ** but should insist on
104 ** Unfortunately, a lot of mailers stupidly let people's UAs get away with
109 ** We should not allow addresses like
111 ** @UCI:MRose@UCI-750a
113 ** but should insist on
115 ** Marshall Rose <@UCI:MRose@UCI-750a>
117 ** Unfortunately, a lot of mailers stupidly do this.
141 static struct specials special[] = {
158 static int glevel = 0;
159 static int ingrp = 0;
160 static int last_lex = LX_END;
162 static char *dp = NULL;
163 static unsigned char *cp = NULL;
164 static unsigned char *ap = NULL;
165 static char *pers = NULL;
166 static char *mbox = NULL;
167 static char *host = NULL;
168 static char *path = NULL;
169 static char *grp = NULL;
170 static char *note = NULL;
171 static char err[BUFSIZ];
172 static char adr[BUFSIZ];
174 static struct adrx adrxs2;
181 register struct adrx *adrxp = &adrxs2;
195 pers = mbox = host = path = grp = note = NULL;
199 dp = cp = getcpy(addrs ? addrs : "");
201 } else if (cp == NULL) {
207 switch (parse_address()) {
219 default: /* catch trailing comments */
247 sprintf(adr, "%.*s", (int)(cp - ap), ap);
250 bp = adr + strlen(adr) - 1;
251 if (*bp == ',' || *bp == ';' || *bp == '\n')
260 adrxp->ingrp = ingrp;
262 adrxp->err = err[0] ? err : NULL;
275 switch (my_lex(buffer)) {
278 pers = getcpy(buffer);
283 strcpy(err, "extraneous semi-colon");
296 case LX_LBRK: /* sigh (2) */
299 case LX_AT: /* sigh (3) */
301 if (route_addr(buffer) == NOTOK)
303 return OK; /* why be choosy? */
306 sprintf(err, "illegal address construct (%s)", buffer);
310 switch (my_lex(buffer)) {
313 pers = add(buffer, add(" ", pers));
314 more_phrase: ; /* sigh (1) */
315 if (phrase(buffer) == NOTOK)
321 if (route_addr(buffer) == NOTOK)
323 if (last_lex == LX_RBRK)
325 sprintf(err, "missing right-bracket (%s)", buffer);
331 sprintf(err, "nested groups not allowed (%s)", pers);
334 grp = add(": ", pers);
340 switch (my_lex(buffer)) {
342 case LX_END: /* tsk, tsk */
351 return parse_address();
355 case LX_DOT: /* sigh (1) */
356 pers = add(".", pers);
360 sprintf(err, "no mailbox in address, only a phrase (%s%s)", pers, buffer);
371 mbox = add(buffer, pers);
373 if (route_addr(buffer) == NOTOK)
381 if (domain(buffer) == NOTOK)
387 strcpy(err, "extraneous semi-colon");
395 sprintf(err, "junk after local@domain (%s)", buffer);
399 case LX_SEMI: /* no host */
403 if (last_lex == LX_SEMI && glevel-- <= 0) {
404 strcpy(err, "extraneous semi-colon");
412 sprintf(err, "missing mailbox (%s)", buffer);
422 switch (my_lex(buffer)) {
425 pers = add(buffer, add(" ", pers));
435 route_addr(char *buffer)
437 register char *pp = cp;
439 if (my_lex(buffer) == LX_AT) {
440 if (route(buffer) == NOTOK)
446 if (local_part(buffer) == NOTOK)
451 return domain(buffer);
453 case LX_SEMI: /* if in group */
454 case LX_RBRK: /* no host */
460 sprintf(err, "no at-sign after local-part (%s)", buffer);
467 local_part(char *buffer)
472 switch (my_lex(buffer)) {
475 mbox = add(buffer, mbox);
479 sprintf(err, "no mailbox in local-part (%s)", buffer);
483 switch (my_lex(buffer)) {
485 mbox = add(buffer, mbox);
499 switch (my_lex(buffer)) {
502 host = add(buffer, host);
506 sprintf(err, "no sub-domain in domain-part of address (%s)", buffer);
510 switch (my_lex(buffer)) {
512 host = add(buffer, host);
515 case LX_AT: /* sigh (0) */
516 mbox = add(host, add("%", mbox));
534 switch (my_lex(buffer)) {
537 path = add(buffer, path);
541 sprintf(err, "no sub-domain in domain-part of address (%s)", buffer);
544 switch (my_lex(buffer)) {
546 path = add(buffer, path);
548 switch (my_lex(buffer)) {
553 path = add(buffer, path);
557 sprintf(err, "no at-sign found for next domain in route (%s)",
564 case LX_AT: /* XXX */
566 path = add(buffer, path);
570 path = add(buffer, path);
574 sprintf(err, "no colon found to terminate route (%s)", buffer);
584 /* buffer should be at least BUFSIZ bytes long */
586 register unsigned char c;
590 ** Add C to the buffer bp. After use of this macro *bp is guaranteed
591 ** to be within the buffer.
596 if ((bp - buffer) == (BUFSIZ-1)) \
597 goto my_lex_buffull; \
603 return (last_lex = LX_END);
611 return (last_lex = LX_END);
620 return (last_lex = LX_ERR);
623 if ((c = *cp++) == 0) {
625 return (last_lex = LX_ERR);
638 note = note ? add(buffer, add(" ", note)) : getcpy(buffer);
639 return my_lex(buffer);
650 return (last_lex = LX_ERR);
653 if ((c = *cp++) == 0) {
655 return (last_lex = LX_ERR);
663 return (last_lex = LX_QSTR);
673 return (last_lex = LX_ERR);
676 if ((c = *cp++) == 0) {
678 return (last_lex = LX_ERR);
686 return (last_lex = LX_DLIT);
692 for (i = 0; special[i].lx_chr != 0; i++)
693 if (c == special[i].lx_chr)
694 return (last_lex = special[i].lx_val);
697 return (last_lex = LX_ERR);
700 if ((c = *cp++) == 0)
702 for (i = 0; special[i].lx_chr != 0; i++)
703 if (c == special[i].lx_chr)
705 if (iscntrl(c) || isspace(c))
715 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
720 /* Out of buffer space. *bp is the last byte in the buffer */
722 return (last_lex = LX_ERR);
727 legal_person(char *p)
731 static char buffer[BUFSIZ];
735 for (cp = p; *cp; cp++)
736 for (i = 0; special[i].lx_chr; i++)
737 if (*cp == special[i].lx_chr) {
738 sprintf(buffer, "\"%s\"", p);