Removed unneeded sbr code.
[mmh] / sbr / mf.c
1 /*
2  * mf.c -- mail filter subroutines
3  *
4  * This code is Copyright (c) 2002, by the authors of nmh.  See the
5  * COPYRIGHT file in the root directory of the nmh distribution for
6  * complete copyright information.
7  */
8
9 #include <h/mf.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <h/utils.h>
13
/*
 * static prototypes
 *
 * getcpy() and isat() are small string helpers.  The remaining
 * functions are the address parser proper; each of them obtains its
 * tokens from my_lex().
 */
static char *getcpy (char *);
static int isat (char *);
static int parse_address (void);
static int phrase (char *);
static int route_addr (char *);
static int local_part (char *);
static int domain (char *);
static int route (char *);
static int my_lex (char *);
26
27
/*
 * getcpy -- return a freshly allocated copy of the string s.
 *
 * The storage comes from mh_xmalloc() and is released by the callers
 * in this file with free().  Passing NULL is treated as a programming
 * error: the process is terminated with abort().
 */
static char *
getcpy (char *s)
{
        char *p;

        if (!s) {
                /* abort() does not return, so nothing has to follow it */
                abort();
        }

        /*
         * NOTE(review): one byte more than the terminator needs is
         * requested here, as before -- confirm nothing relies on it
         * before trimming the size to strlen (s) + 1.
         */
        p = mh_xmalloc ((size_t) (strlen (s) + 2));
        strcpy (p, s);
        return p;
}
47
48
#define CHKADR 0  /* undetermined address style */
#define UNIXDR 1  /* UNIX-style address */
#define ARPADR 2  /* ARPAnet-style address */
52
53
54 static int
55 isat (char *p)
56 {
57         return (strncmp (p, " AT ", 4)
58                 && strncmp (p, " At ", 4)
59                 && strncmp (p, " aT ", 4)
60                 && strncmp (p, " at ", 4) ? FALSE : TRUE);
61 }
62
63
64 /*
65  *
66  * getadrx() implements a partial 822-style address parser.  The parser
67  * is neither complete nor correct.  It does however recognize nearly all
68  * of the 822 address syntax.  In addition it handles the majority of the
69  * 733 syntax as well.  Most problems arise from trying to accommodate both.
70  *
71  * In terms of 822, the route-specification in
72  *
73  *     "<" [route] local-part "@" domain ">"
74  *
75  * is parsed and returned unchanged.  Multiple at-signs are compressed
76  * via source-routing.  Recursive groups are not allowed as per the
77  * standard.
78  *
79  * In terms of 733, " at " is recognized as equivalent to "@".
80  *
81  * In terms of both the parser will not complain about missing hosts.
82  *
83  * -----
84  *
85  * We should not allow addresses like
86  *
87  *     Marshall T. Rose <MRose@UCI>
88  *
89  * but should insist on
90  *
91  *     "Marshall T. Rose" <MRose@UCI>
92  *
93  * Unfortunately, a lot of mailers stupidly let people get away with this.
94  *
95  * -----
96  *
97  * We should not allow addresses like
98  *
99  *     <MRose@UCI>
100  *
101  * but should insist on
102  *
103  *     MRose@UCI
104  *
105  * Unfortunately, a lot of mailers stupidly let people's UAs get away with
106  * this.
107  *
108  * -----
109  *
110  * We should not allow addresses like
111  *
112  *     @UCI:MRose@UCI-750a
113  *
114  * but should insist on
115  *
116  *     Marshall Rose <@UCI:MRose@UCI-750a>
117  *
118  * Unfortunately, a lot of mailers stupidly do this.
119  *
120  */
121
/* escape character recognised inside comments, quoted strings and literals */
#define QUOTE  '\\'

/*
 * Token codes returned by my_lex() and remembered in last_lex.
 */
#define LX_END   0  /* end of input */
#define LX_ERR   1  /* lexical error */
#define LX_ATOM  2  /* atom */
#define LX_QSTR  3  /* "quoted string" */
#define LX_DLIT  4  /* [domain-literal] */
#define LX_SEMI  5  /* ; */
#define LX_COMA  6  /* , */
#define LX_LBRK  7  /* < */
#define LX_RBRK  8  /* > */
#define LX_COLN  9  /* : */
#define LX_DOT  10  /* . */
#define LX_AT   11  /* @ */

/*
 * One entry of the table that maps a special character to the token
 * code my_lex() reports when it meets that character on its own.
 */
struct specials {
        char lx_chr;  /* the character; 0 marks the end of the table */
        int  lx_val;  /* token code for that character */
};

/*
 * The special characters.  The first seven are tokens in their own
 * right; the remaining ones yield LX_ERR.  The table is terminated by
 * an entry whose lx_chr is 0.
 */
static struct specials special[] = {
        /* single-character tokens */
        { ';', LX_SEMI }, { ',', LX_COMA },
        { '<', LX_LBRK }, { '>', LX_RBRK },
        { ':', LX_COLN }, { '.', LX_DOT },
        { '@', LX_AT },

        /* characters reported as errors */
        { '(', LX_ERR }, { ')', LX_ERR },
        { QUOTE, LX_ERR }, { '"', LX_ERR },
        { '[', LX_ERR }, { ']', LX_ERR },

        /* terminator */
        { 0, 0 }
};
158
/*
 * Parser state shared by getadrx(), parse_address() and my_lex().
 */

/* group nesting depth: raised when a group starts, lowered at its ';' */
static int glevel = 0;
/* value of glevel captured while the current mailbox was parsed */
static int ingrp = 0;
/* code of the token most recently returned by my_lex() */
static int last_lex = LX_END;

/* private copy of the string being parsed; NULL when no parse is active */
static char *dp = NULL;
/* current scan position inside dp; NULL once the end has been reached */
static unsigned char *cp = NULL;
/* where the address currently being parsed starts */
static unsigned char *ap = NULL;
/* components of the current address; released at the next getadrx() call */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;
/* error message for the current address; empty string means no error */
static char err[BUFSIZ];
/* text of the current address; also used as scratch space for my_lex() */
static char adr[BUFSIZ];

/* the result structure handed out by getadrx() */
static struct adrx  adrxs2;
176
177
178 struct adrx *
179 getadrx (char *addrs)
180 {
181         register char *bp;
182         register struct adrx *adrxp = &adrxs2;
183
184         if (pers)
185                 free (pers);
186         if (mbox)
187                 free (mbox);
188         if (host)
189                 free (host);
190         if (path)
191                 free (path);
192         if (grp)
193                 free (grp);
194         if (note)
195                 free (note);
196         pers = mbox = host = path = grp = note = NULL;
197         err[0] = 0;
198
199         if (dp == NULL) {
200                 dp = cp = getcpy (addrs ? addrs : "");
201                 glevel = 0;
202         } else if (cp == NULL) {
203                 free (dp);
204                 dp = NULL;
205                 return NULL;
206         }
207
208         switch (parse_address ()) {
209                 case DONE:
210                         free (dp);
211                         dp = cp = NULL;
212                         return NULL;
213
214                 case OK:
215                         switch (last_lex) {
216                                 case LX_COMA:
217                                 case LX_END:
218                                         break;
219
220                                 default:  /* catch trailing comments */
221                                         bp = cp;
222                                         my_lex (adr);
223                                         cp = bp;
224                                         break;
225                         }
226                         break;
227
228                 default:
229                         break;
230                 }
231
232         if (err[0])
233                 for (;;) {
234                         switch (last_lex) {
235                                 case LX_COMA:
236                                 case LX_END:
237                                         break;
238
239                                 default:
240                                         my_lex (adr);
241                                         continue;
242                         }
243                         break;
244                 }
245         while (isspace (*ap))
246                 ap++;
247         if (cp)
248                 sprintf (adr, "%.*s", (int)(cp - ap), ap);
249         else
250                 strcpy (adr, ap);
251         bp = adr + strlen (adr) - 1;
252         if (*bp == ',' || *bp == ';' || *bp == '\n')
253                 *bp = 0;
254
255         adrxp->text = adr;
256         adrxp->pers = pers;
257         adrxp->mbox = mbox;
258         adrxp->host = host;
259         adrxp->path = path;
260         adrxp->grp = grp;
261         adrxp->ingrp = ingrp;
262         adrxp->note = note;
263         adrxp->err = err[0] ? err : NULL;
264
265         return adrxp;
266 }
267
268
269 static int
270 parse_address (void)
271 {
272         char buffer[BUFSIZ];
273
274 again: ;
275         ap = cp;
276         switch (my_lex (buffer)) {
277                 case LX_ATOM:
278                 case LX_QSTR:
279                         pers = getcpy (buffer);
280                         break;
281
282                 case LX_SEMI:
283                         if (glevel-- <= 0) {
284                                 strcpy (err, "extraneous semi-colon");
285                                 return NOTOK;
286                         }
287                 case LX_COMA:
288                         if (note) {
289                                 free (note);
290                                 note = NULL;
291                         }
292                         goto again;
293
294                 case LX_END:
295                         return DONE;
296
297                 case LX_LBRK:  /* sigh (2) */
298                         goto get_addr;
299
300                 case LX_AT:  /* sigh (3) */
301                         cp = ap;
302                         if (route_addr (buffer) == NOTOK)
303                                 return NOTOK;
304                         return OK;  /* why be choosy? */
305
306                 default:
307                         sprintf (err, "illegal address construct (%s)", buffer);
308                         return NOTOK;
309         }
310
311         switch (my_lex (buffer)) {
312                 case LX_ATOM:
313                 case LX_QSTR:
314                         pers = add (buffer, add (" ", pers));
315         more_phrase: ;  /* sigh (1) */
316                         if (phrase (buffer) == NOTOK)
317                                 return NOTOK;
318
319                         switch (last_lex) {
320                                 case LX_LBRK:
321                         get_addr: ;
322                                         if (route_addr (buffer) == NOTOK)
323                                                 return NOTOK;
324                                         if (last_lex == LX_RBRK)
325                                                 return OK;
326                                         sprintf (err, "missing right-bracket (%s)", buffer);
327                                         return NOTOK;
328
329                                 case LX_COLN:
330                         get_group: ;
331                                         if (glevel++ > 0) {
332                                                 sprintf (err, "nested groups not allowed (%s)", pers);
333                                                 return NOTOK;
334                                         }
335                                         grp = add (": ", pers);
336                                         pers = NULL;
337                                         {
338                                                 char   *pp = cp;
339
340                                                 for (;;)
341                                                         switch (my_lex (buffer)) {
342                                                                 case LX_SEMI:
343                                                                 case LX_END: /* tsk, tsk */
344                                                                         glevel--;
345                                                                         return OK;
346
347                                                                 case LX_COMA:
348                                                                         continue;
349
350                                                                 default:
351                                                                         cp = pp;
352                                                                         return parse_address ();
353                                                         }
354                                         }
355
356                                 case LX_DOT:  /* sigh (1) */
357                                         pers = add (".", pers);
358                                         goto more_phrase;
359
360                                 default:
361                                         sprintf (err, "no mailbox in address, only a phrase (%s%s)", pers, buffer);
362                                         return NOTOK;
363                         }
364
365                 case LX_LBRK:
366                         goto get_addr;
367
368                 case LX_COLN:
369                         goto get_group;
370
371                 case LX_DOT:
372                         mbox = add (buffer, pers);
373                         pers = NULL;
374                         if (route_addr (buffer) == NOTOK)
375                                 return NOTOK;
376                         goto check_end;
377
378                 case LX_AT:
379                         ingrp = glevel;
380                         mbox = pers;
381                         pers = NULL;
382                         if (domain (buffer) == NOTOK)
383                                 return NOTOK;
384         check_end: ;
385                         switch (last_lex) {
386                                 case LX_SEMI:
387                                         if (glevel-- <= 0) {
388                                                 strcpy (err, "extraneous semi-colon");
389                                                 return NOTOK;
390                                         }
391                                 case LX_COMA:
392                                 case LX_END:
393                                         return OK;
394
395                                 default:
396                                         sprintf (err, "junk after local@domain (%s)", buffer);
397                                         return NOTOK;
398                         }
399
400                 case LX_SEMI:  /* no host */
401                 case LX_COMA:
402                 case LX_END:
403                         ingrp = glevel;
404                         if (last_lex == LX_SEMI && glevel-- <= 0) {
405                                 strcpy (err, "extraneous semi-colon");
406                                 return NOTOK;
407                         }
408                         mbox = pers;
409                         pers = NULL;
410                         return OK;
411
412                 default:
413                         sprintf (err, "missing mailbox (%s)", buffer);
414                         return NOTOK;
415         }
416 }
417
418
419 static int
420 phrase (char *buffer)
421 {
422         for (;;)
423                 switch (my_lex (buffer)) {
424                         case LX_ATOM:
425                         case LX_QSTR:
426                                 pers = add (buffer, add (" ", pers));
427                                 continue;
428
429                         default:
430                                 return OK;
431                 }
432 }
433
434
435 static int
436 route_addr (char *buffer)
437 {
438         register char *pp = cp;
439
440         if (my_lex (buffer) == LX_AT) {
441                 if (route (buffer) == NOTOK)
442                         return NOTOK;
443         }
444         else
445                 cp = pp;
446
447         if (local_part (buffer) == NOTOK)
448                 return NOTOK;
449
450         switch (last_lex) {
451                 case LX_AT:
452                         return domain (buffer);
453
454                 case LX_SEMI:  /* if in group */
455                 case LX_RBRK:  /* no host */
456                 case LX_COMA:
457                 case LX_END:
458                         return OK;
459
460                 default:
461                         sprintf (err, "no at-sign after local-part (%s)", buffer);
462                         return NOTOK;
463         }
464 }
465
466
467 static int
468 local_part (char *buffer)
469 {
470         ingrp = glevel;
471
472         for (;;) {
473                 switch (my_lex (buffer)) {
474                         case LX_ATOM:
475                         case LX_QSTR:
476                                 mbox = add (buffer, mbox);
477                                 break;
478
479                         default:
480                                 sprintf (err, "no mailbox in local-part (%s)", buffer);
481                                 return NOTOK;
482                 }
483
484                 switch (my_lex (buffer)) {
485                         case LX_DOT:
486                                 mbox = add (buffer, mbox);
487                                 continue;
488
489                         default:
490                                 return OK;
491                 }
492         }
493 }
494
495
496 static int
497 domain (char *buffer)
498 {
499         for (;;) {
500                 switch (my_lex (buffer)) {
501                         case LX_ATOM:
502                         case LX_DLIT:
503                                 host = add (buffer, host);
504                                 break;
505
506                         default:
507                                 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
508                                 return NOTOK;
509                 }
510
511                 switch (my_lex (buffer)) {
512                         case LX_DOT:
513                                 host = add (buffer, host);
514                                 continue;
515
516                         case LX_AT:  /* sigh (0) */
517                                 mbox = add (host, add ("%", mbox));
518                                 free (host);
519                                 host = NULL;
520                                 continue;
521
522                         default:
523                                 return OK;
524                 }
525         }
526 }
527
528
529 static int
530 route (char *buffer)
531 {
532         path = getcpy ("@");
533
534         for (;;) {
535                 switch (my_lex (buffer)) {
536                         case LX_ATOM:
537                         case LX_DLIT:
538                                 path = add (buffer, path);
539                                 break;
540
541                         default:
542                                 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
543                                 return NOTOK;
544                 }
545                 switch (my_lex (buffer)) {
546                         case LX_COMA:
547                                 path = add (buffer, path);
548                                 for (;;) {
549                                         switch (my_lex (buffer)) {
550                                                 case LX_COMA:
551                                                         continue;
552
553                                                 case LX_AT:
554                                                         path = add (buffer, path);
555                                                         break;
556
557                                                 default:
558                                                         sprintf (err, "no at-sign found for next domain in route (%s)",
559                                                                          buffer);
560                                         }
561                                         break;
562                                 }
563                                 continue;
564
565                         case LX_AT:  /* XXX */
566                         case LX_DOT:
567                                 path = add (buffer, path);
568                                 continue;
569
570                         case LX_COLN:
571                                 path = add (buffer, path);
572                                 return OK;
573
574                         default:
575                                 sprintf (err, "no colon found to terminate route (%s)", buffer);
576                                 return NOTOK;
577                 }
578         }
579 }
580
581
582 static int
583 my_lex (char *buffer)
584 {
585         /* buffer should be at least BUFSIZ bytes long */
586         int i, gotat = 0;
587         register unsigned char c;
588         register char *bp;
589
590         /*
591          * Add C to the buffer bp. After use of this macro *bp is guaranteed
592          * to be within the buffer.
593          */
594 #define ADDCHR(C)  \
595         do { \
596                 *bp++ = (C); \
597                 if ((bp - buffer) == (BUFSIZ-1)) \
598                         goto my_lex_buffull; \
599         } while (0)
600
601         bp = buffer;
602         *bp = 0;
603         if (!cp)
604                 return (last_lex = LX_END);
605
606         gotat = isat (cp);
607         c = *cp++;
608         while (isspace (c))
609                 c = *cp++;
610         if (c == 0) {
611                 cp = NULL;
612                 return (last_lex = LX_END);
613         }
614
615         if (c == '(') {
616                 ADDCHR(c);
617                 for (i = 0;;)
618                         switch (c = *cp++) {
619                                 case 0:
620                                         cp = NULL;
621                                         return (last_lex = LX_ERR);
622                                 case QUOTE:
623                                         ADDCHR(c);
624                                         if ((c = *cp++) == 0) {
625                                                 cp = NULL;
626                                                 return (last_lex = LX_ERR);
627                                         }
628                                         ADDCHR(c);
629                                         continue;
630                                 case '(':
631                                         i++;
632                                 default:
633                                         ADDCHR(c);
634                                         continue;
635                                 case ')':
636                                         ADDCHR(c);
637                                         if (--i < 0) {
638                                                 *bp = 0;
639                                                 note = note ? add (buffer, add (" ", note))
640                                                         : getcpy (buffer);
641                                                 return my_lex (buffer);
642                                         }
643                         }
644         }
645
646         if (c == '"') {
647                 ADDCHR(c);
648                 for (;;)
649                         switch (c = *cp++) {
650                                 case 0:
651                                         cp = NULL;
652                                         return (last_lex = LX_ERR);
653                                 case QUOTE:
654                                         ADDCHR(c);
655                                         if ((c = *cp++) == 0) {
656                                                 cp = NULL;
657                                                 return (last_lex = LX_ERR);
658                                         }
659                                 default:
660                                         ADDCHR(c);
661                                         continue;
662                                 case '"':
663                                         ADDCHR(c);
664                                         *bp = 0;
665                                         return (last_lex = LX_QSTR);
666                         }
667         }
668
669         if (c == '[') {
670                 ADDCHR(c);
671                 for (;;)
672                         switch (c = *cp++) {
673                                 case 0:
674                                         cp = NULL;
675                                         return (last_lex = LX_ERR);
676                                 case QUOTE:
677                                         ADDCHR(c);
678                                         if ((c = *cp++) == 0) {
679                                                 cp = NULL;
680                                                 return (last_lex = LX_ERR);
681                                         }
682                                 default:
683                                         ADDCHR(c);
684                                         continue;
685                                 case ']':
686                                         ADDCHR(c);
687                                         *bp = 0;
688                                         return (last_lex = LX_DLIT);
689                         }
690         }
691
692         ADDCHR(c);
693         *bp = 0;
694         for (i = 0; special[i].lx_chr != 0; i++)
695                 if (c == special[i].lx_chr)
696                         return (last_lex = special[i].lx_val);
697
698         if (iscntrl (c))
699                 return (last_lex = LX_ERR);
700
701         for (;;) {
702                 if ((c = *cp++) == 0)
703                         break;
704                 for (i = 0; special[i].lx_chr != 0; i++)
705                         if (c == special[i].lx_chr)
706                                 goto got_atom;
707                 if (iscntrl (c) || isspace (c))
708                         break;
709                 ADDCHR(c);
710         }
711 got_atom: ;
712         if (c == 0)
713                 cp = NULL;
714         else
715                 cp--;
716         *bp = 0;
717         last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
718                 ? LX_ATOM : LX_AT;
719         return last_lex;
720
721  my_lex_buffull:
722         /* Out of buffer space. *bp is the last byte in the buffer */
723         *bp = 0;
724         return (last_lex = LX_ERR);
725 }
726
727
728 char *
729 legal_person (char *p)
730 {
731         int i;
732         register char *cp;
733         static char buffer[BUFSIZ];
734
735         if (*p == '"')
736                 return p;
737         for (cp = p; *cp; cp++)
738                 for (i = 0; special[i].lx_chr; i++)
739                         if (*cp == special[i].lx_chr) {
740                                 sprintf (buffer, "\"%s\"", p);
741                                 return buffer;
742                         }
743
744         return p;
745 }