8f7e3579dd2154bc884328001969e9fd4bdbf2f7
[mmh] / sbr / mf.c
1 /*
2 ** mf.c -- mail filter subroutines
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mf.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <h/utils.h>
13
14 /*
15 ** static prototypes
16 */
17 static char *getcpy (char *);
18 static int isat (char *);
19 static int parse_address (void);
20 static int phrase (char *);
21 static int route_addr (char *);
22 static int local_part (char *);
23 static int domain (char *);
24 static int route (char *);
25 static int my_lex (char *);
26
27
/*
** Return a newly allocated copy of the NUL-terminated string s.
** The storage comes from mh_xmalloc() and belongs to the caller.
**
** A NULL argument is treated as a fatal programming error: the
** process is terminated with abort().
*/
static char *
getcpy (char *s)
{
        register char *p;

        if (!s) {
                /*
                ** abort() does not return, so the endless pause() loop
                ** that used to follow it could never run and is gone.
                */
                abort();
        }
        /* allocation size kept as it always was: strlen + 2 */
        p = mh_xmalloc ((size_t) (strlen (s) + 2));
        strcpy (p, s);
        return p;
}
47
48
49 #define CHKADR 0  /* undetermined address style */
50 #define UNIXDR 1  /* UNIX-style address */
51 #define ARPADR 2  /* ARPAnet-style address */
52
53
54 static int
55 isat (char *p)
56 {
57         return (strncmp (p, " AT ", 4)
58                 && strncmp (p, " At ", 4)
59                 && strncmp (p, " aT ", 4)
60                 && strncmp (p, " at ", 4) ? FALSE : TRUE);
61 }
62
63
64 /*
65 **
66 ** getadrx() implements a partial 822-style address parser.  The parser
67 ** is neither complete nor correct.  It does however recognize nearly all
68 ** of the 822 address syntax.  In addition it handles the majority of the
69 ** 733 syntax as well.  Most problems arise from trying to accommodate both.
70 **
71 ** In terms of 822, the route-specification in
72 **
73 **     "<" [route] local-part "@" domain ">"
74 **
75 ** is parsed and returned unchanged.  Multiple at-signs are compressed
76 ** via source-routing.  Recursive groups are not allowed as per the
77 ** standard.
78 **
79 ** In terms of 733, " at " is recognized as equivalent to "@".
80 **
81 ** In terms of both the parser will not complain about missing hosts.
82 **
83 ** -----
84 **
85 ** We should not allow addresses like
86 **
87 **     Marshall T. Rose <MRose@UCI>
88 **
89 ** but should insist on
90 **
91 **     "Marshall T. Rose" <MRose@UCI>
92 **
93 ** Unfortunately, a lot of mailers stupidly let people get away with this.
94 **
95 ** -----
96 **
97 ** We should not allow addresses like
98 **
99 **     <MRose@UCI>
100 **
101 ** but should insist on
102 **
103 **     MRose@UCI
104 **
105 ** Unfortunately, a lot of mailers stupidly let people's UAs get away with
106 ** this.
107 **
108 ** -----
109 **
110 ** We should not allow addresses like
111 **
112 **     @UCI:MRose@UCI-750a
113 **
114 ** but should insist on
115 **
116 **     Marshall Rose <@UCI:MRose@UCI-750a>
117 **
118 ** Unfortunately, a lot of mailers stupidly do this.
119 **
120 */
121
/* the backslash, which my_lex() treats as an escape character */
#define QUOTE  '\\'

/*
** Token codes returned by my_lex() and remembered in last_lex.
*/
enum {
        LX_END  = 0,   /* end of input */
        LX_ERR  = 1,   /* malformed or unexpected input */
        LX_ATOM = 2,   /* plain word */
        LX_QSTR = 3,   /* "quoted string" */
        LX_DLIT = 4,   /* [domain literal] */
        LX_SEMI = 5,   /* ';' */
        LX_COMA = 6,   /* ',' */
        LX_LBRK = 7,   /* '<' */
        LX_RBRK = 8,   /* '>' */
        LX_COLN = 9,   /* ':' */
        LX_DOT  = 10,  /* '.' */
        LX_AT   = 11   /* '@' (or the word " at ") */
};

/*
** One entry of the special-character table: a character and the
** token code my_lex() reports for it.
*/
struct specials {
        char lx_chr;
        int  lx_val;
};

/*
** Characters that may not appear inside an atom.  The table is
** terminated by an entry whose lx_chr is 0.
*/
static struct specials special[] = {
        /* punctuation that is a token of its own */
        { ';',   LX_SEMI },
        { ',',   LX_COMA },
        { '<',   LX_LBRK },
        { '>',   LX_RBRK },
        { ':',   LX_COLN },
        { '.',   LX_DOT },
        { '@',   LX_AT },

        /* delimiters that are errors when met outside their construct */
        { '(',   LX_ERR },
        { ')',   LX_ERR },
        { QUOTE, LX_ERR },
        { '"',   LX_ERR },
        { '[',   LX_ERR },
        { ']',   LX_ERR },

        /* end marker */
        { 0,     0 }
};
158
159 static int glevel = 0;
160 static int ingrp = 0;
161 static int last_lex = LX_END;
162
163 static char *dp = NULL;
164 static unsigned char *cp = NULL;
165 static unsigned char *ap = NULL;
166 static char *pers = NULL;
167 static char *mbox = NULL;
168 static char *host = NULL;
169 static char *path = NULL;
170 static char *grp = NULL;
171 static char *note = NULL;
172 static char err[BUFSIZ];
173 static char adr[BUFSIZ];
174
175 static struct adrx  adrxs2;
176
177
178 struct adrx *
179 getadrx (char *addrs)
180 {
181         register char *bp;
182         register struct adrx *adrxp = &adrxs2;
183
184         if (pers)
185                 free (pers);
186         if (mbox)
187                 free (mbox);
188         if (host)
189                 free (host);
190         if (path)
191                 free (path);
192         if (grp)
193                 free (grp);
194         if (note)
195                 free (note);
196         pers = mbox = host = path = grp = note = NULL;
197         err[0] = 0;
198
199         if (dp == NULL) {
200                 dp = cp = getcpy (addrs ? addrs : "");
201                 glevel = 0;
202         } else if (cp == NULL) {
203                 free (dp);
204                 dp = NULL;
205                 return NULL;
206         }
207
208         switch (parse_address ()) {
209                 case DONE:
210                         free (dp);
211                         dp = cp = NULL;
212                         return NULL;
213
214                 case OK:
215                         switch (last_lex) {
216                                 case LX_COMA:
217                                 case LX_END:
218                                         break;
219
220                                 default:  /* catch trailing comments */
221                                         bp = cp;
222                                         my_lex (adr);
223                                         cp = bp;
224                                         break;
225                         }
226                         break;
227
228                 default:
229                         break;
230                 }
231
232         if (err[0])
233                 for (;;) {
234                         switch (last_lex) {
235                                 case LX_COMA:
236                                 case LX_END:
237                                         break;
238
239                                 default:
240                                         my_lex (adr);
241                                         continue;
242                         }
243                         break;
244                 }
245         while (isspace (*ap))
246                 ap++;
247         if (cp)
248                 sprintf (adr, "%.*s", (int)(cp - ap), ap);
249         else
250                 strcpy (adr, ap);
251         bp = adr + strlen (adr) - 1;
252         if (*bp == ',' || *bp == ';' || *bp == '\n')
253                 *bp = 0;
254
255         adrxp->text = adr;
256         adrxp->pers = pers;
257         adrxp->mbox = mbox;
258         adrxp->host = host;
259         adrxp->path = path;
260         adrxp->grp = grp;
261         adrxp->ingrp = ingrp;
262         adrxp->note = note;
263         adrxp->err = err[0] ? err : NULL;
264
265         return adrxp;
266 }
267
268
269 static int
270 parse_address (void)
271 {
272         char buffer[BUFSIZ];
273
274 again: ;
275         ap = cp;
276         switch (my_lex (buffer)) {
277                 case LX_ATOM:
278                 case LX_QSTR:
279                         pers = getcpy (buffer);
280                         break;
281
282                 case LX_SEMI:
283                         if (glevel-- <= 0) {
284                                 strcpy (err, "extraneous semi-colon");
285                                 return NOTOK;
286                         }
287                 case LX_COMA:
288                         if (note) {
289                                 free (note);
290                                 note = NULL;
291                         }
292                         goto again;
293
294                 case LX_END:
295                         return DONE;
296
297                 case LX_LBRK:  /* sigh (2) */
298                         goto get_addr;
299
300                 case LX_AT:  /* sigh (3) */
301                         cp = ap;
302                         if (route_addr (buffer) == NOTOK)
303                                 return NOTOK;
304                         return OK;  /* why be choosy? */
305
306                 default:
307                         sprintf (err, "illegal address construct (%s)", buffer);
308                         return NOTOK;
309         }
310
311         switch (my_lex (buffer)) {
312                 case LX_ATOM:
313                 case LX_QSTR:
314                         pers = add (buffer, add (" ", pers));
315         more_phrase: ;  /* sigh (1) */
316                         if (phrase (buffer) == NOTOK)
317                                 return NOTOK;
318
319                         switch (last_lex) {
320                                 case LX_LBRK:
321                         get_addr: ;
322                                         if (route_addr (buffer) == NOTOK)
323                                                 return NOTOK;
324                                         if (last_lex == LX_RBRK)
325                                                 return OK;
326                                         sprintf (err, "missing right-bracket (%s)", buffer);
327                                         return NOTOK;
328
329                                 case LX_COLN:
330                         get_group: ;
331                                         if (glevel++ > 0) {
332                                                 sprintf (err, "nested groups not allowed (%s)", pers);
333                                                 return NOTOK;
334                                         }
335                                         grp = add (": ", pers);
336                                         pers = NULL;
337                                         {
338                                                 char   *pp = cp;
339
340                                                 for (;;)
341                                                         switch (my_lex (buffer)) {
342                                                                 case LX_SEMI:
343                                                                 case LX_END: /* tsk, tsk */
344                                                                         glevel--;
345                                                                         return OK;
346
347                                                                 case LX_COMA:
348                                                                         continue;
349
350                                                                 default:
351                                                                         cp = pp;
352                                                                         return parse_address ();
353                                                         }
354                                         }
355
356                                 case LX_DOT:  /* sigh (1) */
357                                         pers = add (".", pers);
358                                         goto more_phrase;
359
360                                 default:
361                                         sprintf (err, "no mailbox in address, only a phrase (%s%s)", pers, buffer);
362                                         return NOTOK;
363                         }
364
365                 case LX_LBRK:
366                         goto get_addr;
367
368                 case LX_COLN:
369                         goto get_group;
370
371                 case LX_DOT:
372                         mbox = add (buffer, pers);
373                         pers = NULL;
374                         if (route_addr (buffer) == NOTOK)
375                                 return NOTOK;
376                         goto check_end;
377
378                 case LX_AT:
379                         ingrp = glevel;
380                         mbox = pers;
381                         pers = NULL;
382                         if (domain (buffer) == NOTOK)
383                                 return NOTOK;
384         check_end: ;
385                         switch (last_lex) {
386                                 case LX_SEMI:
387                                         if (glevel-- <= 0) {
388                                                 strcpy (err, "extraneous semi-colon");
389                                                 return NOTOK;
390                                         }
391                                 case LX_COMA:
392                                 case LX_END:
393                                         return OK;
394
395                                 default:
396                                         sprintf (err, "junk after local@domain (%s)", buffer);
397                                         return NOTOK;
398                         }
399
400                 case LX_SEMI:  /* no host */
401                 case LX_COMA:
402                 case LX_END:
403                         ingrp = glevel;
404                         if (last_lex == LX_SEMI && glevel-- <= 0) {
405                                 strcpy (err, "extraneous semi-colon");
406                                 return NOTOK;
407                         }
408                         mbox = pers;
409                         pers = NULL;
410                         return OK;
411
412                 default:
413                         sprintf (err, "missing mailbox (%s)", buffer);
414                         return NOTOK;
415         }
416 }
417
418
419 static int
420 phrase (char *buffer)
421 {
422         for (;;)
423                 switch (my_lex (buffer)) {
424                         case LX_ATOM:
425                         case LX_QSTR:
426                                 pers = add (buffer, add (" ", pers));
427                                 continue;
428
429                         default:
430                                 return OK;
431                 }
432 }
433
434
435 static int
436 route_addr (char *buffer)
437 {
438         register char *pp = cp;
439
440         if (my_lex (buffer) == LX_AT) {
441                 if (route (buffer) == NOTOK)
442                         return NOTOK;
443         }
444         else
445                 cp = pp;
446
447         if (local_part (buffer) == NOTOK)
448                 return NOTOK;
449
450         switch (last_lex) {
451                 case LX_AT:
452                         return domain (buffer);
453
454                 case LX_SEMI:  /* if in group */
455                 case LX_RBRK:  /* no host */
456                 case LX_COMA:
457                 case LX_END:
458                         return OK;
459
460                 default:
461                         sprintf (err, "no at-sign after local-part (%s)", buffer);
462                         return NOTOK;
463         }
464 }
465
466
467 static int
468 local_part (char *buffer)
469 {
470         ingrp = glevel;
471
472         for (;;) {
473                 switch (my_lex (buffer)) {
474                         case LX_ATOM:
475                         case LX_QSTR:
476                                 mbox = add (buffer, mbox);
477                                 break;
478
479                         default:
480                                 sprintf (err, "no mailbox in local-part (%s)",
481                                                 buffer);
482                                 return NOTOK;
483                 }
484
485                 switch (my_lex (buffer)) {
486                         case LX_DOT:
487                                 mbox = add (buffer, mbox);
488                                 continue;
489
490                         default:
491                                 return OK;
492                 }
493         }
494 }
495
496
497 static int
498 domain (char *buffer)
499 {
500         for (;;) {
501                 switch (my_lex (buffer)) {
502                         case LX_ATOM:
503                         case LX_DLIT:
504                                 host = add (buffer, host);
505                                 break;
506
507                         default:
508                                 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
509                                 return NOTOK;
510                 }
511
512                 switch (my_lex (buffer)) {
513                         case LX_DOT:
514                                 host = add (buffer, host);
515                                 continue;
516
517                         case LX_AT:  /* sigh (0) */
518                                 mbox = add (host, add ("%", mbox));
519                                 free (host);
520                                 host = NULL;
521                                 continue;
522
523                         default:
524                                 return OK;
525                 }
526         }
527 }
528
529
530 static int
531 route (char *buffer)
532 {
533         path = getcpy ("@");
534
535         for (;;) {
536                 switch (my_lex (buffer)) {
537                         case LX_ATOM:
538                         case LX_DLIT:
539                                 path = add (buffer, path);
540                                 break;
541
542                         default:
543                                 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
544                                 return NOTOK;
545                 }
546                 switch (my_lex (buffer)) {
547                         case LX_COMA:
548                                 path = add (buffer, path);
549                                 for (;;) {
550                                         switch (my_lex (buffer)) {
551                                                 case LX_COMA:
552                                                         continue;
553
554                                                 case LX_AT:
555                                                         path = add (buffer, path);
556                                                         break;
557
558                                                 default:
559                                                         sprintf (err, "no at-sign found for next domain in route (%s)",
560                                                                          buffer);
561                                         }
562                                         break;
563                                 }
564                                 continue;
565
566                         case LX_AT:  /* XXX */
567                         case LX_DOT:
568                                 path = add (buffer, path);
569                                 continue;
570
571                         case LX_COLN:
572                                 path = add (buffer, path);
573                                 return OK;
574
575                         default:
576                                 sprintf (err, "no colon found to terminate route (%s)", buffer);
577                                 return NOTOK;
578                 }
579         }
580 }
581
582
583 static int
584 my_lex (char *buffer)
585 {
586         /* buffer should be at least BUFSIZ bytes long */
587         int i, gotat = 0;
588         register unsigned char c;
589         register char *bp;
590
591         /*
592         ** Add C to the buffer bp. After use of this macro *bp is guaranteed
593         ** to be within the buffer.
594         */
595 #define ADDCHR(C)  \
596         do { \
597                 *bp++ = (C); \
598                 if ((bp - buffer) == (BUFSIZ-1)) \
599                         goto my_lex_buffull; \
600         } while (0)
601
602         bp = buffer;
603         *bp = 0;
604         if (!cp)
605                 return (last_lex = LX_END);
606
607         gotat = isat (cp);
608         c = *cp++;
609         while (isspace (c))
610                 c = *cp++;
611         if (c == 0) {
612                 cp = NULL;
613                 return (last_lex = LX_END);
614         }
615
616         if (c == '(') {
617                 ADDCHR(c);
618                 for (i = 0;;)
619                         switch (c = *cp++) {
620                                 case 0:
621                                         cp = NULL;
622                                         return (last_lex = LX_ERR);
623                                 case QUOTE:
624                                         ADDCHR(c);
625                                         if ((c = *cp++) == 0) {
626                                                 cp = NULL;
627                                                 return (last_lex = LX_ERR);
628                                         }
629                                         ADDCHR(c);
630                                         continue;
631                                 case '(':
632                                         i++;
633                                 default:
634                                         ADDCHR(c);
635                                         continue;
636                                 case ')':
637                                         ADDCHR(c);
638                                         if (--i < 0) {
639                                                 *bp = 0;
640                                                 note = note ? add (buffer, add (" ", note))
641                                                         : getcpy (buffer);
642                                                 return my_lex (buffer);
643                                         }
644                         }
645         }
646
647         if (c == '"') {
648                 ADDCHR(c);
649                 for (;;)
650                         switch (c = *cp++) {
651                                 case 0:
652                                         cp = NULL;
653                                         return (last_lex = LX_ERR);
654                                 case QUOTE:
655                                         ADDCHR(c);
656                                         if ((c = *cp++) == 0) {
657                                                 cp = NULL;
658                                                 return (last_lex = LX_ERR);
659                                         }
660                                 default:
661                                         ADDCHR(c);
662                                         continue;
663                                 case '"':
664                                         ADDCHR(c);
665                                         *bp = 0;
666                                         return (last_lex = LX_QSTR);
667                         }
668         }
669
670         if (c == '[') {
671                 ADDCHR(c);
672                 for (;;)
673                         switch (c = *cp++) {
674                                 case 0:
675                                         cp = NULL;
676                                         return (last_lex = LX_ERR);
677                                 case QUOTE:
678                                         ADDCHR(c);
679                                         if ((c = *cp++) == 0) {
680                                                 cp = NULL;
681                                                 return (last_lex = LX_ERR);
682                                         }
683                                 default:
684                                         ADDCHR(c);
685                                         continue;
686                                 case ']':
687                                         ADDCHR(c);
688                                         *bp = 0;
689                                         return (last_lex = LX_DLIT);
690                         }
691         }
692
693         ADDCHR(c);
694         *bp = 0;
695         for (i = 0; special[i].lx_chr != 0; i++)
696                 if (c == special[i].lx_chr)
697                         return (last_lex = special[i].lx_val);
698
699         if (iscntrl (c))
700                 return (last_lex = LX_ERR);
701
702         for (;;) {
703                 if ((c = *cp++) == 0)
704                         break;
705                 for (i = 0; special[i].lx_chr != 0; i++)
706                         if (c == special[i].lx_chr)
707                                 goto got_atom;
708                 if (iscntrl (c) || isspace (c))
709                         break;
710                 ADDCHR(c);
711         }
712 got_atom: ;
713         if (c == 0)
714                 cp = NULL;
715         else
716                 cp--;
717         *bp = 0;
718         last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
719                 ? LX_ATOM : LX_AT;
720         return last_lex;
721
722  my_lex_buffull:
723         /* Out of buffer space. *bp is the last byte in the buffer */
724         *bp = 0;
725         return (last_lex = LX_ERR);
726 }
727
728
729 char *
730 legal_person (char *p)
731 {
732         int i;
733         register char *cp;
734         static char buffer[BUFSIZ];
735
736         if (*p == '"')
737                 return p;
738         for (cp = p; *cp; cp++)
739                 for (i = 0; special[i].lx_chr; i++)
740                         if (*cp == special[i].lx_chr) {
741                                 sprintf (buffer, "\"%s\"", p);
742                                 return buffer;
743                         }
744
745         return p;
746 }