Removed the space between function names and the opening parenthesis.
[mmh] / sbr / mf.c
1 /*
2 ** mf.c -- mail filter subroutines
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mf.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <h/utils.h>
13
14 /*
15 ** static prototypes
16 */
static char *getcpy(char *);      /* allocate a copy of a string */
static int isat(char *);          /* does the text start with " at "? */
static int parse_address(void);   /* parse one address from the scan position */
static int phrase(char *);        /* collect further words into pers */
static int route_addr(char *);    /* [route] local-part [@ domain] */
static int local_part(char *);    /* words joined by dots, into mbox */
static int domain(char *);        /* sub-domains joined by dots, into host */
static int route(char *);         /* source route, into path */
static int my_lex(char *);        /* read the next token */
26
27
/*
** Return a newly allocated copy of the string s.
**
** A NULL argument is treated as a programming error and aborts the
** process.  The memory is obtained with mh_xmalloc(); the callers in
** this file release it with free().
*/
static char *
getcpy(char *s)
{
        register char *p;

        if (!s) {
                /* abort() does not return, nothing can follow it */
                abort();
        }
        /*
        ** The request is one byte larger than the string plus its
        ** terminator, exactly as before.
        ** NOTE(review): the reason for the spare byte is not visible
        ** in this file -- confirm before trimming it.
        */
        p = mh_xmalloc((size_t) (strlen(s) + 2));
        strcpy(p, s);
        return p;
}
47
48
/* Address style codes */
#define CHKADR 0  /* undetermined address style */
#define UNIXDR 1  /* UNIX-style address */
#define ARPADR 2  /* ARPAnet-style address */
52
53
54 static int
55 isat(char *p)
56 {
57         return (strncmp(p, " AT ", 4)
58                 && strncmp(p, " At ", 4)
59                 && strncmp(p, " aT ", 4)
60                 && strncmp(p, " at ", 4) ? FALSE : TRUE);
61 }
62
63
64 /*
65 **
66 ** getadrx() implements a partial 822-style address parser.  The parser
67 ** is neither complete nor correct.  It does however recognize nearly all
68 ** of the 822 address syntax.  In addition it handles the majority of the
69 ** 733 syntax as well.  Most problems arise from trying to accommodate both.
70 **
71 ** In terms of 822, the route-specification in
72 **
73 **     "<" [route] local-part "@" domain ">"
74 **
75 ** is parsed and returned unchanged.  Multiple at-signs are compressed
76 ** via source-routing.  Recursive groups are not allowed as per the
77 ** standard.
78 **
79 ** In terms of 733, " at " is recognized as equivalent to "@".
80 **
81 ** In terms of both the parser will not complain about missing hosts.
82 **
83 ** -----
84 **
85 ** We should not allow addresses like
86 **
87 **     Marshall T. Rose <MRose@UCI>
88 **
89 ** but should insist on
90 **
91 **     "Marshall T. Rose" <MRose@UCI>
92 **
93 ** Unfortunately, a lot of mailers stupidly let people get away with this.
94 **
95 ** -----
96 **
97 ** We should not allow addresses like
98 **
99 **     <MRose@UCI>
100 **
101 ** but should insist on
102 **
103 **     MRose@UCI
104 **
105 ** Unfortunately, a lot of mailers stupidly let people's UAs get away with
106 ** this.
107 **
108 ** -----
109 **
110 ** We should not allow addresses like
111 **
112 **     @UCI:MRose@UCI-750a
113 **
114 ** but should insist on
115 **
116 **     Marshall Rose <@UCI:MRose@UCI-750a>
117 **
118 ** Unfortunately, a lot of mailers stupidly do this.
119 **
120 */
121
/*
** Escape character: inside comments, quoted strings and domain
** literals my_lex() copies the character after it without
** interpreting it.
*/
#define QUOTE  '\\'

/* Token codes returned by my_lex() and remembered in last_lex */
#define LX_END   0  /* end of the input */
#define LX_ERR   1  /* malformed or oversized token */
#define LX_ATOM  2  /* plain word */
#define LX_QSTR  3  /* "quoted string" */
#define LX_DLIT  4  /* [domain literal] */
#define LX_SEMI  5  /* ';' */
#define LX_COMA  6  /* ',' */
#define LX_LBRK  7  /* '<' */
#define LX_RBRK  8  /* '>' */
#define LX_COLN  9  /* ':' */
#define LX_DOT  10  /* '.' */
#define LX_AT   11  /* '@', or the word "at" with a blank on each side */
136
/* One entry of the table of characters that have a meaning of their own */
struct specials {
        char lx_chr;  /* the character */
        int  lx_val;  /* token code my_lex() returns for it */
};

/*
** Table of special characters, terminated by an entry whose lx_chr is 0.
** my_lex() returns the listed code when a token starts with one of these
** characters and ends an atom in front of any of them.  legal_person()
** uses the table to decide whether a name has to be quoted.
*/
static struct specials special[] = {
        { ';',   LX_SEMI },
        { ',',   LX_COMA },
        { '<',   LX_LBRK },
        { '>',   LX_RBRK },
        { ':',   LX_COLN },
        { '.',   LX_DOT },
        { '@',   LX_AT },
        { '(',   LX_ERR },
        { ')',   LX_ERR },
        { QUOTE, LX_ERR },
        { '"',   LX_ERR },
        { '[',   LX_ERR },
        { ']',   LX_ERR },
        { 0,     0 }
};
158
/*
** Parser state shared by getadrx() and its helpers.
*/
static int glevel = 0;          /* group counter: raised at "phrase:", lowered at ';' */
static int ingrp = 0;           /* value of glevel when the mailbox was parsed */
static int last_lex = LX_END;   /* code of the token my_lex() returned last */

static char *dp = NULL;         /* working copy of the address list, NULL when idle */
static unsigned char *cp = NULL;  /* scan position in the copy, NULL at its end */
static unsigned char *ap = NULL;  /* where the address being parsed starts */
/* parts of the current address; heap strings freed by the next getadrx() */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;       /* comments "(...)" collected by my_lex() */
static char err[BUFSIZ];        /* error message, empty string if none */
static char adr[BUFSIZ];        /* text of the current address */

static struct adrx  adrxs2;     /* result handed out by getadrx() */
176
177
178 struct adrx *
179 getadrx(char *addrs)
180 {
181         register char *bp;
182         register struct adrx *adrxp = &adrxs2;
183
184         if (pers)
185                 free(pers);
186         if (mbox)
187                 free(mbox);
188         if (host)
189                 free(host);
190         if (path)
191                 free(path);
192         if (grp)
193                 free(grp);
194         if (note)
195                 free(note);
196         pers = mbox = host = path = grp = note = NULL;
197         err[0] = 0;
198
199         if (dp == NULL) {
200                 dp = cp = getcpy(addrs ? addrs : "");
201                 glevel = 0;
202         } else if (cp == NULL) {
203                 free(dp);
204                 dp = NULL;
205                 return NULL;
206         }
207
208         switch (parse_address()) {
209                 case DONE:
210                         free(dp);
211                         dp = cp = NULL;
212                         return NULL;
213
214                 case OK:
215                         switch (last_lex) {
216                                 case LX_COMA:
217                                 case LX_END:
218                                         break;
219
220                                 default:  /* catch trailing comments */
221                                         bp = cp;
222                                         my_lex(adr);
223                                         cp = bp;
224                                         break;
225                         }
226                         break;
227
228                 default:
229                         break;
230                 }
231
232         if (err[0])
233                 for (;;) {
234                         switch (last_lex) {
235                                 case LX_COMA:
236                                 case LX_END:
237                                         break;
238
239                                 default:
240                                         my_lex(adr);
241                                         continue;
242                         }
243                         break;
244                 }
245         while (isspace(*ap))
246                 ap++;
247         if (cp)
248                 sprintf(adr, "%.*s", (int)(cp - ap), ap);
249         else
250                 strcpy(adr, ap);
251         bp = adr + strlen(adr) - 1;
252         if (*bp == ',' || *bp == ';' || *bp == '\n')
253                 *bp = 0;
254
255         adrxp->text = adr;
256         adrxp->pers = pers;
257         adrxp->mbox = mbox;
258         adrxp->host = host;
259         adrxp->path = path;
260         adrxp->grp = grp;
261         adrxp->ingrp = ingrp;
262         adrxp->note = note;
263         adrxp->err = err[0] ? err : NULL;
264
265         return adrxp;
266 }
267
268
269 static int
270 parse_address(void)
271 {
272         char buffer[BUFSIZ];
273
274 again: ;
275         ap = cp;
276         switch (my_lex(buffer)) {
277                 case LX_ATOM:
278                 case LX_QSTR:
279                         pers = getcpy(buffer);
280                         break;
281
282                 case LX_SEMI:
283                         if (glevel-- <= 0) {
284                                 strcpy(err, "extraneous semi-colon");
285                                 return NOTOK;
286                         }
287                 case LX_COMA:
288                         if (note) {
289                                 free(note);
290                                 note = NULL;
291                         }
292                         goto again;
293
294                 case LX_END:
295                         return DONE;
296
297                 case LX_LBRK:  /* sigh (2) */
298                         goto get_addr;
299
300                 case LX_AT:  /* sigh (3) */
301                         cp = ap;
302                         if (route_addr(buffer) == NOTOK)
303                                 return NOTOK;
304                         return OK;  /* why be choosy? */
305
306                 default:
307                         sprintf(err, "illegal address construct (%s)", buffer);
308                         return NOTOK;
309         }
310
311         switch (my_lex(buffer)) {
312                 case LX_ATOM:
313                 case LX_QSTR:
314                         pers = add(buffer, add(" ", pers));
315         more_phrase: ;  /* sigh (1) */
316                         if (phrase(buffer) == NOTOK)
317                                 return NOTOK;
318
319                         switch (last_lex) {
320                                 case LX_LBRK:
321                         get_addr: ;
322                                         if (route_addr(buffer) == NOTOK)
323                                                 return NOTOK;
324                                         if (last_lex == LX_RBRK)
325                                                 return OK;
326                                         sprintf(err, "missing right-bracket (%s)", buffer);
327                                         return NOTOK;
328
329                                 case LX_COLN:
330                         get_group: ;
331                                         if (glevel++ > 0) {
332                                                 sprintf(err, "nested groups not allowed (%s)", pers);
333                                                 return NOTOK;
334                                         }
335                                         grp = add(": ", pers);
336                                         pers = NULL;
337                                         {
338                                                 char   *pp = cp;
339
340                                                 for (;;)
341                                                         switch (my_lex(buffer)) {
342                                                                 case LX_SEMI:
343                                                                 case LX_END: /* tsk, tsk */
344                                                                         glevel--;
345                                                                         return OK;
346
347                                                                 case LX_COMA:
348                                                                         continue;
349
350                                                                 default:
351                                                                         cp = pp;
352                                                                         return parse_address();
353                                                         }
354                                         }
355
356                                 case LX_DOT:  /* sigh (1) */
357                                         pers = add(".", pers);
358                                         goto more_phrase;
359
360                                 default:
361                                         sprintf(err, "no mailbox in address, only a phrase (%s%s)", pers, buffer);
362                                         return NOTOK;
363                         }
364
365                 case LX_LBRK:
366                         goto get_addr;
367
368                 case LX_COLN:
369                         goto get_group;
370
371                 case LX_DOT:
372                         mbox = add(buffer, pers);
373                         pers = NULL;
374                         if (route_addr(buffer) == NOTOK)
375                                 return NOTOK;
376                         goto check_end;
377
378                 case LX_AT:
379                         ingrp = glevel;
380                         mbox = pers;
381                         pers = NULL;
382                         if (domain(buffer) == NOTOK)
383                                 return NOTOK;
384         check_end: ;
385                         switch (last_lex) {
386                                 case LX_SEMI:
387                                         if (glevel-- <= 0) {
388                                                 strcpy(err, "extraneous semi-colon");
389                                                 return NOTOK;
390                                         }
391                                 case LX_COMA:
392                                 case LX_END:
393                                         return OK;
394
395                                 default:
396                                         sprintf(err, "junk after local@domain (%s)", buffer);
397                                         return NOTOK;
398                         }
399
400                 case LX_SEMI:  /* no host */
401                 case LX_COMA:
402                 case LX_END:
403                         ingrp = glevel;
404                         if (last_lex == LX_SEMI && glevel-- <= 0) {
405                                 strcpy(err, "extraneous semi-colon");
406                                 return NOTOK;
407                         }
408                         mbox = pers;
409                         pers = NULL;
410                         return OK;
411
412                 default:
413                         sprintf(err, "missing mailbox (%s)", buffer);
414                         return NOTOK;
415         }
416 }
417
418
419 static int
420 phrase(char *buffer)
421 {
422         for (;;)
423                 switch (my_lex(buffer)) {
424                         case LX_ATOM:
425                         case LX_QSTR:
426                                 pers = add(buffer, add(" ", pers));
427                                 continue;
428
429                         default:
430                                 return OK;
431                 }
432 }
433
434
435 static int
436 route_addr(char *buffer)
437 {
438         register char *pp = cp;
439
440         if (my_lex(buffer) == LX_AT) {
441                 if (route(buffer) == NOTOK)
442                         return NOTOK;
443         }
444         else
445                 cp = pp;
446
447         if (local_part(buffer) == NOTOK)
448                 return NOTOK;
449
450         switch (last_lex) {
451                 case LX_AT:
452                         return domain(buffer);
453
454                 case LX_SEMI:  /* if in group */
455                 case LX_RBRK:  /* no host */
456                 case LX_COMA:
457                 case LX_END:
458                         return OK;
459
460                 default:
461                         sprintf(err, "no at-sign after local-part (%s)", buffer);
462                         return NOTOK;
463         }
464 }
465
466
467 static int
468 local_part(char *buffer)
469 {
470         ingrp = glevel;
471
472         for (;;) {
473                 switch (my_lex(buffer)) {
474                         case LX_ATOM:
475                         case LX_QSTR:
476                                 mbox = add(buffer, mbox);
477                                 break;
478
479                         default:
480                                 sprintf(err, "no mailbox in local-part (%s)",
481                                                 buffer);
482                                 return NOTOK;
483                 }
484
485                 switch (my_lex(buffer)) {
486                         case LX_DOT:
487                                 mbox = add(buffer, mbox);
488                                 continue;
489
490                         default:
491                                 return OK;
492                 }
493         }
494 }
495
496
497 static int
498 domain(char *buffer)
499 {
500         for (;;) {
501                 switch (my_lex(buffer)) {
502                         case LX_ATOM:
503                         case LX_DLIT:
504                                 host = add(buffer, host);
505                                 break;
506
507                         default:
508                                 sprintf(err, "no sub-domain in domain-part of address (%s)", buffer);
509                                 return NOTOK;
510                 }
511
512                 switch (my_lex(buffer)) {
513                         case LX_DOT:
514                                 host = add(buffer, host);
515                                 continue;
516
517                         case LX_AT:  /* sigh (0) */
518                                 mbox = add(host, add("%", mbox));
519                                 free(host);
520                                 host = NULL;
521                                 continue;
522
523                         default:
524                                 return OK;
525                 }
526         }
527 }
528
529
530 static int
531 route(char *buffer)
532 {
533         path = getcpy("@");
534
535         for (;;) {
536                 switch (my_lex(buffer)) {
537                         case LX_ATOM:
538                         case LX_DLIT:
539                                 path = add(buffer, path);
540                                 break;
541
542                         default:
543                                 sprintf(err, "no sub-domain in domain-part of address (%s)", buffer);
544                                 return NOTOK;
545                 }
546                 switch (my_lex(buffer)) {
547                         case LX_COMA:
548                                 path = add(buffer, path);
549                                 for (;;) {
550                                         switch (my_lex(buffer)) {
551                                                 case LX_COMA:
552                                                         continue;
553
554                                                 case LX_AT:
555                                                         path = add(buffer, path);
556                                                         break;
557
558                                                 default:
559                                                         sprintf(err, "no at-sign found for next domain in route (%s)",
560                                                                          buffer);
561                                         }
562                                         break;
563                                 }
564                                 continue;
565
566                         case LX_AT:  /* XXX */
567                         case LX_DOT:
568                                 path = add(buffer, path);
569                                 continue;
570
571                         case LX_COLN:
572                                 path = add(buffer, path);
573                                 return OK;
574
575                         default:
576                                 sprintf(err, "no colon found to terminate route (%s)", buffer);
577                                 return NOTOK;
578                 }
579         }
580 }
581
582
583 static int
584 my_lex(char *buffer)
585 {
586         /* buffer should be at least BUFSIZ bytes long */
587         int i, gotat = 0;
588         register unsigned char c;
589         register char *bp;
590
591         /*
592         ** Add C to the buffer bp. After use of this macro *bp is guaranteed
593         ** to be within the buffer.
594         */
595 #define ADDCHR(C)  \
596         do { \
597                 *bp++ = (C); \
598                 if ((bp - buffer) == (BUFSIZ-1)) \
599                         goto my_lex_buffull; \
600         } while (0)
601
602         bp = buffer;
603         *bp = 0;
604         if (!cp)
605                 return (last_lex = LX_END);
606
607         gotat = isat(cp);
608         c = *cp++;
609         while (isspace(c))
610                 c = *cp++;
611         if (c == 0) {
612                 cp = NULL;
613                 return (last_lex = LX_END);
614         }
615
616         if (c == '(') {
617                 ADDCHR(c);
618                 for (i = 0;;)
619                         switch (c = *cp++) {
620                                 case 0:
621                                         cp = NULL;
622                                         return (last_lex = LX_ERR);
623                                 case QUOTE:
624                                         ADDCHR(c);
625                                         if ((c = *cp++) == 0) {
626                                                 cp = NULL;
627                                                 return (last_lex = LX_ERR);
628                                         }
629                                         ADDCHR(c);
630                                         continue;
631                                 case '(':
632                                         i++;
633                                 default:
634                                         ADDCHR(c);
635                                         continue;
636                                 case ')':
637                                         ADDCHR(c);
638                                         if (--i < 0) {
639                                                 *bp = 0;
640                                                 note = note ? add(buffer, add(" ", note)) : getcpy(buffer);
641                                                 return my_lex(buffer);
642                                         }
643                         }
644         }
645
646         if (c == '"') {
647                 ADDCHR(c);
648                 for (;;)
649                         switch (c = *cp++) {
650                                 case 0:
651                                         cp = NULL;
652                                         return (last_lex = LX_ERR);
653                                 case QUOTE:
654                                         ADDCHR(c);
655                                         if ((c = *cp++) == 0) {
656                                                 cp = NULL;
657                                                 return (last_lex = LX_ERR);
658                                         }
659                                 default:
660                                         ADDCHR(c);
661                                         continue;
662                                 case '"':
663                                         ADDCHR(c);
664                                         *bp = 0;
665                                         return (last_lex = LX_QSTR);
666                         }
667         }
668
669         if (c == '[') {
670                 ADDCHR(c);
671                 for (;;)
672                         switch (c = *cp++) {
673                                 case 0:
674                                         cp = NULL;
675                                         return (last_lex = LX_ERR);
676                                 case QUOTE:
677                                         ADDCHR(c);
678                                         if ((c = *cp++) == 0) {
679                                                 cp = NULL;
680                                                 return (last_lex = LX_ERR);
681                                         }
682                                 default:
683                                         ADDCHR(c);
684                                         continue;
685                                 case ']':
686                                         ADDCHR(c);
687                                         *bp = 0;
688                                         return (last_lex = LX_DLIT);
689                         }
690         }
691
692         ADDCHR(c);
693         *bp = 0;
694         for (i = 0; special[i].lx_chr != 0; i++)
695                 if (c == special[i].lx_chr)
696                         return (last_lex = special[i].lx_val);
697
698         if (iscntrl(c))
699                 return (last_lex = LX_ERR);
700
701         for (;;) {
702                 if ((c = *cp++) == 0)
703                         break;
704                 for (i = 0; special[i].lx_chr != 0; i++)
705                         if (c == special[i].lx_chr)
706                                 goto got_atom;
707                 if (iscntrl(c) || isspace(c))
708                         break;
709                 ADDCHR(c);
710         }
711 got_atom: ;
712         if (c == 0)
713                 cp = NULL;
714         else
715                 cp--;
716         *bp = 0;
717         last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
718                 ? LX_ATOM : LX_AT;
719         return last_lex;
720
721  my_lex_buffull:
722         /* Out of buffer space. *bp is the last byte in the buffer */
723         *bp = 0;
724         return (last_lex = LX_ERR);
725 }
726
727
728 char *
729 legal_person(char *p)
730 {
731         int i;
732         register char *cp;
733         static char buffer[BUFSIZ];
734
735         if (*p == '"')
736                 return p;
737         for (cp = p; *cp; cp++)
738                 for (i = 0; special[i].lx_chr; i++)
739                         if (*cp == special[i].lx_chr) {
740                                 sprintf(buffer, "\"%s\"", p);
741                                 return buffer;
742                         }
743
744         return p;
745 }