Added mh_xcalloc().
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/*
** Objects defined in other source files.
*/
extern int debugsw;
extern int endian;  /* mhmisc.c */
extern pid_t xpid;  /* mhshowsbr.c  */

/*
** Name of the directory in which temporary files are placed.
** It has to be assigned before any routine in this file is called.
*/
char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Examine a process status word.
**
** The status is handed back unchanged, except when its low seven
** bits equal SIGQUIT while the 0xff00 bits are not all set.  In that
** case stdout and stderr are flushed and the program terminates
** with EX_SOFTWARE.
*/
int
pidcheck(int status)
{
	int high_bits_all_set = ((status & 0xff00) == 0xff00);
	int low_bits_are_sigquit = ((status & 0x007f) == SIGQUIT);

	if (!high_bits_all_set && low_bits_are_sigquit) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         int compnum, state;
235         char buf[BUFSIZ], name[NAMESZ];
236         char *np, *vp;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242                 adios(EX_OSERR, NULL, "out of memory");
243
244         ct->c_fp = in;
245         ct->c_file = getcpy(file);
246         ct->c_begin = ftell(ct->c_fp) + 1;
247
248         /*
249         ** Parse the header fields for this
250         ** content into a linked list.
251         */
252         for (compnum = 1, state = FLD;;) {
253                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
254                 case FLD:
255                 case FLDPLUS:
256                 case FLDEOF:
257                         compnum++;
258
259                         /* get copies of the buffers */
260                         np = getcpy(name);
261                         vp = getcpy(buf);
262
263                         /* if necessary, get rest of field */
264                         while (state == FLDPLUS) {
265                                 state = m_getfld(state, name, buf,
266                                                 sizeof(buf), in);
267                                 vp = add(buf, vp);  /* add to previous value */
268                         }
269
270                         /* Now add the header data to the list */
271                         add_header(ct, np, vp);
272
273                         /* continue, if this isn't the last header field */
274                         if (state != FLDEOF) {
275                                 ct->c_begin = ftell(in) + 1;
276                                 continue;
277                         }
278                         /* else fall... */
279
280                 case BODY:
281                 case BODYEOF:
282                         ct->c_begin = ftell(in) - strlen(buf);
283                         break;
284
285                 case FILEEOF:
286                         ct->c_begin = ftell(in);
287                         break;
288
289                 case LENERR:
290                 case FMTERR:
291                         adios(EX_DATAERR, NULL, "message format error in component #%d",
292                                         compnum);
293
294                 default:
295                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
296                 }
297
298                 /* break out of the loop */
299                 break;
300         }
301
302         /*
303         ** Read the content headers.  We will parse the
304         ** MIME related header fields into their various
305         ** structures and set internal flags related to
306         ** content type/subtype, etc.
307         */
308
309         hp = ct->c_first_hf;  /* start at first header field */
310         while (hp) {
311                 /* Get MIME-Version field */
312                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
313                         int ucmp;
314                         char c;
315                         unsigned char *cp, *dp;
316
317                         if (ct->c_vrsn) {
318                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
319                                 goto next_header;
320                         }
321                         ct->c_vrsn = getcpy(hp->value);
322
323                         /* Now, cleanup this field */
324                         cp = ct->c_vrsn;
325
326                         while (isspace(*cp))
327                                 cp++;
328                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
329                                 *dp++ = ' ';
330                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
331                                 if (!isspace(*dp))
332                                         break;
333                         *++dp = '\0';
334                         if (debugsw)
335                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
336
337                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
338                                 goto out;
339
340                         for (dp = cp; istoken(*dp); dp++)
341                                 continue;
342                         c = *dp;
343                         *dp = '\0';
344                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
345                         *dp = c;
346                         if (!ucmp) {
347                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
348                         }
349
350                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
351                         /* Get Content-Type field */
352                         struct str2init *s2i;
353                         CI ci = &ct->c_ctinfo;
354
355                         /* Check if we've already seen a Content-Type header */
356                         if (ct->c_ctline) {
357                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
358                                 goto next_header;
359                         }
360
361                         /* Parse the Content-Type field */
362                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
363                                 goto out;
364
365                         /*
366                         ** Set the Init function and the internal
367                         ** flag for this content type.
368                         */
369                         for (s2i = str2cts; s2i->si_key; s2i++)
370                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
371                                         break;
372                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
373                                 s2i++;
374                         ct->c_type = s2i->si_val;
375                         ct->c_ctinitfnx = s2i->si_init;
376
377                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
378                         /* Get Content-Transfer-Encoding field */
379                         char c;
380                         unsigned char *cp, *dp;
381                         struct str2init *s2i;
382
383                         /*
384                         ** Check if we've already seen the
385                         ** Content-Transfer-Encoding field
386                         */
387                         if (ct->c_celine) {
388                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
389                                 goto next_header;
390                         }
391
392                         /* get copy of this field */
393                         ct->c_celine = cp = getcpy(hp->value);
394
395                         while (isspace(*cp))
396                                 cp++;
397                         for (dp = cp; istoken(*dp); dp++)
398                                 continue;
399                         c = *dp;
400                         *dp = '\0';
401
402                         /*
403                         ** Find the internal flag and Init function
404                         ** for this transfer encoding.
405                         */
406                         for (s2i = str2ces; s2i->si_key; s2i++)
407                                 if (!mh_strcasecmp(cp, s2i->si_key))
408                                         break;
409                         if (!s2i->si_key && !uprf(cp, "X-"))
410                                 s2i++;
411                         *dp = c;
412                         ct->c_encoding = s2i->si_val;
413
414                         /* Call the Init function for this encoding */
415                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
416                                 goto out;
417
418                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
419                         /* Get Content-ID field */
420                         ct->c_id = add(hp->value, ct->c_id);
421
422                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
423                         /* Get Content-Description field */
424                         ct->c_descr = add(hp->value, ct->c_descr);
425
426                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
427                         /* Get Content-Disposition field */
428                         ct->c_dispo = add(hp->value, ct->c_dispo);
429                 }
430
431 next_header:
432                 hp = hp->next;  /* next header field */
433         }
434
435         /*
436         ** Check if we saw a Content-Type field.
437         ** If not, then assign a default value for
438         ** it, and the Init function.
439         */
440         if (!ct->c_ctline) {
441                 /*
442                 ** If we are inside a multipart/digest message,
443                 ** so default type is message/rfc822
444                 */
445                 if (toplevel < 0) {
446                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
447                                 goto out;
448                         ct->c_type = CT_MESSAGE;
449                         ct->c_ctinitfnx = InitMessage;
450                 } else {
451                         /*
452                         ** Else default type is text/plain
453                         */
454                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
455                                 goto out;
456                         ct->c_type = CT_TEXT;
457                         ct->c_ctinitfnx = InitText;
458                 }
459         }
460
461         /* Use default Transfer-Encoding, if necessary */
462         if (!ct->c_celine) {
463                 ct->c_encoding = CE_7BIT;
464                 Init7Bit(ct);
465         }
466
467         return ct;
468
469 out:
470         free_content(ct);
471         return NULL;
472 }
473
474
475 /*
476 ** small routine to add header field to list
477 */
478
479 int
480 add_header(CT ct, char *name, char *value)
481 {
482         HF hp;
483
484         /* allocate header field structure */
485         hp = mh_xmalloc(sizeof(*hp));
486
487         /* link data into header structure */
488         hp->name = name;
489         hp->value = value;
490         hp->next = NULL;
491
492         /* link header structure into the list */
493         if (ct->c_first_hf == NULL) {
494                 ct->c_first_hf = hp;  /* this is the first */
495                 ct->c_last_hf = hp;
496         } else {
497                 ct->c_last_hf->next = hp;  /* add it to the end */
498                 ct->c_last_hf = hp;
499         }
500
501         return 0;
502 }
503
504
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** buf    heap allocated header value; may be NULL
** name   parameter name to look for; must not be NULL
** value  value to insert; with NULL nothing is done
**
** Returns buf itself if nothing had to be inserted.  Otherwise buf
** is freed and a newly allocated string, terminated by a newline,
** is returned.  The search for name= is case sensitive.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *newbuf = (char *)buf;

	/* Assume that name is non-null. */
	if (buf && value) {
		char *name_plus_equal = concat(name, "=", NULL);

		if (!strstr((char *)buf, name_plus_equal)) {
			char *insertion;
			unsigned char *cp;
			char *prefix, *suffix, *semi;

			/*
			** Trim trailing space, esp. newline.  Walking
			** down from the end and testing cp[-1] is also
			** safe for an empty buf.
			*/
			cp = buf + strlen((char *)buf);
			while (cp > buf && isspace(cp[-1]))
				*--cp = '\0';

			insertion = concat("; ", name, "=", "\"", value, "\"",
					NULL);

			/*
			** Insert at first semicolon, if any.
			** If none, append to end.
			*/
			prefix = getcpy((char *)buf);
			if ((semi = strchr(prefix, ';'))) {
				/* split the copy at the semicolon */
				suffix = concat(semi, NULL);
				*semi = '\0';
				newbuf = concat(prefix, insertion, suffix,
						"\n", NULL);
				free(suffix);
			} else {
				/* Append to end. */
				newbuf = concat(buf, insertion, "\n", NULL);
			}

			free(prefix);
			free(insertion);
			free(buf);
		}

		free(name_plus_equal);
	}

	return newbuf;
}
562
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** If the closing quote is missing, the entire value is returned as
** well, instead of scanning beyond the end of the string.
**
** The result is either value itself or a newly allocated string;
** the caller can tell the two apart by comparing against value.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
	char *extracted_name_value = value;
	char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
	char *name_suffix_equals = strstr(value, name_suffix_plus_quote);

	free(name_suffix_plus_quote);
	if (name_suffix_equals) {
		char *name_suffix_begin;
		char *name_suffix_end;

		/*
		** The opening quote is known to exist, because the
		** pattern that just matched ends with it.
		*/
		name_suffix_begin = strchr(name_suffix_equals, '"') + 1;

		/* The closing quote may be absent in malformed input. */
		name_suffix_end = strchr(name_suffix_begin, '"');

		if (name_suffix_end) {
			size_t len = name_suffix_end - name_suffix_begin;

			extracted_name_value = mh_xmalloc(len + 1);
			memcpy(extracted_name_value, name_suffix_begin, len);
			extracted_name_value[len] = '\0';
		}
	}

	return extracted_name_value;
}
595
596 /*
597 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
598 ** directives.  Fills in the information of the CTinfo structure.
599 */
600 int
601 get_ctinfo(unsigned char *cp, CT ct, int magic)
602 {
603         int i;
604         unsigned char *dp;
605         char **ap, **ep;
606         char c;
607         CI ci;
608
609         ci = &ct->c_ctinfo;
610         i = strlen(invo_name) + 2;
611
612         /* store copy of Content-Type line */
613         cp = ct->c_ctline = getcpy(cp);
614
615         while (isspace(*cp))  /* trim leading spaces */
616                 cp++;
617
618         /* change newlines to spaces */
619         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
620                 *dp++ = ' ';
621
622         /* trim trailing spaces */
623         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
624                 if (!isspace(*dp))
625                         break;
626         *++dp = '\0';
627
628         if (debugsw)
629                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
630
631         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
632                 return NOTOK;
633
634         for (dp = cp; istoken(*dp); dp++)
635                 continue;
636         c = *dp, *dp = '\0';
637         ci->ci_type = getcpy(cp);  /* store content type */
638         *dp = c, cp = dp;
639
640         if (!*ci->ci_type) {
641                 advise(NULL, "invalid %s: field in message %s (empty type)",
642                                 TYPE_FIELD, ct->c_file);
643                 return NOTOK;
644         }
645
646         /* down case the content type string */
647         for (dp = ci->ci_type; *dp; dp++)
648                 if (isalpha(*dp) && isupper(*dp))
649                         *dp = tolower(*dp);
650
651         while (isspace(*cp))
652                 cp++;
653
654         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
655                 return NOTOK;
656
657         if (*cp != '/') {
658                 if (!magic)
659                         ci->ci_subtype = getcpy("");
660                 goto magic_skip;
661         }
662
663         cp++;
664         while (isspace(*cp))
665                 cp++;
666
667         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
668                 return NOTOK;
669
670         for (dp = cp; istoken(*dp); dp++)
671                 continue;
672         c = *dp, *dp = '\0';
673         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
674         *dp = c, cp = dp;
675
676         if (!*ci->ci_subtype) {
677                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
678                 return NOTOK;
679         }
680
681         /* down case the content subtype string */
682         for (dp = ci->ci_subtype; *dp; dp++)
683                 if (isalpha(*dp) && isupper(*dp))
684                         *dp = tolower(*dp);
685
686 magic_skip:
687         while (isspace(*cp))
688                 cp++;
689
690         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
691                 return NOTOK;
692
693         /*
694         ** Parse attribute/value pairs given with Content-Type
695         */
696         ep = (ap = ci->ci_attrs) + NPARMS;
697         while (*cp == ';') {
698                 char *vp;
699                 unsigned char *up;
700
701                 if (ap >= ep) {
702                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
703                         return NOTOK;
704                 }
705
706                 cp++;
707                 while (isspace(*cp))
708                         cp++;
709
710                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
711                         return NOTOK;
712
713                 if (*cp == 0) {
714                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
715                         return OK;
716                 }
717
718                 /* down case the attribute name */
719                 for (dp = cp; istoken(*dp); dp++)
720                         if (isalpha(*dp) && isupper(*dp))
721                                 *dp = tolower(*dp);
722
723                 for (up = dp; isspace(*dp);)
724                         dp++;
725                 if (dp == cp || *dp != '=') {
726                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
727                         return NOTOK;
728                 }
729
730                 vp = (*ap = getcpy(cp)) + (up - cp);
731                 *vp = '\0';
732                 for (dp++; isspace(*dp);)
733                         dp++;
734
735                 /* now add the attribute value */
736                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
737
738                 if (*dp == '"') {
739                         for (cp = ++dp, dp = vp;;) {
740                                 switch (c = *cp++) {
741                                 case '\0':
742 bad_quote:
743                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
744                                         return NOTOK;
745
746                                 case '\\':
747                                         *dp++ = c;
748                                         if ((c = *cp++) == '\0')
749                                                 goto bad_quote;
750                                         /* else fall... */
751
752                                 default:
753                                         *dp++ = c;
754                                         continue;
755
756                                 case '"':
757                                         *dp = '\0';
758                                         break;
759                                 }
760                                 break;
761                         }
762                 } else {
763                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
764                                 continue;
765                         *dp = '\0';
766                 }
767                 if (!*vp) {
768                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
769                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
770                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
771                         continue;
772                 }
773                 ap++;
774
775                 while (isspace(*cp))
776                         cp++;
777
778                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
779                         return NOTOK;
780         }
781
782         /*
783         ** Get any <Content-Id> given in buffer
784         */
785         if (magic && *cp == '<') {
786                 if (ct->c_id) {
787                         free(ct->c_id);
788                         ct->c_id = NULL;
789                 }
790                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
791                         advise(NULL, "invalid ID in message %s", ct->c_file);
792                         return NOTOK;
793                 }
794                 c = *dp;
795                 *dp = '\0';
796                 if (*ct->c_id)
797                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
798                 else
799                         ct->c_id = NULL;
800                 *dp++ = c;
801                 cp = dp;
802
803                 while (isspace(*cp))
804                         cp++;
805         }
806
807         /*
808         ** Get any [Content-Description] given in buffer.
809         */
810         if (magic && *cp == '[') {
811                 ct->c_descr = ++cp;
812                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
813                         if (*dp == ']')
814                                 break;
815                 if (dp < cp) {
816                         advise(NULL, "invalid description in message %s",
817                                         ct->c_file);
818                         ct->c_descr = NULL;
819                         return NOTOK;
820                 }
821
822                 c = *dp;
823                 *dp = '\0';
824                 if (*ct->c_descr)
825                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
826                 else
827                         ct->c_descr = NULL;
828                 *dp++ = c;
829                 cp = dp;
830
831                 while (isspace(*cp))
832                         cp++;
833         }
834
835         /*
836         ** Get any {Content-Disposition} given in buffer.
837         */
838         if (magic && *cp == '{') {
839                 ct->c_dispo = ++cp;
840                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
841                         if (*dp == '}')
842                                 break;
843                 if (dp < cp) {
844                         advise(NULL, "invalid disposition in message %s",
845                                         ct->c_file);
846                         ct->c_dispo = NULL;
847                         return NOTOK;
848                 }
849
850                 c = *dp;
851                 *dp = '\0';
852                 if (*ct->c_dispo)
853                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
854                 else
855                         ct->c_dispo = NULL;
856                 *dp++ = c;
857                 cp = dp;
858
859                 while (isspace(*cp))
860                         cp++;
861         }
862
863         /*
864         ** Check if anything is left over
865         */
866         if (*cp) {
867                 if (magic) {
868                         ci->ci_magic = getcpy(cp);
869
870                         /*
871                         ** If there is a Content-Disposition header and
872                         ** it doesn't have a *filename=, extract it from
873                         ** the magic contents.  The mhbasename call skips
874                         ** any leading directory components.
875                         */
876                         if (ct->c_dispo)
877                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
878                         } else
879                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
880         }
881
882         return OK;
883 }
884
885
886 static int
887 get_comment(CT ct, unsigned char **ap, int istype)
888 {
889         int i;
890         char *bp;
891         unsigned char *cp;
892         char c, buffer[BUFSIZ], *dp;
893         CI ci;
894
895         ci = &ct->c_ctinfo;
896         cp = *ap;
897         bp = buffer;
898         cp++;
899
900         for (i = 0;;) {
901                 switch (c = *cp++) {
902                 case '\0':
903 invalid:
904                 advise(NULL, "invalid comment in message %s's %s: field",
905                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
906                 return NOTOK;
907
908                 case '\\':
909                         *bp++ = c;
910                         if ((c = *cp++) == '\0')
911                                 goto invalid;
912                         *bp++ = c;
913                         continue;
914
915                 case '(':
916                         i++;
917                         /* and fall... */
918                 default:
919                         *bp++ = c;
920                         continue;
921
922                 case ')':
923                         if (--i < 0)
924                                 break;
925                         *bp++ = c;
926                         continue;
927                 }
928                 break;
929         }
930         *bp = '\0';
931
932         if (istype) {
933                 if ((dp = ci->ci_comment)) {
934                         ci->ci_comment = concat(dp, " ", buffer, NULL);
935                         free(dp);
936                 } else {
937                         ci->ci_comment = getcpy(buffer);
938                 }
939         }
940
941         while (isspace(*cp))
942                 cp++;
943
944         *ap = cp;
945         return OK;
946 }
947
948
949 /*
950 ** CONTENTS
951 **
952 ** Handles content types audio, image, and video.
953 ** There's not much to do right here.
954 */
955
956 static int
957 InitGeneric(CT ct)
958 {
959         return OK;  /* not much to do here */
960 }
961
962
963 /*
964 ** TEXT
965 */
966
967 static int
968 InitText(CT ct)
969 {
970         char **ap, **ep;
971         struct k2v *kv;
972         struct text *t;
973         CI ci = &ct->c_ctinfo;
974
975         /* check for missing subtype */
976         if (!*ci->ci_subtype)
977                 ci->ci_subtype = add("plain", ci->ci_subtype);
978
979         /* match subtype */
980         for (kv = SubText; kv->kv_key; kv++)
981                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
982                         break;
983         ct->c_subtype = kv->kv_value;
984
985         /* allocate text character set structure */
986         if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
987                 adios(EX_OSERR, NULL, "out of memory");
988         ct->c_ctparams = (void *) t;
989
990         /* scan for charset parameter */
991         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
992                 if (!mh_strcasecmp(*ap, "charset"))
993                         break;
994
995         /* check if content specified a character set */
996         if (*ap) {
997                 /* store its name */
998                 ct->c_charset = getcpy(norm_charmap(*ep));
999                 /* match character set or set to CHARSET_UNKNOWN */
1000                 for (kv = Charset; kv->kv_key; kv++) {
1001                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
1002                                 break;
1003                         }
1004                 }
1005                 t->tx_charset = kv->kv_value;
1006         } else {
1007                 t->tx_charset = CHARSET_UNSPECIFIED;
1008         }
1009
1010         return OK;
1011 }
1012
1013
1014 /*
1015 ** MULTIPART
1016 */
1017
1018 static int
1019 InitMultiPart(CT ct)
1020 {
1021         int inout;
1022         long last, pos;
1023         unsigned char *cp, *dp;
1024         char **ap, **ep;
1025         char *bp, buffer[BUFSIZ];
1026         struct multipart *m;
1027         struct k2v *kv;
1028         struct part *part, **next;
1029         CI ci = &ct->c_ctinfo;
1030         CT p;
1031         FILE *fp;
1032
1033         /*
1034         ** The encoding for multipart messages must be either
1035         ** 7bit, 8bit, or binary (per RFC2045).
1036         */
1037         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1038                 && ct->c_encoding != CE_BINARY) {
1039                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1040                 ct->c_encoding = CE_7BIT;
1041         }
1042
1043         /* match subtype */
1044         for (kv = SubMultiPart; kv->kv_key; kv++)
1045                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1046                         break;
1047         ct->c_subtype = kv->kv_value;
1048
1049         /*
1050         ** Check for "boundary" parameter, which is
1051         ** required for multipart messages.
1052         */
1053         bp = 0;
1054         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1055                 if (!mh_strcasecmp(*ap, "boundary")) {
1056                         bp = *ep;
1057                         break;
1058                 }
1059         }
1060
1061         /* complain if boundary parameter is missing */
1062         if (!*ap) {
1063                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1064                 return NOTOK;
1065         }
1066
1067         /* allocate primary structure for multipart info */
1068         if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1069                 adios(EX_OSERR, NULL, "out of memory");
1070         ct->c_ctparams = (void *) m;
1071
1072         /* check if boundary parameter contains only whitespace characters */
1073         for (cp = bp; isspace(*cp); cp++)
1074                 continue;
1075         if (!*cp) {
1076                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1077                 return NOTOK;
1078         }
1079
1080         /* remove trailing whitespace from boundary parameter */
1081         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1082                 if (!isspace(*dp))
1083                         break;
1084         *++dp = '\0';
1085
1086         /* record boundary separators */
1087         m->mp_start = concat(bp, "\n", NULL);
1088         m->mp_stop = concat(bp, "--\n", NULL);
1089
1090         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1091                 advise(ct->c_file, "unable to open for reading");
1092                 return NOTOK;
1093         }
1094
1095         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1096         last = ct->c_end;
1097         next = &m->mp_parts;
1098         part = NULL;
1099         inout = 1;
1100
1101         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1102                 if (pos > last)
1103                         break;
1104
1105                 pos += strlen(buffer);
1106                 if (buffer[0] != '-' || buffer[1] != '-')
1107                         continue;
1108                 if (inout) {
1109                         if (strcmp(buffer + 2, m->mp_start)!=0)
1110                                 continue;
1111 next_part:
1112                         if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1113                                         == NULL)
1114                                 adios(EX_OSERR, NULL, "out of memory");
1115                         *next = part;
1116                         next = &part->mp_next;
1117
1118                         if (!(p = get_content(fp, ct->c_file,
1119                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1120                                 ct->c_fp = NULL;
1121                                 return NOTOK;
1122                         }
1123                         p->c_fp = NULL;
1124                         part->mp_part = p;
1125                         pos = p->c_begin;
1126                         fseek(fp, pos, SEEK_SET);
1127                         inout = 0;
1128                 } else {
1129                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1130                                 inout = 1;
1131 end_part:
1132                                 p = part->mp_part;
1133                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1134                                 if (p->c_end < p->c_begin)
1135                                         p->c_begin = p->c_end;
1136                                 if (inout)
1137                                         goto next_part;
1138                                 goto last_part;
1139                         } else {
1140                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1141                                         goto end_part;
1142                         }
1143                 }
1144         }
1145
1146         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1147         if (!inout && part) {
1148                 p = part->mp_part;
1149                 p->c_end = ct->c_end;
1150
1151                 if (p->c_begin >= p->c_end) {
1152                         for (next = &m->mp_parts; *next != part;
1153                                 next = &((*next)->mp_next))
1154                                 continue;
1155                         *next = NULL;
1156                         free_content(p);
1157                         free((char *) part);
1158                 }
1159         }
1160
1161 last_part:
1162         /* reverse the order of the parts for multipart/alternative */
1163         if (ct->c_subtype == MULTI_ALTERNATE)
1164                 reverse_parts(ct);
1165
1166         /*
1167         ** label all subparts with part number, and
1168         ** then initialize the content of the subpart.
1169         */
1170         {
1171                 int partnum;
1172                 char *pp;
1173                 char partnam[BUFSIZ];
1174
1175                 if (ct->c_partno) {
1176                         snprintf(partnam, sizeof(partnam), "%s.",
1177                                         ct->c_partno);
1178                         pp = partnam + strlen(partnam);
1179                 } else {
1180                         pp = partnam;
1181                 }
1182
1183                 for (part = m->mp_parts, partnum = 1; part;
1184                         part = part->mp_next, partnum++) {
1185                         p = part->mp_part;
1186
1187                         sprintf(pp, "%d", partnum);
1188                         p->c_partno = getcpy(partnam);
1189
1190                         /* initialize the content of the subparts */
1191                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1192                                 fclose(ct->c_fp);
1193                                 ct->c_fp = NULL;
1194                                 return NOTOK;
1195                         }
1196                 }
1197         }
1198
1199         fclose(ct->c_fp);
1200         ct->c_fp = NULL;
1201         return OK;
1202 }
1203
1204
1205 /*
1206 ** reverse the order of the parts of a multipart
1207 */
1208
1209 static void
1210 reverse_parts(CT ct)
1211 {
1212         int i;
1213         struct multipart *m;
1214         struct part **base, **bmp, **next, *part;
1215
1216         m = (struct multipart *) ct->c_ctparams;
1217
1218         /* if only one part, just return */
1219         if (!m->mp_parts || !m->mp_parts->mp_next)
1220                 return;
1221
1222         /* count number of parts */
1223         i = 0;
1224         for (part = m->mp_parts; part; part = part->mp_next)
1225                 i++;
1226
1227         /* allocate array of pointers to the parts */
1228         if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1229                 adios(EX_OSERR, NULL, "out of memory");
1230         bmp = base;
1231
1232         /* point at all the parts */
1233         for (part = m->mp_parts; part; part = part->mp_next)
1234                 *bmp++ = part;
1235         *bmp = NULL;
1236
1237         /* reverse the order of the parts */
1238         next = &m->mp_parts;
1239         for (bmp--; bmp >= base; bmp--) {
1240                 part = *bmp;
1241                 *next = part;
1242                 next = &part->mp_next;
1243         }
1244         *next = NULL;
1245
1246         /* free array of pointers */
1247         free((char *) base);
1248 }
1249
1250
1251 /*
1252 ** MESSAGE
1253 */
1254
1255 static int
1256 InitMessage(CT ct)
1257 {
1258         struct k2v *kv;
1259         CI ci = &ct->c_ctinfo;
1260
1261         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1262                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1263                 return NOTOK;
1264         }
1265
1266         /* check for missing subtype */
1267         if (!*ci->ci_subtype)
1268                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1269
1270         /* match subtype */
1271         for (kv = SubMessage; kv->kv_key; kv++)
1272                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1273                         break;
1274         ct->c_subtype = kv->kv_value;
1275
1276         switch (ct->c_subtype) {
1277         case MESSAGE_RFC822:
1278                 break;
1279
1280         case MESSAGE_PARTIAL:
1281                 {
1282                 char **ap, **ep;
1283                 struct partial *p;
1284
1285                 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1286                 adios(EX_OSERR, NULL, "out of memory");
1287                 ct->c_ctparams = (void *) p;
1288
1289                 /*
1290                 ** scan for parameters "id", "number",
1291                 ** and "total"
1292                 */
1293                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1294                         if (!mh_strcasecmp(*ap, "id")) {
1295                                 p->pm_partid = getcpy(*ep);
1296                                 continue;
1297                         }
1298                         if (!mh_strcasecmp(*ap, "number")) {
1299                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1300 invalid_param:
1301                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1302                                         return NOTOK;
1303                                 }
1304                                 continue;
1305                         }
1306                         if (!mh_strcasecmp(*ap, "total")) {
1307                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1308                                                 p->pm_maxno < 1)
1309                                         goto invalid_param;
1310                                 continue;
1311                         }
1312                 }
1313
1314                 if (!p->pm_partid || !p->pm_partno
1315                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1316                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1317                         return NOTOK;
1318                 }
1319                 }
1320                 break;
1321
1322         case MESSAGE_EXTERNAL:
1323                 {
1324                 CT p;
1325                 FILE *fp;
1326
1327                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1328                         advise(ct->c_file, "unable to open for reading");
1329                         return NOTOK;
1330                 }
1331
1332                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1333
1334                 if (!(p = get_content(fp, ct->c_file, 0))) {
1335                         ct->c_fp = NULL;
1336                         return NOTOK;
1337                 }
1338
1339                 p->c_fp = NULL;
1340                 p->c_end = p->c_begin;
1341
1342                 fclose(ct->c_fp);
1343                 ct->c_fp = NULL;
1344
1345                 switch (p->c_type) {
1346                 case CT_MULTIPART:
1347                         break;
1348
1349                 case CT_MESSAGE:
1350                         if (p->c_subtype != MESSAGE_RFC822)
1351                                 break;
1352                         /* else fall... */
1353                 default:
1354                         if (p->c_ctinitfnx)
1355                                 (*p->c_ctinitfnx) (p);
1356                         break;
1357                 }
1358                 }
1359                 break;
1360
1361         default:
1362                 break;
1363         }
1364
1365         return OK;
1366 }
1367
1368
1369 /*
1370 ** APPLICATION
1371 */
1372
1373 static int
1374 InitApplication(CT ct)
1375 {
1376         struct k2v *kv;
1377         CI ci = &ct->c_ctinfo;
1378
1379         /* match subtype */
1380         for (kv = SubApplication; kv->kv_key; kv++)
1381                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1382                         break;
1383         ct->c_subtype = kv->kv_value;
1384
1385         return OK;
1386 }
1387
1388
1389 /*
1390 ** TRANSFER ENCODINGS
1391 */
1392
1393 static int
1394 init_encoding(CT ct, OpenCEFunc openfnx)
1395 {
1396         CE ce;
1397
1398         if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1399                 adios(EX_OSERR, NULL, "out of memory");
1400
1401         ct->c_cefile     = ce;
1402         ct->c_ceopenfnx  = openfnx;
1403         ct->c_ceclosefnx = close_encoding;
1404         ct->c_cesizefnx  = size_encoding;
1405
1406         return OK;
1407 }
1408
1409
1410 void
1411 close_encoding(CT ct)
1412 {
1413         CE ce;
1414
1415         if (!(ce = ct->c_cefile))
1416                 return;
1417
1418         if (ce->ce_fp) {
1419                 fclose(ce->ce_fp);
1420                 ce->ce_fp = NULL;
1421         }
1422 }
1423
1424
1425 static unsigned long
1426 size_encoding(CT ct)
1427 {
1428         int fd;
1429         unsigned long size;
1430         char *file;
1431         CE ce;
1432         struct stat st;
1433
1434         if (!(ce = ct->c_cefile))
1435                 return (ct->c_end - ct->c_begin);
1436
1437         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1438                 return (long) st.st_size;
1439
1440         if (ce->ce_file) {
1441                 if (stat(ce->ce_file, &st) != NOTOK)
1442                         return (long) st.st_size;
1443                 else
1444                         return 0L;
1445         }
1446
1447         if (ct->c_encoding == CE_EXTERNAL)
1448                 return (ct->c_end - ct->c_begin);
1449
1450         file = NULL;
1451         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1452                 return (ct->c_end - ct->c_begin);
1453
1454         if (fstat(fd, &st) != NOTOK)
1455                 size = (long) st.st_size;
1456         else
1457                 size = 0L;
1458
1459         (*ct->c_ceclosefnx) (ct);
1460         return size;
1461 }
1462
1463
1464 /*
1465 ** BASE64
1466 */
1467
/*
** Maps a 7-bit character code to its 6-bit base64 value.
** 0xff marks characters outside the base64 alphabet; this includes
** the padding character '=', which the decoder treats separately.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' is 0x3e, '/' is 0x3f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0'..'9' are 0x34..0x3d */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x4f: 'A'..'O' are 0x00..0x0e */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	/* 0x50 - 0x5f: 'P'..'Z' are 0x0f..0x19 */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x6f: 'a'..'o' are 0x1a..0x28 */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	/* 0x70 - 0x7f: 'p'..'z' are 0x29..0x33 */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1486
1487
1488 static int
1489 InitBase64(CT ct)
1490 {
1491         return init_encoding(ct, openBase64);
1492 }
1493
1494
1495 static int
1496 openBase64(CT ct, char **file)
1497 {
1498         int bitno, cc;
1499         int fd, len, skip, own_ct_fp = 0;
1500         unsigned long bits;
1501         unsigned char value, *b, *b1, *b2, *b3;
1502         unsigned char *cp, *ep;
1503         char buffer[BUFSIZ];
1504         /* sbeck -- handle suffixes */
1505         CI ci;
1506         CE ce;
1507
1508         b  = (unsigned char *) &bits;
1509         b1 = &b[endian > 0 ? 1 : 2];
1510         b2 = &b[endian > 0 ? 2 : 1];
1511         b3 = &b[endian > 0 ? 3 : 0];
1512
1513         ce = ct->c_cefile;
1514         if (ce->ce_fp) {
1515                 fseek(ce->ce_fp, 0L, SEEK_SET);
1516                 goto ready_to_go;
1517         }
1518
1519         if (ce->ce_file) {
1520                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1521                         content_error(ce->ce_file, ct,
1522                                         "unable to fopen for reading");
1523                         return NOTOK;
1524                 }
1525                 goto ready_to_go;
1526         }
1527
1528         if (*file == NULL) {
1529                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1530                 ce->ce_unlink = 1;
1531         } else {
1532                 ce->ce_file = getcpy(*file);
1533                 ce->ce_unlink = 0;
1534         }
1535
1536         /* sbeck@cise.ufl.edu -- handle suffixes */
1537         ci = &ct->c_ctinfo;
1538         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1539                         invo_name, ci->ci_type, ci->ci_subtype);
1540         cp = context_find(buffer);
1541         if (cp == NULL || *cp == '\0') {
1542                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1543                                 ci->ci_type);
1544                 cp = context_find(buffer);
1545         }
1546         if (cp != NULL && *cp != '\0') {
1547                 if (ce->ce_unlink) {
1548                         /*
1549                         ** Temporary file already exists, so we rename to
1550                         ** version with extension.
1551                         */
1552                         char *file_org = strdup(ce->ce_file);
1553                         ce->ce_file = add(cp, ce->ce_file);
1554                         if (rename(file_org, ce->ce_file)) {
1555                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1556                                                 file_org);
1557                         }
1558                         free(file_org);
1559
1560                 } else {
1561                         ce->ce_file = add(cp, ce->ce_file);
1562                 }
1563         }
1564
1565         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1566                 content_error(ce->ce_file, ct,
1567                                 "unable to fopen for reading/writing");
1568                 return NOTOK;
1569         }
1570
1571         if ((len = ct->c_end - ct->c_begin) < 0)
1572                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1573
1574         if (!ct->c_fp) {
1575                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1576                         content_error(ct->c_file, ct,
1577                                         "unable to open for reading");
1578                         return NOTOK;
1579                 }
1580                 own_ct_fp = 1;
1581         }
1582
1583         bitno = 18;
1584         bits = 0L;
1585         skip = 0;
1586
1587         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1588         while (len > 0) {
1589                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1590                 case NOTOK:
1591                         content_error(ct->c_file, ct, "error reading from");
1592                         goto clean_up;
1593
1594                 case OK:
1595                         content_error(NULL, ct, "premature eof");
1596                         goto clean_up;
1597
1598                 default:
1599                         if (cc > len)
1600                                 cc = len;
1601                         len -= cc;
1602
1603                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1604                                 switch (*cp) {
1605                                 default:
1606                                         if (isspace(*cp))
1607                                                 break;
1608                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1609                                                 if (debugsw) {
1610                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1611                                                 }
1612                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1613                                                 continue;
1614                                         }
1615
1616                                         bits |= value << bitno;
1617 test_end:
1618                                         if ((bitno -= 6) < 0) {
1619                                                 putc((char) *b1, ce->ce_fp);
1620                                                 if (skip < 2) {
1621                                                         putc((char) *b2, ce->ce_fp);
1622                                                         if (skip < 1) {
1623                                                                 putc((char) *b3, ce->ce_fp);
1624                                                         }
1625                                                 }
1626
1627                                                 if (ferror(ce->ce_fp)) {
1628                                                         content_error(ce->ce_file, ct,
1629                                                                                    "error writing to");
1630                                                         goto clean_up;
1631                                                 }
1632                                                 bitno = 18, bits = 0L, skip = 0;
1633                                         }
1634                                         break;
1635
1636                                 case '=':
1637                                         if (++skip > 3)
1638                                                 goto self_delimiting;
1639                                         goto test_end;
1640                                 }
1641                         }
1642                 }
1643         }
1644
1645         if (bitno != 18) {
1646                 if (debugsw)
1647                         fprintf(stderr, "premature ending (bitno %d)\n",
1648                                         bitno);
1649
1650                 content_error(NULL, ct, "invalid BASE64 encoding");
1651                 goto clean_up;
1652         }
1653
1654 self_delimiting:
1655         fseek(ct->c_fp, 0L, SEEK_SET);
1656
1657         if (fflush(ce->ce_fp)) {
1658                 content_error(ce->ce_file, ct, "error writing to");
1659                 goto clean_up;
1660         }
1661
1662         fseek(ce->ce_fp, 0L, SEEK_SET);
1663
1664 ready_to_go:
1665         *file = ce->ce_file;
1666         if (own_ct_fp) {
1667                 fclose(ct->c_fp);
1668                 ct->c_fp = NULL;
1669         }
1670         return fileno(ce->ce_fp);
1671
1672 clean_up:
1673         free_encoding(ct, 0);
1674         if (own_ct_fp) {
1675                 fclose(ct->c_fp);
1676                 ct->c_fp = NULL;
1677         }
1678         return NOTOK;
1679 }
1680
1681
1682 /*
1683 ** QUOTED PRINTABLE
1684 */
1685
/*
** Maps a 7-bit character code to the value of the hexadecimal digit
** it represents ('0'-'9', 'A'-'F', 'a'-'f').  All other characters
** map to 0 and thus cannot be told apart from '0' by this table alone.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x10 - 0x1f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x20 - 0x2f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0'..'9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x4f: 'A'..'F' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x50 - 0x5f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x6f: 'a'..'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x70 - 0x7f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1704
1705
1706 static int
1707 InitQuoted(CT ct)
1708 {
1709         return init_encoding(ct, openQuoted);
1710 }
1711
1712
1713 static int
1714 openQuoted(CT ct, char **file)
1715 {
1716         int cc, len, quoted, own_ct_fp = 0;
1717         unsigned char *cp, *ep;
1718         char buffer[BUFSIZ];
1719         unsigned char mask = 0;
1720         CE ce;
1721         /* sbeck -- handle suffixes */
1722         CI ci;
1723
1724         ce = ct->c_cefile;
1725         if (ce->ce_fp) {
1726                 fseek(ce->ce_fp, 0L, SEEK_SET);
1727                 goto ready_to_go;
1728         }
1729
1730         if (ce->ce_file) {
1731                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1732                         content_error(ce->ce_file, ct,
1733                                         "unable to fopen for reading");
1734                         return NOTOK;
1735                 }
1736                 goto ready_to_go;
1737         }
1738
1739         if (*file == NULL) {
1740                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1741                 ce->ce_unlink = 1;
1742         } else {
1743                 ce->ce_file = getcpy(*file);
1744                 ce->ce_unlink = 0;
1745         }
1746
1747         /* sbeck@cise.ufl.edu -- handle suffixes */
1748         ci = &ct->c_ctinfo;
1749         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1750                         invo_name, ci->ci_type, ci->ci_subtype);
1751         cp = context_find(buffer);
1752         if (cp == NULL || *cp == '\0') {
1753                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1754                                 ci->ci_type);
1755                 cp = context_find(buffer);
1756         }
1757         if (cp != NULL && *cp != '\0') {
1758                 if (ce->ce_unlink) {
1759                         /*
1760                         ** Temporary file already exists, so we rename to
1761                         ** version with extension.
1762                         */
1763                         char *file_org = strdup(ce->ce_file);
1764                         ce->ce_file = add(cp, ce->ce_file);
1765                         if (rename(file_org, ce->ce_file)) {
1766                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1767                                                 file_org);
1768                         }
1769                         free(file_org);
1770
1771                 } else {
1772                         ce->ce_file = add(cp, ce->ce_file);
1773                 }
1774         }
1775
1776         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1777                 content_error(ce->ce_file, ct,
1778                                 "unable to fopen for reading/writing");
1779                 return NOTOK;
1780         }
1781
1782         if ((len = ct->c_end - ct->c_begin) < 0)
1783                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1784
1785         if (!ct->c_fp) {
1786                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1787                         content_error(ct->c_file, ct,
1788                                         "unable to open for reading");
1789                         return NOTOK;
1790                 }
1791                 own_ct_fp = 1;
1792         }
1793
1794         quoted = 0;
1795
1796         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1797         while (len > 0) {
1798                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1799                         content_error(NULL, ct, "premature eof");
1800                         goto clean_up;
1801                 }
1802
1803                 if ((cc = strlen(buffer)) > len)
1804                         cc = len;
1805                 len -= cc;
1806
1807                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1808                         if (!isspace(*ep))
1809                                 break;
1810                 *++ep = '\n', ep++;
1811
1812                 for (; cp < ep; cp++) {
1813                         if (quoted > 0) {
1814                                 /* in an escape sequence */
1815                                 if (quoted == 1) {
1816                                         /* at byte 1 of an escape sequence */
1817                                         mask = hex2nib[*cp & 0x7f];
1818                                         /* next is byte 2 */
1819                                         quoted = 2;
1820                                 } else {
1821                                         /* at byte 2 of an escape sequence */
1822                                         mask <<= 4;
1823                                         mask |= hex2nib[*cp & 0x7f];
1824                                         putc(mask, ce->ce_fp);
1825                                         if (ferror(ce->ce_fp)) {
1826                                                 content_error(ce->ce_file, ct, "error writing to");
1827                                                 goto clean_up;
1828                                         }
1829                                         /*
1830                                         ** finished escape sequence; next may
1831                                         ** be literal or a new escape sequence
1832                                         */
1833                                         quoted = 0;
1834                                 }
1835                                 /* on to next byte */
1836                                 continue;
1837                         }
1838
1839                         /* not in an escape sequence */
1840                         if (*cp == '=') {
1841                                 /*
1842                                 ** starting an escape sequence,
1843                                 ** or invalid '='?
1844                                 */
1845                                 if (cp + 1 < ep && cp[1] == '\n') {
1846                                         /* "=\n" soft line break, eat the \n */
1847                                         cp++;
1848                                         continue;
1849                                 }
1850                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1851                                         /*
1852                                         ** We don't have 2 bytes left,
1853                                         ** so this is an invalid escape
1854                                         ** sequence; just show the raw bytes
1855                                         ** (below).
1856                                         */
1857                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1858                                         /*
1859                                         ** Next 2 bytes are hex digits,
1860                                         ** making this a valid escape
1861                                         ** sequence; let's decode it (above).
1862                                         */
1863                                         quoted = 1;
1864                                         continue;
1865                                 } else {
1866                                         /*
1867                                         ** One or both of the next 2 is
1868                                         ** out of range, making this an
1869                                         ** invalid escape sequence; just
1870                                         ** show the raw bytes (below).
1871                                         */
1872                                 }
1873                         }
1874
1875                         /* Just show the raw byte. */
1876                         putc(*cp, ce->ce_fp);
1877                         if (ferror(ce->ce_fp)) {
1878                                 content_error(ce->ce_file, ct,
1879                                                 "error writing to");
1880                                 goto clean_up;
1881                         }
1882                 }
1883         }
1884         if (quoted) {
1885                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1886                 goto clean_up;
1887         }
1888
1889         fseek(ct->c_fp, 0L, SEEK_SET);
1890
1891         if (fflush(ce->ce_fp)) {
1892                 content_error(ce->ce_file, ct, "error writing to");
1893                 goto clean_up;
1894         }
1895
1896         fseek(ce->ce_fp, 0L, SEEK_SET);
1897
1898 ready_to_go:
1899         *file = ce->ce_file;
1900         if (own_ct_fp) {
1901                 fclose(ct->c_fp);
1902                 ct->c_fp = NULL;
1903         }
1904         return fileno(ce->ce_fp);
1905
1906 clean_up:
1907         free_encoding(ct, 0);
1908         if (own_ct_fp) {
1909                 fclose(ct->c_fp);
1910                 ct->c_fp = NULL;
1911         }
1912         return NOTOK;
1913 }
1914
1915
1916 /*
1917 ** 7BIT
1918 */
1919
1920 static int
1921 Init7Bit(CT ct)
1922 {
1923         if (init_encoding(ct, open7Bit) == NOTOK)
1924                 return NOTOK;
1925
1926         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1927         return OK;
1928 }
1929
1930
1931 int
1932 open7Bit(CT ct, char **file)
1933 {
1934         int cc, fd, len, own_ct_fp = 0;
1935         char buffer[BUFSIZ];
1936         /* sbeck -- handle suffixes */
1937         char *cp;
1938         CI ci;
1939         CE ce;
1940
1941         ce = ct->c_cefile;
1942         if (ce->ce_fp) {
1943                 fseek(ce->ce_fp, 0L, SEEK_SET);
1944                 goto ready_to_go;
1945         }
1946
1947         if (ce->ce_file) {
1948                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1949                         content_error(ce->ce_file, ct,
1950                                         "unable to fopen for reading");
1951                         return NOTOK;
1952                 }
1953                 goto ready_to_go;
1954         }
1955
1956         if (*file == NULL) {
1957                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1958                 ce->ce_unlink = 1;
1959         } else {
1960                 ce->ce_file = getcpy(*file);
1961                 ce->ce_unlink = 0;
1962         }
1963
1964         /* sbeck@cise.ufl.edu -- handle suffixes */
1965         ci = &ct->c_ctinfo;
1966         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1967                         invo_name, ci->ci_type, ci->ci_subtype);
1968         cp = context_find(buffer);
1969         if (cp == NULL || *cp == '\0') {
1970                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1971                                 ci->ci_type);
1972                 cp = context_find(buffer);
1973         }
1974         if (cp != NULL && *cp != '\0') {
1975                 if (ce->ce_unlink) {
1976                         /*
1977                         ** Temporary file already exists, so we rename to
1978                         ** version with extension.
1979                         */
1980                         char *file_org = strdup(ce->ce_file);
1981                         ce->ce_file = add(cp, ce->ce_file);
1982                         if (rename(file_org, ce->ce_file)) {
1983                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1984                                                 file_org);
1985                         }
1986                         free(file_org);
1987
1988                 } else {
1989                         ce->ce_file = add(cp, ce->ce_file);
1990                 }
1991         }
1992
1993         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1994                 content_error(ce->ce_file, ct,
1995                                 "unable to fopen for reading/writing");
1996                 return NOTOK;
1997         }
1998
1999         if (ct->c_type == CT_MULTIPART) {
2000                 char **ap, **ep;
2001                 CI ci = &ct->c_ctinfo;
2002
2003                 len = 0;
2004                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2005                                 ci->ci_subtype);
2006                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2007                                 strlen(ci->ci_subtype);
2008                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2009                         putc(';', ce->ce_fp);
2010                         len++;
2011
2012                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2013                                         *ap, *ep);
2014
2015                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2016                                 fputs("\n\t", ce->ce_fp);
2017                                 len = 8;
2018                         } else {
2019                                 putc(' ', ce->ce_fp);
2020                                 len++;
2021                         }
2022                         fprintf(ce->ce_fp, "%s", buffer);
2023                         len += cc;
2024                 }
2025
2026                 if (ci->ci_comment) {
2027                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2028                                                 >= CPERLIN) {
2029                                 fputs("\n\t", ce->ce_fp);
2030                                 len = 8;
2031                         } else {
2032                                 putc(' ', ce->ce_fp);
2033                                 len++;
2034                         }
2035                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2036                         len += cc;
2037                 }
2038                 fprintf(ce->ce_fp, "\n");
2039                 if (ct->c_id)
2040                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2041                 if (ct->c_descr)
2042                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2043                 if (ct->c_dispo)
2044                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2045                 fprintf(ce->ce_fp, "\n");
2046         }
2047
2048         if ((len = ct->c_end - ct->c_begin) < 0)
2049                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2050
2051         if (!ct->c_fp) {
2052                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2053                         content_error(ct->c_file, ct,
2054                                         "unable to open for reading");
2055                         return NOTOK;
2056                 }
2057                 own_ct_fp = 1;
2058         }
2059
2060         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2061         while (len > 0)
2062                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2063                 case NOTOK:
2064                         content_error(ct->c_file, ct, "error reading from");
2065                         goto clean_up;
2066
2067                 case OK:
2068                         content_error(NULL, ct, "premature eof");
2069                         goto clean_up;
2070
2071                 default:
2072                         if (cc > len)
2073                                 cc = len;
2074                         len -= cc;
2075
2076                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2077                         if (ferror(ce->ce_fp)) {
2078                                 content_error(ce->ce_file, ct,
2079                                                 "error writing to");
2080                                 goto clean_up;
2081                         }
2082                 }
2083
2084         fseek(ct->c_fp, 0L, SEEK_SET);
2085
2086         if (fflush(ce->ce_fp)) {
2087                 content_error(ce->ce_file, ct, "error writing to");
2088                 goto clean_up;
2089         }
2090
2091         fseek(ce->ce_fp, 0L, SEEK_SET);
2092
2093 ready_to_go:
2094         *file = ce->ce_file;
2095         if (own_ct_fp) {
2096                 fclose(ct->c_fp);
2097                 ct->c_fp = NULL;
2098         }
2099         return fileno(ce->ce_fp);
2100
2101 clean_up:
2102         free_encoding(ct, 0);
2103         if (own_ct_fp) {
2104                 fclose(ct->c_fp);
2105                 ct->c_fp = NULL;
2106         }
2107         return NOTOK;
2108 }