add irc channel to README
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
22 extern int debugsw;
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
28 /*
29 ** Directory to place temp files.  This must
30 ** be set before these routines are called.
31 */
32 char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Examine a status word (presumably one obtained from wait(2) --
** confirm against the callers).  The status is handed back
** unchanged when bits 8..15 are all set, or when the low seven
** bits are anything but SIGQUIT.  In the remaining case stdout and
** stderr are flushed and the process exits with EX_SOFTWARE.
*/
int
pidcheck(int status)
{
	int high_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_seven_bits = status & 0x007f;

	if (!high_byte_all_ones && low_seven_bits == SIGQUIT) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
		return 1;  /* not reached */
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         char *buf;
238         CT ct;
239         HF hp;
240
241         /* allocate the content structure */
242         ct = mh_xcalloc(1, sizeof(*ct));
243
244         ct->c_fp = in;
245         ct->c_file = mh_xstrdup(file);
246         ct->c_begin = ftell(ct->c_fp) + 1;
247
248         /*
249         ** Parse the header fields for this
250         ** content into a linked list.
251         */
252         for (compnum = 1, state = FLD2;;) {
253                 switch (state = m_getfld2(state, &f, in)) {
254                 case LENERR2:
255                         state = FLD2;
256                         /* FALL */
257                 case FLD2:
258                         if (compnum == 1) {
259                                 ct->crlf = f.value[f.valuelen-2] == '\r';
260                         }
261                         compnum++;
262
263                         /* decode rfc2047 */
264                         buf = mh_xcalloc(sizeof(char *), f.valuelen);
265                         if (!decode_rfc2047(f.value, buf, f.valuelen)) {
266                                 mh_free0(&buf);
267                                 buf = mh_xstrdup(f.value);
268                         }
269
270                         /* add the header data to the list */
271                         add_header(ct, mh_xstrdup(f.name), buf);
272
273                         buf = NULL;
274
275                         ct->c_begin = ftell(in) + 1;
276                         continue;
277
278                 case BODY2:
279                         ct->c_begin = ftell(in) - strlen(f.value);
280                         break;
281
282                 case FILEEOF2:
283                         ct->c_begin = ftell(in);
284                         break;
285
286                 case FMTERR2:
287                         advise(NULL, "message format error in component #%d", compnum);
288                         state = FLD2;
289                         continue;
290
291                 case IOERR2:
292                         adios(EX_IOERR, "m_getfld2", "io error");
293
294                 default:
295                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
296                 }
297                 break;
298         }
299
300         /*
301         ** Read the content headers.  We will parse the
302         ** MIME related header fields into their various
303         ** structures and set internal flags related to
304         ** content type/subtype, etc.
305         */
306
307         hp = ct->c_first_hf;  /* start at first header field */
308         while (hp) {
309                 /* Get MIME-Version field */
310                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
311                         int ucmp;
312                         char c;
313                         unsigned char *cp, *dp;
314
315                         if (ct->c_vrsn) {
316                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
317                                 goto next_header;
318                         }
319                         ct->c_vrsn = mh_xstrdup(hp->value);
320
321                         /* Now, cleanup this field */
322                         cp = ct->c_vrsn;
323
324                         while (isspace(*cp))
325                                 cp++;
326                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
327                                 *dp++ = ' ';
328                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
329                                 if (!isspace(*dp))
330                                         break;
331                         *++dp = '\0';
332                         if (debugsw)
333                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
334
335                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
336                                 goto out;
337
338                         for (dp = cp; istoken(*dp); dp++)
339                                 continue;
340                         c = *dp;
341                         *dp = '\0';
342                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
343                         *dp = c;
344                         if (!ucmp) {
345                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
346                         }
347
348                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349                         /* Get Content-Type field */
350                         struct str2init *s2i;
351                         CI ci = &ct->c_ctinfo;
352
353                         /* Check if we've already seen a Content-Type header */
354                         if (ct->c_ctline) {
355                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
356                                 goto next_header;
357                         }
358
359                         /* Parse the Content-Type field */
360                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
361                                 goto out;
362
363                         /*
364                         ** Set the Init function and the internal
365                         ** flag for this content type.
366                         */
367                         for (s2i = str2cts; s2i->si_key; s2i++)
368                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
369                                         break;
370                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
371                                 s2i++;
372                         ct->c_type = s2i->si_val;
373                         ct->c_ctinitfnx = s2i->si_init;
374
375                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376                         /* Get Content-Transfer-Encoding field */
377                         char c;
378                         unsigned char *cp, *dp;
379                         struct str2init *s2i;
380
381                         /*
382                         ** Check if we've already seen the
383                         ** Content-Transfer-Encoding field
384                         */
385                         if (ct->c_celine) {
386                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
387                                 goto next_header;
388                         }
389
390                         /* get copy of this field */
391                         ct->c_celine = cp = mh_xstrdup(hp->value);
392
393                         while (isspace(*cp))
394                                 cp++;
395                         for (dp = cp; istoken(*dp); dp++)
396                                 continue;
397                         c = *dp;
398                         *dp = '\0';
399
400                         /*
401                         ** Find the internal flag and Init function
402                         ** for this transfer encoding.
403                         */
404                         for (s2i = str2ces; s2i->si_key; s2i++)
405                                 if (!mh_strcasecmp(cp, s2i->si_key))
406                                         break;
407                         if (!s2i->si_key && !uprf(cp, "X-"))
408                                 s2i++;
409                         *dp = c;
410                         ct->c_encoding = s2i->si_val;
411
412                         /* Call the Init function for this encoding */
413                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
414                                 goto out;
415
416                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417                         /* Get Content-ID field */
418                         ct->c_id = add(hp->value, ct->c_id);
419
420                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421                         /* Get Content-Description field */
422                         ct->c_descr = add(hp->value, ct->c_descr);
423
424                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425                         /* Get Content-Disposition field */
426                         ct->c_dispo = add(hp->value, ct->c_dispo);
427                 }
428
429 next_header:
430                 hp = hp->next;  /* next header field */
431         }
432
433         /*
434         ** Check if we saw a Content-Type field.
435         ** If not, then assign a default value for
436         ** it, and the Init function.
437         */
438         if (!ct->c_ctline) {
439                 /*
440                 ** If we are inside a multipart/digest message,
441                 ** so default type is message/rfc822
442                 */
443                 if (toplevel < 0) {
444                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
445                                 goto out;
446                         ct->c_type = CT_MESSAGE;
447                         ct->c_ctinitfnx = InitMessage;
448                 } else {
449                         /*
450                         ** Else default type is text/plain
451                         */
452                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
453                                 goto out;
454                         ct->c_type = CT_TEXT;
455                         ct->c_ctinitfnx = InitText;
456                 }
457         }
458
459         /* Use default Transfer-Encoding, if necessary */
460         if (!ct->c_celine) {
461                 ct->c_encoding = CE_7BIT;
462                 Init7Bit(ct);
463         }
464
465         return ct;
466
467 out:
468         free_content(ct);
469         return NULL;
470 }
471
472
473 /*
474 ** small routine to add header field to list
475 */
476
477 int
478 add_header(CT ct, char *name, char *value)
479 {
480         HF hp;
481
482         /* allocate header field structure */
483         hp = mh_xcalloc(1, sizeof(*hp));
484
485         /* link data into header structure */
486         hp->name = name;
487         hp->value = value;
488         hp->next = NULL;
489
490         /* link header structure into the list */
491         if (ct->c_first_hf == NULL) {
492                 ct->c_first_hf = hp;  /* this is the first */
493                 ct->c_last_hf = hp;
494         } else {
495                 ct->c_last_hf->next = hp;  /* add it to the end */
496                 ct->c_last_hf = hp;
497         }
498
499         return 0;
500 }
501
502
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** Returns buf itself if nothing had to be inserted (buf or value
** is NULL, or name= is already present).  Otherwise buf is freed
** and a newly allocated string, terminated by a newline, is
** returned; the caller has to replace its pointer with the
** return value.  Name is assumed to be non-NULL.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *str = (char *)buf;
	char *newbuf = str;
	char *name_plus_equal;
	char *insertion, *prefix, *semicolon;
	size_t len;
	int present;

	if (!buf || !value)
		return newbuf;

	name_plus_equal = concat(name, "=", NULL);
	present = (strstr(str, name_plus_equal) != NULL);
	mh_free0(&name_plus_equal);
	if (present)
		return newbuf;

	/*
	** Trim trailing space, esp. newline.  Work with the length
	** rather than a pointer to the last character, so that an
	** empty string never makes us step in front of the buffer.
	*/
	for (len = strlen(str); len > 0 && isspace(buf[len - 1]); len--)
		buf[len - 1] = '\0';

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	/*
	** Insert at first semicolon, if any.
	** If none, append to end.
	*/
	prefix = mh_xstrdup(str);
	if ((semicolon = strchr(prefix, ';'))) {
		char *suffix = concat(semicolon, NULL);

		*semicolon = '\0';  /* prefix now ends before the ';' */
		newbuf = concat(prefix, insertion, suffix, "\n", NULL);
		mh_free0(&suffix);
	} else {
		/* Append to end. */
		newbuf = concat(str, insertion, "\n", NULL);
	}

	mh_free0(&prefix);
	mh_free0(&insertion);
	mh_free0(&buf);  /* the old string is replaced by newbuf */

	return newbuf;
}
560
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** If the closing quote is missing, everything up to the end of
** value is taken.
**
** The result is either value itself (NULL included) or a newly
** allocated string.  A caller that wants to free the result has
** to compare it against value first.
*/
char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern;
	char *match;
	char *begin, *end;
	char *extracted;
	size_t len;

	if (!value) {
		return value;
	}

	/* look for name_suffix=" */
	pattern = concat(name_suffix, "=\"", NULL);
	match = strstr(value, pattern);
	mh_free0(&pattern);
	if (!match) {
		return value;
	}

	/* The match contains a quote, thus strchr() cannot fail here. */
	begin = strchr(match, '"') + 1;

	/*
	** Find the closing quote, but never search beyond
	** the end of the string if there is none.
	*/
	end = strchr(begin, '"');
	if (!end) {
		end = begin + strlen(begin);
	}
	len = end - begin;

	extracted = mh_xcalloc(len + 1, sizeof(char));
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
599
600 /*
601 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
602 ** directives.  Fills in the information of the CTinfo structure.
603 */
604 int
605 get_ctinfo(unsigned char *cp, CT ct, int magic)
606 {
607         int i;
608         unsigned char *dp;
609         char **ap, **ep;
610         char c;
611         CI ci;
612
613         ci = &ct->c_ctinfo;
614         i = strlen(invo_name) + 2;
615
616         /* store copy of Content-Type line */
617         cp = ct->c_ctline = mh_xstrdup(cp);
618
619         while (isspace(*cp))  /* trim leading spaces */
620                 cp++;
621
622         /* change newlines to spaces */
623         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
624                 *dp++ = ' ';
625
626         /* trim trailing spaces */
627         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
628                 if (!isspace(*dp))
629                         break;
630         *++dp = '\0';
631
632         if (debugsw)
633                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
634
635         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
636                 return NOTOK;
637
638         for (dp = cp; istoken(*dp); dp++)
639                 continue;
640         c = *dp, *dp = '\0';
641         ci->ci_type = mh_xstrdup(cp);  /* store content type */
642         *dp = c, cp = dp;
643
644         if (!*ci->ci_type) {
645                 advise(NULL, "invalid %s: field in message %s (empty type)",
646                                 TYPE_FIELD, ct->c_file);
647                 return NOTOK;
648         }
649
650         /* down case the content type string */
651         for (dp = ci->ci_type; *dp; dp++)
652                 if (isalpha(*dp) && isupper(*dp))
653                         *dp = tolower(*dp);
654
655         while (isspace(*cp))
656                 cp++;
657
658         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
659                 return NOTOK;
660
661         if (*cp != '/') {
662                 if (!magic)
663                         ci->ci_subtype = mh_xstrdup("");
664                 goto magic_skip;
665         }
666
667         cp++;
668         while (isspace(*cp))
669                 cp++;
670
671         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
672                 return NOTOK;
673
674         for (dp = cp; istoken(*dp); dp++)
675                 continue;
676         c = *dp, *dp = '\0';
677         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
678         *dp = c, cp = dp;
679
680         if (!*ci->ci_subtype) {
681                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
682                 return NOTOK;
683         }
684
685         /* down case the content subtype string */
686         for (dp = ci->ci_subtype; *dp; dp++)
687                 if (isalpha(*dp) && isupper(*dp))
688                         *dp = tolower(*dp);
689
690 magic_skip:
691         while (isspace(*cp))
692                 cp++;
693
694         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
695                 return NOTOK;
696
697         /*
698         ** Parse attribute/value pairs given with Content-Type
699         */
700         ep = (ap = ci->ci_attrs) + NPARMS;
701         while (*cp == ';') {
702                 char *vp;
703                 unsigned char *up;
704
705                 if (ap >= ep) {
706                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
707                         return NOTOK;
708                 }
709
710                 cp++;
711                 while (isspace(*cp))
712                         cp++;
713
714                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
715                         return NOTOK;
716
717                 if (*cp == 0) {
718                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
719                         return OK;
720                 }
721
722                 /* down case the attribute name */
723                 for (dp = cp; istoken(*dp); dp++)
724                         if (isalpha(*dp) && isupper(*dp))
725                                 *dp = tolower(*dp);
726
727                 for (up = dp; isspace(*dp);)
728                         dp++;
729                 if (dp == cp || *dp != '=') {
730                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
731                         return NOTOK;
732                 }
733
734                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
735                 *vp = '\0';
736                 for (dp++; isspace(*dp);)
737                         dp++;
738
739                 /* now add the attribute value */
740                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
741
742                 if (*dp == '"') {
743                         for (cp = ++dp, dp = vp;;) {
744                                 switch (c = *cp++) {
745                                 case '\0':
746 bad_quote:
747                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
748                                         return NOTOK;
749
750                                 case '\\':
751                                         *dp++ = c;
752                                         if ((c = *cp++) == '\0')
753                                                 goto bad_quote;
754                                         /* else fall... */
755
756                                 default:
757                                         *dp++ = c;
758                                         continue;
759
760                                 case '"':
761                                         *dp = '\0';
762                                         break;
763                                 }
764                                 break;
765                         }
766                 } else {
767                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
768                                 continue;
769                         *dp = '\0';
770                 }
771                 if (!*vp) {
772                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
773                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
774                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
775                         continue;
776                 }
777                 ap++;
778
779                 while (isspace(*cp))
780                         cp++;
781
782                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
783                         return NOTOK;
784         }
785
786         /*
787         ** Get any <Content-Id> given in buffer
788         */
789         if (magic && *cp == '<') {
790                 if (ct->c_id) {
791                         mh_free0(&(ct->c_id));
792                 }
793                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
794                         advise(NULL, "invalid ID in message %s", ct->c_file);
795                         return NOTOK;
796                 }
797                 c = *dp;
798                 *dp = '\0';
799                 if (*ct->c_id)
800                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
801                 else
802                         ct->c_id = NULL;
803                 *dp++ = c;
804                 cp = dp;
805
806                 while (isspace(*cp))
807                         cp++;
808         }
809
810         /*
811         ** Get any [Content-Description] given in buffer.
812         */
813         if (magic && *cp == '[') {
814                 ct->c_descr = ++cp;
815                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
816                         if (*dp == ']')
817                                 break;
818                 if (dp < cp) {
819                         advise(NULL, "invalid description in message %s",
820                                         ct->c_file);
821                         ct->c_descr = NULL;
822                         return NOTOK;
823                 }
824
825                 c = *dp;
826                 *dp = '\0';
827                 if (*ct->c_descr)
828                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
829                 else
830                         ct->c_descr = NULL;
831                 *dp++ = c;
832                 cp = dp;
833
834                 while (isspace(*cp))
835                         cp++;
836         }
837
838         /*
839         ** Get any {Content-Disposition} given in buffer.
840         */
841         if (magic && *cp == '{') {
842                 ct->c_dispo = ++cp;
843                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
844                         if (*dp == '}')
845                                 break;
846                 if (dp < cp) {
847                         advise(NULL, "invalid disposition in message %s",
848                                         ct->c_file);
849                         ct->c_dispo = NULL;
850                         return NOTOK;
851                 }
852
853                 c = *dp;
854                 *dp = '\0';
855                 if (*ct->c_dispo)
856                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
857                 else
858                         ct->c_dispo = NULL;
859                 *dp++ = c;
860                 cp = dp;
861
862                 while (isspace(*cp))
863                         cp++;
864         }
865
866         /*
867         ** Check if anything is left over
868         */
869         if (*cp) {
870                 if (magic) {
871                         ci->ci_magic = mh_xstrdup(cp);
872
873                         /*
874                         ** If there is a Content-Disposition header and
875                         ** it doesn't have a *filename=, extract it from
876                         ** the magic contents.  The mhbasename call skips
877                         ** any leading directory components.
878                         */
879                         if (ct->c_dispo)
880                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
881                         } else
882                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
883         }
884
885         return OK;
886 }
887
888
889 static int
890 get_comment(CT ct, unsigned char **ap, int istype)
891 {
892         int i;
893         char *bp;
894         unsigned char *cp;
895         char c, buffer[BUFSIZ], *dp;
896         CI ci;
897
898         ci = &ct->c_ctinfo;
899         cp = *ap;
900         bp = buffer;
901         cp++;
902
903         for (i = 0;;) {
904                 switch (c = *cp++) {
905                 case '\0':
906 invalid:
907                 advise(NULL, "invalid comment in message %s's %s: field",
908                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
909                 return NOTOK;
910
911                 case '\\':
912                         *bp++ = c;
913                         if ((c = *cp++) == '\0')
914                                 goto invalid;
915                         *bp++ = c;
916                         continue;
917
918                 case '(':
919                         i++;
920                         /* and fall... */
921                 default:
922                         *bp++ = c;
923                         continue;
924
925                 case ')':
926                         if (--i < 0)
927                                 break;
928                         *bp++ = c;
929                         continue;
930                 }
931                 break;
932         }
933         *bp = '\0';
934
935         if (istype) {
936                 if ((dp = ci->ci_comment)) {
937                         ci->ci_comment = concat(dp, " ", buffer, NULL);
938                         mh_free0(&dp);
939                 } else {
940                         ci->ci_comment = mh_xstrdup(buffer);
941                 }
942         }
943
944         while (isspace(*cp))
945                 cp++;
946
947         *ap = cp;
948         return OK;
949 }
950
951
952 /*
953 ** CONTENTS
954 **
955 ** Handles content types audio, image, and video.
956 ** There's not much to do right here.
957 */
958
959 static int
960 InitGeneric(CT ct)
961 {
962         return OK;  /* not much to do here */
963 }
964
965
966 /*
967 ** TEXT
968 */
969
970 static int
971 InitText(CT ct)
972 {
973         char **ap, **ep;
974         struct k2v *kv;
975         struct text *t;
976         CI ci = &ct->c_ctinfo;
977
978         /* check for missing subtype */
979         if (!*ci->ci_subtype)
980                 ci->ci_subtype = add("plain", ci->ci_subtype);
981
982         /* match subtype */
983         for (kv = SubText; kv->kv_key; kv++)
984                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
985                         break;
986         ct->c_subtype = kv->kv_value;
987
988         /* allocate text character set structure */
989         t = mh_xcalloc(1, sizeof(*t));
990         ct->c_ctparams = (void *) t;
991
992         /* scan for charset parameter */
993         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
994                 if (!mh_strcasecmp(*ap, "charset"))
995                         break;
996
997         /* check if content specified a character set */
998         if (*ap) {
999                 /* store its name */
1000                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
1001                 /* match character set or set to CHARSET_UNKNOWN */
1002                 for (kv = Charset; kv->kv_key; kv++) {
1003                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
1004                                 break;
1005                         }
1006                 }
1007                 t->tx_charset = kv->kv_value;
1008         } else {
1009                 t->tx_charset = CHARSET_UNSPECIFIED;
1010         }
1011
1012         return OK;
1013 }
1014
1015
1016 /*
1017 ** MULTIPART
1018 */
1019
1020 static int
1021 InitMultiPart(CT ct)
1022 {
1023         int inout;
1024         long last, pos;
1025         unsigned char *cp, *dp;
1026         char **ap, **ep;
1027         char *bp, buffer[BUFSIZ];
1028         struct multipart *m;
1029         struct k2v *kv;
1030         struct part *part, **next;
1031         CI ci = &ct->c_ctinfo;
1032         CT p;
1033         FILE *fp;
1034
1035         /*
1036         ** The encoding for multipart messages must be either
1037         ** 7bit, 8bit, or binary (per RFC2045).
1038         */
1039         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1040                 && ct->c_encoding != CE_BINARY) {
1041                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1042                 ct->c_encoding = CE_7BIT;
1043         }
1044
1045         /* match subtype */
1046         for (kv = SubMultiPart; kv->kv_key; kv++)
1047                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1048                         break;
1049         ct->c_subtype = kv->kv_value;
1050
1051         /*
1052         ** Check for "boundary" parameter, which is
1053         ** required for multipart messages.
1054         */
1055         bp = 0;
1056         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1057                 if (!mh_strcasecmp(*ap, "boundary")) {
1058                         bp = *ep;
1059                         break;
1060                 }
1061         }
1062
1063         /* complain if boundary parameter is missing */
1064         if (!*ap) {
1065                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1066                 return NOTOK;
1067         }
1068
1069         /* allocate primary structure for multipart info */
1070         m = mh_xcalloc(1, sizeof(*m));
1071         ct->c_ctparams = (void *) m;
1072
1073         /* check if boundary parameter contains only whitespace characters */
1074         for (cp = bp; isspace(*cp); cp++)
1075                 continue;
1076         if (!*cp) {
1077                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1078                 return NOTOK;
1079         }
1080
1081         /* remove trailing whitespace from boundary parameter */
1082         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1083                 if (!isspace(*dp))
1084                         break;
1085         *++dp = '\0';
1086
1087         /* record boundary separators */
1088         if (!ct->crlf) {
1089                 m->mp_start = concat(bp, "\n", NULL);
1090                 m->mp_stop = concat(bp, "--\n", NULL);
1091         } else {
1092                 m->mp_start = concat(bp, "\r\n", NULL);
1093                 m->mp_stop = concat(bp, "--\r\n", NULL);
1094         }
1095
1096
1097         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1098                 advise(ct->c_file, "unable to open for reading");
1099                 return NOTOK;
1100         }
1101
1102         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1103         last = ct->c_end;
1104         next = &m->mp_parts;
1105         part = NULL;
1106         inout = 1;
1107
1108         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1109                 if (pos > last)
1110                         break;
1111
1112                 pos += strlen(buffer);
1113                 if (buffer[0] != '-' || buffer[1] != '-')
1114                         continue;
1115                 if (inout) {
1116                         if (strcmp(buffer + 2, m->mp_start)!=0)
1117                                 continue;
1118 next_part:
1119                         part = mh_xcalloc(1, sizeof(*part));
1120                         *next = part;
1121                         next = &part->mp_next;
1122
1123                         if (!(p = get_content(fp, ct->c_file,
1124                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1125                                 ct->c_fp = NULL;
1126                                 return NOTOK;
1127                         }
1128                         p->c_fp = NULL;
1129                         part->mp_part = p;
1130                         pos = p->c_begin;
1131                         fseek(fp, pos, SEEK_SET);
1132                         inout = 0;
1133                 } else {
1134                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1135                                 inout = 1;
1136 end_part:
1137                                 p = part->mp_part;
1138                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1139                                 if (p->c_end < p->c_begin)
1140                                         p->c_begin = p->c_end;
1141                                 if (inout)
1142                                         goto next_part;
1143                                 goto last_part;
1144                         } else {
1145                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1146                                         goto end_part;
1147                         }
1148                 }
1149         }
1150
1151         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1152         if (!inout && part) {
1153                 p = part->mp_part;
1154                 p->c_end = ct->c_end;
1155
1156                 if (p->c_begin >= p->c_end) {
1157                         for (next = &m->mp_parts; *next != part;
1158                                 next = &((*next)->mp_next))
1159                                 continue;
1160                         *next = NULL;
1161                         free_content(p);
1162                         mh_free0(&part);
1163                 }
1164         }
1165
1166 last_part:
1167         /* reverse the order of the parts for multipart/alternative */
1168         if (ct->c_subtype == MULTI_ALTERNATE)
1169                 reverse_parts(ct);
1170
1171         /*
1172         ** label all subparts with part number, and
1173         ** then initialize the content of the subpart.
1174         */
1175         {
1176                 int partnum;
1177                 char *pp;
1178                 char partnam[BUFSIZ];
1179
1180                 if (ct->c_partno) {
1181                         snprintf(partnam, sizeof(partnam), "%s.",
1182                                         ct->c_partno);
1183                         pp = partnam + strlen(partnam);
1184                 } else {
1185                         pp = partnam;
1186                 }
1187
1188                 for (part = m->mp_parts, partnum = 1; part;
1189                         part = part->mp_next, partnum++) {
1190                         p = part->mp_part;
1191
1192                         sprintf(pp, "%d", partnum);
1193                         p->c_partno = mh_xstrdup(partnam);
1194
1195                         /* initialize the content of the subparts */
1196                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1197                                 fclose(ct->c_fp);
1198                                 ct->c_fp = NULL;
1199                                 return NOTOK;
1200                         }
1201                 }
1202         }
1203
1204         fclose(ct->c_fp);
1205         ct->c_fp = NULL;
1206         return OK;
1207 }
1208
1209
1210 /*
1211 ** reverse the order of the parts of a multipart
1212 */
1213
1214 static void
1215 reverse_parts(CT ct)
1216 {
1217         int i;
1218         struct multipart *m;
1219         struct part **base, **bmp, **next, *part;
1220
1221         m = (struct multipart *) ct->c_ctparams;
1222
1223         /* if only one part, just return */
1224         if (!m->mp_parts || !m->mp_parts->mp_next)
1225                 return;
1226
1227         /* count number of parts */
1228         i = 0;
1229         for (part = m->mp_parts; part; part = part->mp_next)
1230                 i++;
1231
1232         /* allocate array of pointers to the parts */
1233         base = mh_xcalloc(i + 1, sizeof(*base));
1234         bmp = base;
1235
1236         /* point at all the parts */
1237         for (part = m->mp_parts; part; part = part->mp_next)
1238                 *bmp++ = part;
1239         *bmp = NULL;
1240
1241         /* reverse the order of the parts */
1242         next = &m->mp_parts;
1243         for (bmp--; bmp >= base; bmp--) {
1244                 part = *bmp;
1245                 *next = part;
1246                 next = &part->mp_next;
1247         }
1248         *next = NULL;
1249
1250         /* free array of pointers */
1251         mh_free0(&base);
1252 }
1253
1254
1255 /*
1256 ** MESSAGE
1257 */
1258
1259 static int
1260 InitMessage(CT ct)
1261 {
1262         struct k2v *kv;
1263         CI ci = &ct->c_ctinfo;
1264
1265         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT) && (ct->c_encoding != CE_BINARY)) {
1266                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1267                 ct->c_encoding = CE_7BIT;
1268         }
1269
1270         /* check for missing subtype */
1271         if (!*ci->ci_subtype)
1272                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1273
1274         /* match subtype */
1275         for (kv = SubMessage; kv->kv_key; kv++)
1276                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1277                         break;
1278         ct->c_subtype = kv->kv_value;
1279
1280         switch (ct->c_subtype) {
1281         case MESSAGE_RFC822:
1282                 break;
1283
1284         case MESSAGE_PARTIAL:
1285                 {
1286                 char **ap, **ep;
1287                 struct partial *p;
1288
1289                 p = mh_xcalloc(1, sizeof(*p));
1290                 ct->c_ctparams = (void *) p;
1291
1292                 /*
1293                 ** scan for parameters "id", "number",
1294                 ** and "total"
1295                 */
1296                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1297                         if (!mh_strcasecmp(*ap, "id")) {
1298                                 p->pm_partid = mh_xstrdup(*ep);
1299                                 continue;
1300                         }
1301                         if (!mh_strcasecmp(*ap, "number")) {
1302                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1303 invalid_param:
1304                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1305                                         return NOTOK;
1306                                 }
1307                                 continue;
1308                         }
1309                         if (!mh_strcasecmp(*ap, "total")) {
1310                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1311                                                 p->pm_maxno < 1)
1312                                         goto invalid_param;
1313                                 continue;
1314                         }
1315                 }
1316
1317                 if (!p->pm_partid || !p->pm_partno
1318                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1319                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1320                         return NOTOK;
1321                 }
1322                 }
1323                 break;
1324
1325         case MESSAGE_EXTERNAL:
1326                 {
1327                 CT p;
1328                 FILE *fp;
1329
1330                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1331                         advise(ct->c_file, "unable to open for reading");
1332                         return NOTOK;
1333                 }
1334
1335                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1336
1337                 if (!(p = get_content(fp, ct->c_file, 0))) {
1338                         ct->c_fp = NULL;
1339                         return NOTOK;
1340                 }
1341
1342                 p->c_fp = NULL;
1343                 p->c_end = p->c_begin;
1344
1345                 fclose(ct->c_fp);
1346                 ct->c_fp = NULL;
1347
1348                 switch (p->c_type) {
1349                 case CT_MULTIPART:
1350                         break;
1351
1352                 case CT_MESSAGE:
1353                         if (p->c_subtype != MESSAGE_RFC822)
1354                                 break;
1355                         /* else fall... */
1356                 default:
1357                         if (p->c_ctinitfnx)
1358                                 (*p->c_ctinitfnx) (p);
1359                         break;
1360                 }
1361                 }
1362                 break;
1363
1364         default:
1365                 break;
1366         }
1367
1368         return OK;
1369 }
1370
1371
1372 /*
1373 ** APPLICATION
1374 */
1375
1376 static int
1377 InitApplication(CT ct)
1378 {
1379         struct k2v *kv;
1380         CI ci = &ct->c_ctinfo;
1381
1382         /* match subtype */
1383         for (kv = SubApplication; kv->kv_key; kv++)
1384                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1385                         break;
1386         ct->c_subtype = kv->kv_value;
1387
1388         return OK;
1389 }
1390
1391
1392 /*
1393 ** TRANSFER ENCODINGS
1394 */
1395
1396 static int
1397 init_encoding(CT ct, OpenCEFunc openfnx)
1398 {
1399         CE ce;
1400
1401         ce = mh_xcalloc(1, sizeof(*ce));
1402
1403         ct->c_cefile     = ce;
1404         ct->c_ceopenfnx  = openfnx;
1405         ct->c_ceclosefnx = close_encoding;
1406         ct->c_cesizefnx  = size_encoding;
1407
1408         return OK;
1409 }
1410
1411
1412 void
1413 close_encoding(CT ct)
1414 {
1415         CE ce;
1416
1417         if (!(ce = ct->c_cefile))
1418                 return;
1419
1420         if (ce->ce_fp) {
1421                 fclose(ce->ce_fp);
1422                 ce->ce_fp = NULL;
1423         }
1424 }
1425
1426
1427 static unsigned long
1428 size_encoding(CT ct)
1429 {
1430         int fd;
1431         unsigned long size;
1432         char *file;
1433         CE ce;
1434         struct stat st;
1435
1436         if (!(ce = ct->c_cefile))
1437                 return (ct->c_end - ct->c_begin);
1438
1439         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1440                 return (long) st.st_size;
1441
1442         if (ce->ce_file) {
1443                 if (stat(ce->ce_file, &st) != NOTOK)
1444                         return (long) st.st_size;
1445                 else
1446                         return 0L;
1447         }
1448
1449         if (ct->c_encoding == CE_EXTERNAL)
1450                 return (ct->c_end - ct->c_begin);
1451
1452         file = NULL;
1453         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1454                 return (ct->c_end - ct->c_begin);
1455
1456         if (fstat(fd, &st) != NOTOK)
1457                 size = (long) st.st_size;
1458         else
1459                 size = 0L;
1460
1461         (*ct->c_ceclosefnx) (ct);
1462         return size;
1463 }
1464
1465
1466 /*
1467 ** BASE64
1468 */
1469
/*
** BASE64 decoding table, indexed by a 7-bit character code.
** 'A'-'Z' map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to
** 0x34-0x3d, '+' to 0x3e and '/' to 0x3f.  Every other character,
** including the padding character '=', maps to 0xff.
** The table is only ever read, hence const.
*/
static const unsigned char b642nib[0x80] = {
	/* 0x00 - 0x27: control characters and punctuation */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x28 - 0x2f: '+' and '/' */
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: digits */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x5f: upper case letters */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x7f: lower case letters */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1488
1489
1490 static int
1491 InitBase64(CT ct)
1492 {
1493         return init_encoding(ct, openBase64);
1494 }
1495
1496
1497 static int
1498 openBase64(CT ct, char **file)
1499 {
1500         int bitno, cc;
1501         int fd, len, skip, own_ct_fp = 0;
1502         unsigned long bits;
1503         unsigned char value, *b, *b1, *b2, *b3;
1504         unsigned char *cp, *ep;
1505         char buffer[BUFSIZ];
1506         /* sbeck -- handle suffixes */
1507         CI ci;
1508         CE ce;
1509
1510         b  = (unsigned char *) &bits;
1511         b1 = &b[endian > 0 ? 1 : 2];
1512         b2 = &b[endian > 0 ? 2 : 1];
1513         b3 = &b[endian > 0 ? 3 : 0];
1514
1515         ce = ct->c_cefile;
1516         if (ce->ce_fp) {
1517                 fseek(ce->ce_fp, 0L, SEEK_SET);
1518                 goto ready_to_go;
1519         }
1520
1521         if (ce->ce_file) {
1522                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1523                         content_error(ce->ce_file, ct,
1524                                         "unable to fopen for reading");
1525                         return NOTOK;
1526                 }
1527                 goto ready_to_go;
1528         }
1529
1530         if (*file == NULL) {
1531                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1532                 ce->ce_unlink = 1;
1533         } else {
1534                 ce->ce_file = mh_xstrdup(*file);
1535                 ce->ce_unlink = 0;
1536         }
1537
1538         /* sbeck@cise.ufl.edu -- handle suffixes */
1539         ci = &ct->c_ctinfo;
1540         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1541                         invo_name, ci->ci_type, ci->ci_subtype);
1542         cp = context_find(buffer);
1543         if (cp == NULL || *cp == '\0') {
1544                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1545                                 ci->ci_type);
1546                 cp = context_find(buffer);
1547         }
1548         if (cp != NULL && *cp != '\0') {
1549                 if (ce->ce_unlink) {
1550                         /*
1551                         ** Temporary file already exists, so we rename to
1552                         ** version with extension.
1553                         */
1554                         char *file_org = mh_xstrdup(ce->ce_file);
1555                         ce->ce_file = add(cp, ce->ce_file);
1556                         if (rename(file_org, ce->ce_file)) {
1557                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1558                                                 file_org);
1559                         }
1560                         mh_free0(&file_org);
1561
1562                 } else {
1563                         ce->ce_file = add(cp, ce->ce_file);
1564                 }
1565         }
1566
1567         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1568                 content_error(ce->ce_file, ct,
1569                                 "unable to fopen for reading/writing");
1570                 return NOTOK;
1571         }
1572
1573         if ((len = ct->c_end - ct->c_begin) < 0)
1574                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1575
1576         if (!ct->c_fp) {
1577                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1578                         content_error(ct->c_file, ct,
1579                                         "unable to open for reading");
1580                         return NOTOK;
1581                 }
1582                 own_ct_fp = 1;
1583         }
1584
1585         bitno = 18;
1586         bits = 0L;
1587         skip = 0;
1588
1589         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1590         while (len > 0) {
1591                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1592                 case NOTOK:
1593                         content_error(ct->c_file, ct, "error reading from");
1594                         goto clean_up;
1595
1596                 case OK:
1597                         content_error(NULL, ct, "premature eof");
1598                         goto clean_up;
1599
1600                 default:
1601                         if (cc > len)
1602                                 cc = len;
1603                         len -= cc;
1604
1605                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1606                                 switch (*cp) {
1607                                 default:
1608                                         if (isspace(*cp))
1609                                                 break;
1610                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1611                                                 if (debugsw) {
1612                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1613                                                 }
1614                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1615                                                 continue;
1616                                         }
1617
1618                                         bits |= value << bitno;
1619 test_end:
1620                                         if ((bitno -= 6) < 0) {
1621                                                 putc((char) *b1, ce->ce_fp);
1622                                                 if (skip < 2) {
1623                                                         putc((char) *b2, ce->ce_fp);
1624                                                         if (skip < 1) {
1625                                                                 putc((char) *b3, ce->ce_fp);
1626                                                         }
1627                                                 }
1628
1629                                                 if (ferror(ce->ce_fp)) {
1630                                                         content_error(ce->ce_file, ct,
1631                                                                                    "error writing to");
1632                                                         goto clean_up;
1633                                                 }
1634                                                 bitno = 18, bits = 0L, skip = 0;
1635                                         }
1636                                         break;
1637
1638                                 case '=':
1639                                         if (++skip > 3)
1640                                                 goto self_delimiting;
1641                                         goto test_end;
1642                                 }
1643                         }
1644                 }
1645         }
1646
1647         if (bitno != 18) {
1648                 if (debugsw)
1649                         fprintf(stderr, "premature ending (bitno %d)\n",
1650                                         bitno);
1651
1652                 content_error(NULL, ct, "invalid BASE64 encoding");
1653                 goto clean_up;
1654         }
1655
1656 self_delimiting:
1657         fseek(ct->c_fp, 0L, SEEK_SET);
1658
1659         if (fflush(ce->ce_fp)) {
1660                 content_error(ce->ce_file, ct, "error writing to");
1661                 goto clean_up;
1662         }
1663
1664         fseek(ce->ce_fp, 0L, SEEK_SET);
1665
1666 ready_to_go:
1667         *file = ce->ce_file;
1668         if (own_ct_fp) {
1669                 fclose(ct->c_fp);
1670                 ct->c_fp = NULL;
1671         }
1672         return fileno(ce->ce_fp);
1673
1674 clean_up:
1675         free_encoding(ct, 0);
1676         if (own_ct_fp) {
1677                 fclose(ct->c_fp);
1678                 ct->c_fp = NULL;
1679         }
1680         return NOTOK;
1681 }
1682
1683
1684 /*
1685 ** QUOTED PRINTABLE
1686 */
1687
/*
** Hexadecimal digit table, indexed by a 7-bit character code.
** '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15.  All other
** characters map to 0, so the table cannot be used to tell a valid
** digit from an invalid one.  The table is only ever read, hence const.
*/
static const char hex2nib[0x80] = {
	/* 0x00 - 0x2f: no digits */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0' - '9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x5f: 'A' - 'F' */
	0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x7f: 'a' - 'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1706
1707
1708 static int
1709 InitQuoted(CT ct)
1710 {
1711         return init_encoding(ct, openQuoted);
1712 }
1713
1714
1715 static int
1716 openQuoted(CT ct, char **file)
1717 {
1718         int cc, len, quoted, own_ct_fp = 0;
1719         unsigned char *cp, *ep;
1720         char buffer[BUFSIZ];
1721         unsigned char mask = 0;
1722         CE ce;
1723         /* sbeck -- handle suffixes */
1724         CI ci;
1725
1726         ce = ct->c_cefile;
1727         if (ce->ce_fp) {
1728                 fseek(ce->ce_fp, 0L, SEEK_SET);
1729                 goto ready_to_go;
1730         }
1731
1732         if (ce->ce_file) {
1733                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1734                         content_error(ce->ce_file, ct,
1735                                         "unable to fopen for reading");
1736                         return NOTOK;
1737                 }
1738                 goto ready_to_go;
1739         }
1740
1741         if (*file == NULL) {
1742                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1743                 ce->ce_unlink = 1;
1744         } else {
1745                 ce->ce_file = mh_xstrdup(*file);
1746                 ce->ce_unlink = 0;
1747         }
1748
1749         /* sbeck@cise.ufl.edu -- handle suffixes */
1750         ci = &ct->c_ctinfo;
1751         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1752                         invo_name, ci->ci_type, ci->ci_subtype);
1753         cp = context_find(buffer);
1754         if (cp == NULL || *cp == '\0') {
1755                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1756                                 ci->ci_type);
1757                 cp = context_find(buffer);
1758         }
1759         if (cp != NULL && *cp != '\0') {
1760                 if (ce->ce_unlink) {
1761                         /*
1762                         ** Temporary file already exists, so we rename to
1763                         ** version with extension.
1764                         */
1765                         char *file_org = mh_xstrdup(ce->ce_file);
1766                         ce->ce_file = add(cp, ce->ce_file);
1767                         if (rename(file_org, ce->ce_file)) {
1768                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1769                                                 file_org);
1770                         }
1771                         mh_free0(&file_org);
1772
1773                 } else {
1774                         ce->ce_file = add(cp, ce->ce_file);
1775                 }
1776         }
1777
1778         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1779                 content_error(ce->ce_file, ct,
1780                                 "unable to fopen for reading/writing");
1781                 return NOTOK;
1782         }
1783
1784         if ((len = ct->c_end - ct->c_begin) < 0)
1785                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1786
1787         if (!ct->c_fp) {
1788                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1789                         content_error(ct->c_file, ct,
1790                                         "unable to open for reading");
1791                         return NOTOK;
1792                 }
1793                 own_ct_fp = 1;
1794         }
1795
1796         quoted = 0;
1797
1798         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1799         while (len > 0) {
1800                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1801                         content_error(NULL, ct, "premature eof");
1802                         goto clean_up;
1803                 }
1804
1805                 if ((cc = strlen(buffer)) > len)
1806                         cc = len;
1807                 len -= cc;
1808
1809                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1810                         if (!isspace(*ep))
1811                                 break;
1812                 *++ep = '\n', ep++;
1813
1814                 for (; cp < ep; cp++) {
1815                         if (quoted > 0) {
1816                                 /* in an escape sequence */
1817                                 if (quoted == 1) {
1818                                         /* at byte 1 of an escape sequence */
1819                                         mask = hex2nib[*cp & 0x7f];
1820                                         /* next is byte 2 */
1821                                         quoted = 2;
1822                                 } else {
1823                                         /* at byte 2 of an escape sequence */
1824                                         mask <<= 4;
1825                                         mask |= hex2nib[*cp & 0x7f];
1826                                         putc(mask, ce->ce_fp);
1827                                         if (ferror(ce->ce_fp)) {
1828                                                 content_error(ce->ce_file, ct, "error writing to");
1829                                                 goto clean_up;
1830                                         }
1831                                         /*
1832                                         ** finished escape sequence; next may
1833                                         ** be literal or a new escape sequence
1834                                         */
1835                                         quoted = 0;
1836                                 }
1837                                 /* on to next byte */
1838                                 continue;
1839                         }
1840
1841                         /* not in an escape sequence */
1842                         if (*cp == '=') {
1843                                 /*
1844                                 ** starting an escape sequence,
1845                                 ** or invalid '='?
1846                                 */
1847                                 if (cp + 1 < ep && cp[1] == '\n') {
1848                                         /* "=\n" soft line break, eat the \n */
1849                                         cp++;
1850                                         continue;
1851                                 }
1852                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1853                                         /*
1854                                         ** We don't have 2 bytes left,
1855                                         ** so this is an invalid escape
1856                                         ** sequence; just show the raw bytes
1857                                         ** (below).
1858                                         */
1859                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1860                                         /*
1861                                         ** Next 2 bytes are hex digits,
1862                                         ** making this a valid escape
1863                                         ** sequence; let's decode it (above).
1864                                         */
1865                                         quoted = 1;
1866                                         continue;
1867                                 } else {
1868                                         /*
1869                                         ** One or both of the next 2 is
1870                                         ** out of range, making this an
1871                                         ** invalid escape sequence; just
1872                                         ** show the raw bytes (below).
1873                                         */
1874                                 }
1875                         }
1876
1877                         /* Just show the raw byte. */
1878                         putc(*cp, ce->ce_fp);
1879                         if (ferror(ce->ce_fp)) {
1880                                 content_error(ce->ce_file, ct,
1881                                                 "error writing to");
1882                                 goto clean_up;
1883                         }
1884                 }
1885         }
1886         if (quoted) {
1887                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1888                 goto clean_up;
1889         }
1890
1891         fseek(ct->c_fp, 0L, SEEK_SET);
1892
1893         if (fflush(ce->ce_fp)) {
1894                 content_error(ce->ce_file, ct, "error writing to");
1895                 goto clean_up;
1896         }
1897
1898         fseek(ce->ce_fp, 0L, SEEK_SET);
1899
1900 ready_to_go:
1901         *file = ce->ce_file;
1902         if (own_ct_fp) {
1903                 fclose(ct->c_fp);
1904                 ct->c_fp = NULL;
1905         }
1906         return fileno(ce->ce_fp);
1907
1908 clean_up:
1909         free_encoding(ct, 0);
1910         if (own_ct_fp) {
1911                 fclose(ct->c_fp);
1912                 ct->c_fp = NULL;
1913         }
1914         return NOTOK;
1915 }
1916
1917
1918 /*
1919 ** 7BIT
1920 */
1921
1922 static int
1923 Init7Bit(CT ct)
1924 {
1925         if (init_encoding(ct, open7Bit) == NOTOK)
1926                 return NOTOK;
1927
1928         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1929         return OK;
1930 }
1931
1932
1933 int
1934 open7Bit(CT ct, char **file)
1935 {
1936         int cc, fd, len, own_ct_fp = 0;
1937         char buffer[BUFSIZ];
1938         /* sbeck -- handle suffixes */
1939         char *cp;
1940         CI ci;
1941         CE ce;
1942
1943         ce = ct->c_cefile;
1944         if (ce->ce_fp) {
1945                 fseek(ce->ce_fp, 0L, SEEK_SET);
1946                 goto ready_to_go;
1947         }
1948
1949         if (ce->ce_file) {
1950                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1951                         content_error(ce->ce_file, ct,
1952                                         "unable to fopen for reading");
1953                         return NOTOK;
1954                 }
1955                 goto ready_to_go;
1956         }
1957
1958         if (*file == NULL) {
1959                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1960                 ce->ce_unlink = 1;
1961         } else {
1962                 ce->ce_file = mh_xstrdup(*file);
1963                 ce->ce_unlink = 0;
1964         }
1965
1966         /* sbeck@cise.ufl.edu -- handle suffixes */
1967         ci = &ct->c_ctinfo;
1968         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1969                         invo_name, ci->ci_type, ci->ci_subtype);
1970         cp = context_find(buffer);
1971         if (cp == NULL || *cp == '\0') {
1972                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1973                                 ci->ci_type);
1974                 cp = context_find(buffer);
1975         }
1976         if (cp != NULL && *cp != '\0') {
1977                 if (ce->ce_unlink) {
1978                         /*
1979                         ** Temporary file already exists, so we rename to
1980                         ** version with extension.
1981                         */
1982                         char *file_org = mh_xstrdup(ce->ce_file);
1983                         ce->ce_file = add(cp, ce->ce_file);
1984                         if (rename(file_org, ce->ce_file)) {
1985                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1986                                                 file_org);
1987                         }
1988                         mh_free0(&file_org);
1989
1990                 } else {
1991                         ce->ce_file = add(cp, ce->ce_file);
1992                 }
1993         }
1994
1995         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1996                 content_error(ce->ce_file, ct,
1997                                 "unable to fopen for reading/writing");
1998                 return NOTOK;
1999         }
2000
2001         if (ct->c_type == CT_MULTIPART) {
2002                 char **ap, **ep;
2003                 CI ci = &ct->c_ctinfo;
2004
2005                 len = 0;
2006                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2007                                 ci->ci_subtype);
2008                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2009                                 strlen(ci->ci_subtype);
2010                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2011                         putc(';', ce->ce_fp);
2012                         len++;
2013
2014                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2015                                         *ap, *ep);
2016
2017                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2018                                 fputs("\n\t", ce->ce_fp);
2019                                 len = 8;
2020                         } else {
2021                                 putc(' ', ce->ce_fp);
2022                                 len++;
2023                         }
2024                         fprintf(ce->ce_fp, "%s", buffer);
2025                         len += cc;
2026                 }
2027
2028                 if (ci->ci_comment) {
2029                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2030                                                 >= CPERLIN) {
2031                                 fputs("\n\t", ce->ce_fp);
2032                                 len = 8;
2033                         } else {
2034                                 putc(' ', ce->ce_fp);
2035                                 len++;
2036                         }
2037                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2038                         len += cc;
2039                 }
2040                 fprintf(ce->ce_fp, "\n");
2041                 if (ct->c_id)
2042                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2043                 if (ct->c_descr)
2044                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2045                 if (ct->c_dispo)
2046                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2047                 fprintf(ce->ce_fp, "\n");
2048         }
2049
2050         if ((len = ct->c_end - ct->c_begin) < 0)
2051                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2052
2053         if (!ct->c_fp) {
2054                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2055                         content_error(ct->c_file, ct,
2056                                         "unable to open for reading");
2057                         return NOTOK;
2058                 }
2059                 own_ct_fp = 1;
2060         }
2061
2062         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2063         while (len > 0)
2064                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2065                 case NOTOK:
2066                         content_error(ct->c_file, ct, "error reading from");
2067                         goto clean_up;
2068
2069                 case OK:
2070                         content_error(NULL, ct, "premature eof");
2071                         goto clean_up;
2072
2073                 default:
2074                         if (cc > len)
2075                                 cc = len;
2076                         len -= cc;
2077
2078                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2079                         if (ferror(ce->ce_fp)) {
2080                                 content_error(ce->ce_file, ct,
2081                                                 "error writing to");
2082                                 goto clean_up;
2083                         }
2084                 }
2085
2086         fseek(ct->c_fp, 0L, SEEK_SET);
2087
2088         if (fflush(ce->ce_fp)) {
2089                 content_error(ce->ce_file, ct, "error writing to");
2090                 goto clean_up;
2091         }
2092
2093         fseek(ce->ce_fp, 0L, SEEK_SET);
2094
2095 ready_to_go:
2096         *file = ce->ce_file;
2097         if (own_ct_fp) {
2098                 fclose(ct->c_fp);
2099                 ct->c_fp = NULL;
2100         }
2101         return fileno(ce->ce_fp);
2102
2103 clean_up:
2104         free_encoding(ct, 0);
2105         if (own_ct_fp) {
2106                 fclose(ct->c_fp);
2107                 ct->c_fp = NULL;
2108         }
2109         return NOTOK;
2110 }