c2506755663e4e7c498861ad18a5b87c703b8f8f
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
22 extern int debugsw;
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
28 /*
29 ** Directory to place temp files.  This must
30 ** be set before these routines are called.
31 */
32 char *tmp;
33
/*
** Structures for TEXT messages
**
** SubText pairs each known text subtype name with its TEXT_* code.
** The closing entry has a NULL key and carries TEXT_UNKNOWN
** (presumably the value a lookup yields when no name matches --
** the lookup code is not in this part of the file).
*/
struct k2v SubText[] = {
        { "plain", TEXT_PLAIN },
        { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
        { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
        { NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
43
/*
** Charset pairs character set names with their CHARSET_* codes.
** The closing NULL-keyed entry carries CHARSET_UNKNOWN.
*/
struct k2v Charset[] = {
        { "us-ascii",   CHARSET_USASCII },
        { "iso-8859-1", CHARSET_LATIN },
        { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
49
/*
** Structures for MULTIPART messages
**
** SubMultiPart pairs each known multipart subtype name with its
** MULTI_* code; the closing NULL-keyed entry carries MULTI_UNKNOWN.
*/
struct k2v SubMultiPart[] = {
        { "mixed",       MULTI_MIXED },
        { "alternative", MULTI_ALTERNATE },
        { "digest",      MULTI_DIGEST },
        { "parallel",    MULTI_PARALLEL },
        { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
60
/*
** Structures for MESSAGE messages
**
** SubMessage pairs each known message subtype name with its
** MESSAGE_* code; the closing NULL-keyed entry carries
** MESSAGE_UNKNOWN.
*/
struct k2v SubMessage[] = {
        { "rfc822",        MESSAGE_RFC822 },
        { "partial",       MESSAGE_PARTIAL },
        { "external-body", MESSAGE_EXTERNAL },
        { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
70
/*
** Structure for APPLICATION messages
**
** SubApplication pairs each known application subtype name with
** its APPLICATION_* code; the closing NULL-keyed entry carries
** APPLICATION_UNKNOWN.
*/
struct k2v SubApplication[] = {
        { "octet-stream", APPLICATION_OCTETS },
        { "postscript",   APPLICATION_POSTSCRIPT },
        { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
/*
** Content types: name, CT_* code and the Init function that
** get_content() installs as the content's c_ctinitfnx.
**
** The table ends with TWO NULL-keyed entries on purpose.  The lookup
** in get_content() stops at the first NULL key (CT_EXTENSION); if the
** type name does not pass the uprf(..., "X-") prefix test, it then
** steps one entry further and lands on CT_UNKNOWN.  Neither of the
** two has an Init function.
*/
struct str2init str2cts[] = {
        { "application", CT_APPLICATION, InitApplication },
        { "audio",       CT_AUDIO,       InitGeneric },
        { "image",       CT_IMAGE,       InitGeneric },
        { "message",     CT_MESSAGE,     InitMessage },
        { "multipart",   CT_MULTIPART,   InitMultiPart },
        { "text",        CT_TEXT,        InitText },
        { "video",       CT_VIDEO,       InitGeneric },
        { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
        { NULL,          CT_UNKNOWN,     NULL },
};
122
/*
** Content transfer encodings: name, CE_* code and the Init function
** that get_content() calls right after recognizing the encoding.
** Note that "8bit", "7bit" and "binary" all share Init7Bit.
**
** As with the content type table, the TWO NULL-keyed entries at the
** end are both needed: get_content() stops at the first one
** (CE_EXTENSION) and advances to the second (CE_UNKNOWN) when the
** encoding name does not pass the uprf(..., "X-") prefix test.
*/
struct str2init str2ces[] = {
        { "base64",           CE_BASE64,    InitBase64 },
        { "quoted-printable", CE_QUOTED,    InitQuoted },
        { "8bit",             CE_8BIT,      Init7Bit },
        { "7bit",             CE_7BIT,      Init7Bit },
        { "binary",           CE_BINARY,    Init7Bit },
        { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
        { NULL,               CE_UNKNOWN,    NULL },
};
132
133
/*
** Examine a status word (presumably a wait(2)-style child status --
** confirm against the callers).
**
** If bits 8..15 are all set, or the low seven bits are anything
** other than SIGQUIT, the status is handed back unchanged.
** Otherwise stdout and stderr are flushed and the whole program
** exits with EX_SOFTWARE; in that case this function never returns.
*/
int
pidcheck(int status)
{
        int high_byte_all_ones = ((status & 0xff00) == 0xff00);
        int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

        if (!high_byte_all_ones && low_bits_are_quit) {
                /* push out pending output before giving up */
                fflush(stdout);
                fflush(stderr);
                exit(EX_SOFTWARE);
        }

        return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = mh_xstrdup(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD2;;) {
252                 switch (state = m_getfld2(state, &f, in)) {
253                 case LENERR2:
254                         advise(NULL, "To long field");
255                         state = FLD2;
256                         /* FALL */
257                 case FLD2:
258                         compnum++;
259
260                         /* add the header data to the list */
261                         add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
262
263                         ct->c_begin = ftell(in) + 1;
264                         continue;
265
266                 case BODY2:
267                         ct->c_begin = ftell(in) - strlen(f.value);
268                         break;
269
270                 case FILEEOF2:
271                         ct->c_begin = ftell(in);
272                         break;
273
274                 case FMTERR2:
275                         advise(NULL, "message format error in component #%d", compnum);
276                         state = FLD2;
277                         continue;
278
279                 case IOERR2:
280                         adios(EX_IOERR, "m_getfld2", "io error");
281
282                 default:
283                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
284                 }
285                 break;
286         }
287
288         /*
289         ** Read the content headers.  We will parse the
290         ** MIME related header fields into their various
291         ** structures and set internal flags related to
292         ** content type/subtype, etc.
293         */
294
295         hp = ct->c_first_hf;  /* start at first header field */
296         while (hp) {
297                 /* Get MIME-Version field */
298                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
299                         int ucmp;
300                         char c;
301                         unsigned char *cp, *dp;
302
303                         if (ct->c_vrsn) {
304                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
305                                 goto next_header;
306                         }
307                         ct->c_vrsn = mh_xstrdup(hp->value);
308
309                         /* Now, cleanup this field */
310                         cp = ct->c_vrsn;
311
312                         while (isspace(*cp))
313                                 cp++;
314                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
315                                 *dp++ = ' ';
316                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
317                                 if (!isspace(*dp))
318                                         break;
319                         *++dp = '\0';
320                         if (debugsw)
321                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
322
323                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
324                                 goto out;
325
326                         for (dp = cp; istoken(*dp); dp++)
327                                 continue;
328                         c = *dp;
329                         *dp = '\0';
330                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
331                         *dp = c;
332                         if (!ucmp) {
333                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
334                         }
335
336                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
337                         /* Get Content-Type field */
338                         struct str2init *s2i;
339                         CI ci = &ct->c_ctinfo;
340
341                         /* Check if we've already seen a Content-Type header */
342                         if (ct->c_ctline) {
343                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
344                                 goto next_header;
345                         }
346
347                         /* Parse the Content-Type field */
348                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
349                                 goto out;
350
351                         /*
352                         ** Set the Init function and the internal
353                         ** flag for this content type.
354                         */
355                         for (s2i = str2cts; s2i->si_key; s2i++)
356                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
357                                         break;
358                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
359                                 s2i++;
360                         ct->c_type = s2i->si_val;
361                         ct->c_ctinitfnx = s2i->si_init;
362
363                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
364                         /* Get Content-Transfer-Encoding field */
365                         char c;
366                         unsigned char *cp, *dp;
367                         struct str2init *s2i;
368
369                         /*
370                         ** Check if we've already seen the
371                         ** Content-Transfer-Encoding field
372                         */
373                         if (ct->c_celine) {
374                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
375                                 goto next_header;
376                         }
377
378                         /* get copy of this field */
379                         ct->c_celine = cp = mh_xstrdup(hp->value);
380
381                         while (isspace(*cp))
382                                 cp++;
383                         for (dp = cp; istoken(*dp); dp++)
384                                 continue;
385                         c = *dp;
386                         *dp = '\0';
387
388                         /*
389                         ** Find the internal flag and Init function
390                         ** for this transfer encoding.
391                         */
392                         for (s2i = str2ces; s2i->si_key; s2i++)
393                                 if (!mh_strcasecmp(cp, s2i->si_key))
394                                         break;
395                         if (!s2i->si_key && !uprf(cp, "X-"))
396                                 s2i++;
397                         *dp = c;
398                         ct->c_encoding = s2i->si_val;
399
400                         /* Call the Init function for this encoding */
401                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
402                                 goto out;
403
404                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
405                         /* Get Content-ID field */
406                         ct->c_id = add(hp->value, ct->c_id);
407
408                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
409                         /* Get Content-Description field */
410                         ct->c_descr = add(hp->value, ct->c_descr);
411
412                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
413                         /* Get Content-Disposition field */
414                         ct->c_dispo = add(hp->value, ct->c_dispo);
415                 }
416
417 next_header:
418                 hp = hp->next;  /* next header field */
419         }
420
421         /*
422         ** Check if we saw a Content-Type field.
423         ** If not, then assign a default value for
424         ** it, and the Init function.
425         */
426         if (!ct->c_ctline) {
427                 /*
428                 ** If we are inside a multipart/digest message,
429                 ** so default type is message/rfc822
430                 */
431                 if (toplevel < 0) {
432                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
433                                 goto out;
434                         ct->c_type = CT_MESSAGE;
435                         ct->c_ctinitfnx = InitMessage;
436                 } else {
437                         /*
438                         ** Else default type is text/plain
439                         */
440                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
441                                 goto out;
442                         ct->c_type = CT_TEXT;
443                         ct->c_ctinitfnx = InitText;
444                 }
445         }
446
447         /* Use default Transfer-Encoding, if necessary */
448         if (!ct->c_celine) {
449                 ct->c_encoding = CE_7BIT;
450                 Init7Bit(ct);
451         }
452
453         return ct;
454
455 out:
456         free_content(ct);
457         return NULL;
458 }
459
460
461 /*
462 ** small routine to add header field to list
463 */
464
465 int
466 add_header(CT ct, char *name, char *value)
467 {
468         HF hp;
469
470         /* allocate header field structure */
471         hp = mh_xcalloc(1, sizeof(*hp));
472
473         /* link data into header structure */
474         hp->name = name;
475         hp->value = value;
476         hp->next = NULL;
477
478         /* link header structure into the list */
479         if (ct->c_first_hf == NULL) {
480                 ct->c_first_hf = hp;  /* this is the first */
481                 ct->c_last_hf = hp;
482         } else {
483                 ct->c_last_hf->next = hp;  /* add it to the end */
484                 ct->c_last_hf = hp;
485         }
486
487         return 0;
488 }
489
490
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** Returns buf itself if nothing had to be inserted (also when buf
** or value is NULL).  Otherwise buf is freed and a newly allocated,
** newline terminated string is returned in its place, so the caller
** must continue with the return value only.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *text = (char *)buf;
        char *newbuf = text;
        char *name_plus_equal;

        /* Assume that name is non-null. */
        if (!buf || !value)
                return newbuf;

        name_plus_equal = concat(name, "=", NULL);

        if (!strstr(text, name_plus_equal)) {
                char *insertion;
                char *prefix, *semi;
                size_t len;

                /*
                ** Trim trailing space, esp. newline.  Working with
                ** the length keeps this safe for an empty buf.
                */
                for (len = strlen(text); len > 0 && isspace(buf[len - 1]); len--)
                        buf[len - 1] = '\0';

                insertion = concat("; ", name, "=", "\"", value, "\"",
                                NULL);

                /*
                ** Insert at first semicolon, if any.
                ** If none, append to end.
                */
                prefix = mh_xstrdup(text);
                if ((semi = strchr(prefix, ';'))) {
                        /* the tail is taken from the still intact buf */
                        char *suffix = text + (semi - prefix);

                        *semi = '\0';
                        newbuf = concat(prefix, insertion, suffix,
                                        "\n", NULL);
                } else {
                        /* Append to end. */
                        newbuf = concat(text, insertion, "\n", NULL);
                }

                mh_free0(&prefix);
                mh_free0(&insertion);
                mh_free0(&buf);
        }

        mh_free0(&name_plus_equal);

        return newbuf;
}
548
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** If the opening quote is never closed, this counts as "there isn't
** one" and the entire value is returned.
**
** The result is either value itself or a newly allocated string;
** a caller who wants to free the latter needs to compare the result
** against value.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *pattern = concat(name_suffix, "=\"", NULL);
        char *match = strstr(value, pattern);
        char *begin, *end, *extracted;
        size_t len;

        /* match points into value, so the pattern can go already */
        mh_free0(&pattern);
        if (!match)
                return value;

        /* The pattern ends in a quote, thus strchr() finds one. */
        begin = strchr(match, '"') + 1;

        /* Find second \", but do not run off the end of the string. */
        end = strchr(begin, '"');
        if (!end)
                return value;

        len = end - begin;
        extracted = mh_xcalloc(len + 1, sizeof(char));
        memcpy(extracted, begin, len);
        extracted[len] = '\0';

        return extracted;
}
581
582 /*
583 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
584 ** directives.  Fills in the information of the CTinfo structure.
585 */
586 int
587 get_ctinfo(unsigned char *cp, CT ct, int magic)
588 {
589         int i;
590         unsigned char *dp;
591         char **ap, **ep;
592         char c;
593         CI ci;
594
595         ci = &ct->c_ctinfo;
596         i = strlen(invo_name) + 2;
597
598         /* store copy of Content-Type line */
599         cp = ct->c_ctline = mh_xstrdup(cp);
600
601         while (isspace(*cp))  /* trim leading spaces */
602                 cp++;
603
604         /* change newlines to spaces */
605         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
606                 *dp++ = ' ';
607
608         /* trim trailing spaces */
609         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
610                 if (!isspace(*dp))
611                         break;
612         *++dp = '\0';
613
614         if (debugsw)
615                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
616
617         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
618                 return NOTOK;
619
620         for (dp = cp; istoken(*dp); dp++)
621                 continue;
622         c = *dp, *dp = '\0';
623         ci->ci_type = mh_xstrdup(cp);  /* store content type */
624         *dp = c, cp = dp;
625
626         if (!*ci->ci_type) {
627                 advise(NULL, "invalid %s: field in message %s (empty type)",
628                                 TYPE_FIELD, ct->c_file);
629                 return NOTOK;
630         }
631
632         /* down case the content type string */
633         for (dp = ci->ci_type; *dp; dp++)
634                 if (isalpha(*dp) && isupper(*dp))
635                         *dp = tolower(*dp);
636
637         while (isspace(*cp))
638                 cp++;
639
640         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
641                 return NOTOK;
642
643         if (*cp != '/') {
644                 if (!magic)
645                         ci->ci_subtype = mh_xstrdup("");
646                 goto magic_skip;
647         }
648
649         cp++;
650         while (isspace(*cp))
651                 cp++;
652
653         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
654                 return NOTOK;
655
656         for (dp = cp; istoken(*dp); dp++)
657                 continue;
658         c = *dp, *dp = '\0';
659         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
660         *dp = c, cp = dp;
661
662         if (!*ci->ci_subtype) {
663                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
664                 return NOTOK;
665         }
666
667         /* down case the content subtype string */
668         for (dp = ci->ci_subtype; *dp; dp++)
669                 if (isalpha(*dp) && isupper(*dp))
670                         *dp = tolower(*dp);
671
672 magic_skip:
673         while (isspace(*cp))
674                 cp++;
675
676         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
677                 return NOTOK;
678
679         /*
680         ** Parse attribute/value pairs given with Content-Type
681         */
682         ep = (ap = ci->ci_attrs) + NPARMS;
683         while (*cp == ';') {
684                 char *vp;
685                 unsigned char *up;
686
687                 if (ap >= ep) {
688                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
689                         return NOTOK;
690                 }
691
692                 cp++;
693                 while (isspace(*cp))
694                         cp++;
695
696                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
697                         return NOTOK;
698
699                 if (*cp == 0) {
700                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
701                         return OK;
702                 }
703
704                 /* down case the attribute name */
705                 for (dp = cp; istoken(*dp); dp++)
706                         if (isalpha(*dp) && isupper(*dp))
707                                 *dp = tolower(*dp);
708
709                 for (up = dp; isspace(*dp);)
710                         dp++;
711                 if (dp == cp || *dp != '=') {
712                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
713                         return NOTOK;
714                 }
715
716                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
717                 *vp = '\0';
718                 for (dp++; isspace(*dp);)
719                         dp++;
720
721                 /* now add the attribute value */
722                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
723
724                 if (*dp == '"') {
725                         for (cp = ++dp, dp = vp;;) {
726                                 switch (c = *cp++) {
727                                 case '\0':
728 bad_quote:
729                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
730                                         return NOTOK;
731
732                                 case '\\':
733                                         *dp++ = c;
734                                         if ((c = *cp++) == '\0')
735                                                 goto bad_quote;
736                                         /* else fall... */
737
738                                 default:
739                                         *dp++ = c;
740                                         continue;
741
742                                 case '"':
743                                         *dp = '\0';
744                                         break;
745                                 }
746                                 break;
747                         }
748                 } else {
749                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
750                                 continue;
751                         *dp = '\0';
752                 }
753                 if (!*vp) {
754                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
755                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
756                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
757                         continue;
758                 }
759                 ap++;
760
761                 while (isspace(*cp))
762                         cp++;
763
764                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
765                         return NOTOK;
766         }
767
768         /*
769         ** Get any <Content-Id> given in buffer
770         */
771         if (magic && *cp == '<') {
772                 if (ct->c_id) {
773                         mh_free0(&(ct->c_id));
774                 }
775                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
776                         advise(NULL, "invalid ID in message %s", ct->c_file);
777                         return NOTOK;
778                 }
779                 c = *dp;
780                 *dp = '\0';
781                 if (*ct->c_id)
782                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
783                 else
784                         ct->c_id = NULL;
785                 *dp++ = c;
786                 cp = dp;
787
788                 while (isspace(*cp))
789                         cp++;
790         }
791
792         /*
793         ** Get any [Content-Description] given in buffer.
794         */
795         if (magic && *cp == '[') {
796                 ct->c_descr = ++cp;
797                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
798                         if (*dp == ']')
799                                 break;
800                 if (dp < cp) {
801                         advise(NULL, "invalid description in message %s",
802                                         ct->c_file);
803                         ct->c_descr = NULL;
804                         return NOTOK;
805                 }
806
807                 c = *dp;
808                 *dp = '\0';
809                 if (*ct->c_descr)
810                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
811                 else
812                         ct->c_descr = NULL;
813                 *dp++ = c;
814                 cp = dp;
815
816                 while (isspace(*cp))
817                         cp++;
818         }
819
820         /*
821         ** Get any {Content-Disposition} given in buffer.
822         */
823         if (magic && *cp == '{') {
824                 ct->c_dispo = ++cp;
825                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
826                         if (*dp == '}')
827                                 break;
828                 if (dp < cp) {
829                         advise(NULL, "invalid disposition in message %s",
830                                         ct->c_file);
831                         ct->c_dispo = NULL;
832                         return NOTOK;
833                 }
834
835                 c = *dp;
836                 *dp = '\0';
837                 if (*ct->c_dispo)
838                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
839                 else
840                         ct->c_dispo = NULL;
841                 *dp++ = c;
842                 cp = dp;
843
844                 while (isspace(*cp))
845                         cp++;
846         }
847
848         /*
849         ** Check if anything is left over
850         */
851         if (*cp) {
852                 if (magic) {
853                         ci->ci_magic = mh_xstrdup(cp);
854
855                         /*
856                         ** If there is a Content-Disposition header and
857                         ** it doesn't have a *filename=, extract it from
858                         ** the magic contents.  The mhbasename call skips
859                         ** any leading directory components.
860                         */
861                         if (ct->c_dispo)
862                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
863                         } else
864                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
865         }
866
867         return OK;
868 }
869
870
871 static int
872 get_comment(CT ct, unsigned char **ap, int istype)
873 {
874         int i;
875         char *bp;
876         unsigned char *cp;
877         char c, buffer[BUFSIZ], *dp;
878         CI ci;
879
880         ci = &ct->c_ctinfo;
881         cp = *ap;
882         bp = buffer;
883         cp++;
884
885         for (i = 0;;) {
886                 switch (c = *cp++) {
887                 case '\0':
888 invalid:
889                 advise(NULL, "invalid comment in message %s's %s: field",
890                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
891                 return NOTOK;
892
893                 case '\\':
894                         *bp++ = c;
895                         if ((c = *cp++) == '\0')
896                                 goto invalid;
897                         *bp++ = c;
898                         continue;
899
900                 case '(':
901                         i++;
902                         /* and fall... */
903                 default:
904                         *bp++ = c;
905                         continue;
906
907                 case ')':
908                         if (--i < 0)
909                                 break;
910                         *bp++ = c;
911                         continue;
912                 }
913                 break;
914         }
915         *bp = '\0';
916
917         if (istype) {
918                 if ((dp = ci->ci_comment)) {
919                         ci->ci_comment = concat(dp, " ", buffer, NULL);
920                         mh_free0(&dp);
921                 } else {
922                         ci->ci_comment = mh_xstrdup(buffer);
923                 }
924         }
925
926         while (isspace(*cp))
927                 cp++;
928
929         *ap = cp;
930         return OK;
931 }
932
933
934 /*
935 ** CONTENTS
936 **
937 ** Handles content types audio, image, and video.
938 ** There's not much to do right here.
939 */
940
941 static int
942 InitGeneric(CT ct)
943 {
944         return OK;  /* not much to do here */
945 }
946
947
948 /*
949 ** TEXT
950 */
951
952 static int
953 InitText(CT ct)
954 {
955         char **ap, **ep;
956         struct k2v *kv;
957         struct text *t;
958         CI ci = &ct->c_ctinfo;
959
960         /* check for missing subtype */
961         if (!*ci->ci_subtype)
962                 ci->ci_subtype = add("plain", ci->ci_subtype);
963
964         /* match subtype */
965         for (kv = SubText; kv->kv_key; kv++)
966                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
967                         break;
968         ct->c_subtype = kv->kv_value;
969
970         /* allocate text character set structure */
971         t = mh_xcalloc(1, sizeof(*t));
972         ct->c_ctparams = (void *) t;
973
974         /* scan for charset parameter */
975         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
976                 if (!mh_strcasecmp(*ap, "charset"))
977                         break;
978
979         /* check if content specified a character set */
980         if (*ap) {
981                 /* store its name */
982                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
983                 /* match character set or set to CHARSET_UNKNOWN */
984                 for (kv = Charset; kv->kv_key; kv++) {
985                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
986                                 break;
987                         }
988                 }
989                 t->tx_charset = kv->kv_value;
990         } else {
991                 t->tx_charset = CHARSET_UNSPECIFIED;
992         }
993
994         return OK;
995 }
996
997
998 /*
999 ** MULTIPART
1000 */
1001
1002 static int
1003 InitMultiPart(CT ct)
1004 {
1005         int inout;
1006         long last, pos;
1007         unsigned char *cp, *dp;
1008         char **ap, **ep;
1009         char *bp, buffer[BUFSIZ];
1010         struct multipart *m;
1011         struct k2v *kv;
1012         struct part *part, **next;
1013         CI ci = &ct->c_ctinfo;
1014         CT p;
1015         FILE *fp;
1016
1017         /*
1018         ** The encoding for multipart messages must be either
1019         ** 7bit, 8bit, or binary (per RFC2045).
1020         */
1021         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1022                 && ct->c_encoding != CE_BINARY) {
1023                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1024                 ct->c_encoding = CE_7BIT;
1025         }
1026
1027         /* match subtype */
1028         for (kv = SubMultiPart; kv->kv_key; kv++)
1029                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1030                         break;
1031         ct->c_subtype = kv->kv_value;
1032
1033         /*
1034         ** Check for "boundary" parameter, which is
1035         ** required for multipart messages.
1036         */
1037         bp = 0;
1038         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1039                 if (!mh_strcasecmp(*ap, "boundary")) {
1040                         bp = *ep;
1041                         break;
1042                 }
1043         }
1044
1045         /* complain if boundary parameter is missing */
1046         if (!*ap) {
1047                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1048                 return NOTOK;
1049         }
1050
1051         /* allocate primary structure for multipart info */
1052         m = mh_xcalloc(1, sizeof(*m));
1053         ct->c_ctparams = (void *) m;
1054
1055         /* check if boundary parameter contains only whitespace characters */
1056         for (cp = bp; isspace(*cp); cp++)
1057                 continue;
1058         if (!*cp) {
1059                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1060                 return NOTOK;
1061         }
1062
1063         /* remove trailing whitespace from boundary parameter */
1064         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1065                 if (!isspace(*dp))
1066                         break;
1067         *++dp = '\0';
1068
1069         /* record boundary separators */
1070         m->mp_start = concat(bp, "\n", NULL);
1071         m->mp_stop = concat(bp, "--\n", NULL);
1072
1073         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1074                 advise(ct->c_file, "unable to open for reading");
1075                 return NOTOK;
1076         }
1077
1078         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1079         last = ct->c_end;
1080         next = &m->mp_parts;
1081         part = NULL;
1082         inout = 1;
1083
1084         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1085                 if (pos > last)
1086                         break;
1087
1088                 pos += strlen(buffer);
1089                 if (buffer[0] != '-' || buffer[1] != '-')
1090                         continue;
1091                 if (inout) {
1092                         if (strcmp(buffer + 2, m->mp_start)!=0)
1093                                 continue;
1094 next_part:
1095                         part = mh_xcalloc(1, sizeof(*part));
1096                         *next = part;
1097                         next = &part->mp_next;
1098
1099                         if (!(p = get_content(fp, ct->c_file,
1100                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1101                                 ct->c_fp = NULL;
1102                                 return NOTOK;
1103                         }
1104                         p->c_fp = NULL;
1105                         part->mp_part = p;
1106                         pos = p->c_begin;
1107                         fseek(fp, pos, SEEK_SET);
1108                         inout = 0;
1109                 } else {
1110                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1111                                 inout = 1;
1112 end_part:
1113                                 p = part->mp_part;
1114                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1115                                 if (p->c_end < p->c_begin)
1116                                         p->c_begin = p->c_end;
1117                                 if (inout)
1118                                         goto next_part;
1119                                 goto last_part;
1120                         } else {
1121                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1122                                         goto end_part;
1123                         }
1124                 }
1125         }
1126
1127         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1128         if (!inout && part) {
1129                 p = part->mp_part;
1130                 p->c_end = ct->c_end;
1131
1132                 if (p->c_begin >= p->c_end) {
1133                         for (next = &m->mp_parts; *next != part;
1134                                 next = &((*next)->mp_next))
1135                                 continue;
1136                         *next = NULL;
1137                         free_content(p);
1138                         mh_free0(&part);
1139                 }
1140         }
1141
1142 last_part:
1143         /* reverse the order of the parts for multipart/alternative */
1144         if (ct->c_subtype == MULTI_ALTERNATE)
1145                 reverse_parts(ct);
1146
1147         /*
1148         ** label all subparts with part number, and
1149         ** then initialize the content of the subpart.
1150         */
1151         {
1152                 int partnum;
1153                 char *pp;
1154                 char partnam[BUFSIZ];
1155
1156                 if (ct->c_partno) {
1157                         snprintf(partnam, sizeof(partnam), "%s.",
1158                                         ct->c_partno);
1159                         pp = partnam + strlen(partnam);
1160                 } else {
1161                         pp = partnam;
1162                 }
1163
1164                 for (part = m->mp_parts, partnum = 1; part;
1165                         part = part->mp_next, partnum++) {
1166                         p = part->mp_part;
1167
1168                         sprintf(pp, "%d", partnum);
1169                         p->c_partno = mh_xstrdup(partnam);
1170
1171                         /* initialize the content of the subparts */
1172                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1173                                 fclose(ct->c_fp);
1174                                 ct->c_fp = NULL;
1175                                 return NOTOK;
1176                         }
1177                 }
1178         }
1179
1180         fclose(ct->c_fp);
1181         ct->c_fp = NULL;
1182         return OK;
1183 }
1184
1185
1186 /*
1187 ** reverse the order of the parts of a multipart
1188 */
1189
1190 static void
1191 reverse_parts(CT ct)
1192 {
1193         int i;
1194         struct multipart *m;
1195         struct part **base, **bmp, **next, *part;
1196
1197         m = (struct multipart *) ct->c_ctparams;
1198
1199         /* if only one part, just return */
1200         if (!m->mp_parts || !m->mp_parts->mp_next)
1201                 return;
1202
1203         /* count number of parts */
1204         i = 0;
1205         for (part = m->mp_parts; part; part = part->mp_next)
1206                 i++;
1207
1208         /* allocate array of pointers to the parts */
1209         base = mh_xcalloc(i + 1, sizeof(*base));
1210         bmp = base;
1211
1212         /* point at all the parts */
1213         for (part = m->mp_parts; part; part = part->mp_next)
1214                 *bmp++ = part;
1215         *bmp = NULL;
1216
1217         /* reverse the order of the parts */
1218         next = &m->mp_parts;
1219         for (bmp--; bmp >= base; bmp--) {
1220                 part = *bmp;
1221                 *next = part;
1222                 next = &part->mp_next;
1223         }
1224         *next = NULL;
1225
1226         /* free array of pointers */
1227         mh_free0(&base);
1228 }
1229
1230
1231 /*
1232 ** MESSAGE
1233 */
1234
1235 static int
1236 InitMessage(CT ct)
1237 {
1238         struct k2v *kv;
1239         CI ci = &ct->c_ctinfo;
1240
1241         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1242                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1243                 return NOTOK;
1244         }
1245
1246         /* check for missing subtype */
1247         if (!*ci->ci_subtype)
1248                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1249
1250         /* match subtype */
1251         for (kv = SubMessage; kv->kv_key; kv++)
1252                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1253                         break;
1254         ct->c_subtype = kv->kv_value;
1255
1256         switch (ct->c_subtype) {
1257         case MESSAGE_RFC822:
1258                 break;
1259
1260         case MESSAGE_PARTIAL:
1261                 {
1262                 char **ap, **ep;
1263                 struct partial *p;
1264
1265                 p = mh_xcalloc(1, sizeof(*p));
1266                 ct->c_ctparams = (void *) p;
1267
1268                 /*
1269                 ** scan for parameters "id", "number",
1270                 ** and "total"
1271                 */
1272                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1273                         if (!mh_strcasecmp(*ap, "id")) {
1274                                 p->pm_partid = mh_xstrdup(*ep);
1275                                 continue;
1276                         }
1277                         if (!mh_strcasecmp(*ap, "number")) {
1278                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1279 invalid_param:
1280                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1281                                         return NOTOK;
1282                                 }
1283                                 continue;
1284                         }
1285                         if (!mh_strcasecmp(*ap, "total")) {
1286                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1287                                                 p->pm_maxno < 1)
1288                                         goto invalid_param;
1289                                 continue;
1290                         }
1291                 }
1292
1293                 if (!p->pm_partid || !p->pm_partno
1294                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1295                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1296                         return NOTOK;
1297                 }
1298                 }
1299                 break;
1300
1301         case MESSAGE_EXTERNAL:
1302                 {
1303                 CT p;
1304                 FILE *fp;
1305
1306                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1307                         advise(ct->c_file, "unable to open for reading");
1308                         return NOTOK;
1309                 }
1310
1311                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1312
1313                 if (!(p = get_content(fp, ct->c_file, 0))) {
1314                         ct->c_fp = NULL;
1315                         return NOTOK;
1316                 }
1317
1318                 p->c_fp = NULL;
1319                 p->c_end = p->c_begin;
1320
1321                 fclose(ct->c_fp);
1322                 ct->c_fp = NULL;
1323
1324                 switch (p->c_type) {
1325                 case CT_MULTIPART:
1326                         break;
1327
1328                 case CT_MESSAGE:
1329                         if (p->c_subtype != MESSAGE_RFC822)
1330                                 break;
1331                         /* else fall... */
1332                 default:
1333                         if (p->c_ctinitfnx)
1334                                 (*p->c_ctinitfnx) (p);
1335                         break;
1336                 }
1337                 }
1338                 break;
1339
1340         default:
1341                 break;
1342         }
1343
1344         return OK;
1345 }
1346
1347
1348 /*
1349 ** APPLICATION
1350 */
1351
1352 static int
1353 InitApplication(CT ct)
1354 {
1355         struct k2v *kv;
1356         CI ci = &ct->c_ctinfo;
1357
1358         /* match subtype */
1359         for (kv = SubApplication; kv->kv_key; kv++)
1360                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1361                         break;
1362         ct->c_subtype = kv->kv_value;
1363
1364         return OK;
1365 }
1366
1367
1368 /*
1369 ** TRANSFER ENCODINGS
1370 */
1371
1372 static int
1373 init_encoding(CT ct, OpenCEFunc openfnx)
1374 {
1375         CE ce;
1376
1377         ce = mh_xcalloc(1, sizeof(*ce));
1378
1379         ct->c_cefile     = ce;
1380         ct->c_ceopenfnx  = openfnx;
1381         ct->c_ceclosefnx = close_encoding;
1382         ct->c_cesizefnx  = size_encoding;
1383
1384         return OK;
1385 }
1386
1387
1388 void
1389 close_encoding(CT ct)
1390 {
1391         CE ce;
1392
1393         if (!(ce = ct->c_cefile))
1394                 return;
1395
1396         if (ce->ce_fp) {
1397                 fclose(ce->ce_fp);
1398                 ce->ce_fp = NULL;
1399         }
1400 }
1401
1402
1403 static unsigned long
1404 size_encoding(CT ct)
1405 {
1406         int fd;
1407         unsigned long size;
1408         char *file;
1409         CE ce;
1410         struct stat st;
1411
1412         if (!(ce = ct->c_cefile))
1413                 return (ct->c_end - ct->c_begin);
1414
1415         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1416                 return (long) st.st_size;
1417
1418         if (ce->ce_file) {
1419                 if (stat(ce->ce_file, &st) != NOTOK)
1420                         return (long) st.st_size;
1421                 else
1422                         return 0L;
1423         }
1424
1425         if (ct->c_encoding == CE_EXTERNAL)
1426                 return (ct->c_end - ct->c_begin);
1427
1428         file = NULL;
1429         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1430                 return (ct->c_end - ct->c_begin);
1431
1432         if (fstat(fd, &st) != NOTOK)
1433                 size = (long) st.st_size;
1434         else
1435                 size = 0L;
1436
1437         (*ct->c_ceclosefnx) (ct);
1438         return size;
1439 }
1440
1441
1442 /*
1443 ** BASE64
1444 */
1445
/*
** Maps a 7-bit character code to its 6-bit BASE64 value.
** Codes that are not part of the BASE64 alphabet map to 0xff.
** One row holds 16 consecutive codes, starting at the code noted.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20: '+' at 0x2b, '/' at 0x2f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        /* 0x30: '0' - '9' */
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40: 'A' - 'O' from 0x41 on */
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        /* 0x50: 'P' - 'Z' */
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60: 'a' - 'o' from 0x61 on */
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        /* 0x70: 'p' - 'z' */
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1464
1465
1466 static int
1467 InitBase64(CT ct)
1468 {
1469         return init_encoding(ct, openBase64);
1470 }
1471
1472
1473 static int
1474 openBase64(CT ct, char **file)
1475 {
1476         int bitno, cc;
1477         int fd, len, skip, own_ct_fp = 0;
1478         unsigned long bits;
1479         unsigned char value, *b, *b1, *b2, *b3;
1480         unsigned char *cp, *ep;
1481         char buffer[BUFSIZ];
1482         /* sbeck -- handle suffixes */
1483         CI ci;
1484         CE ce;
1485
1486         b  = (unsigned char *) &bits;
1487         b1 = &b[endian > 0 ? 1 : 2];
1488         b2 = &b[endian > 0 ? 2 : 1];
1489         b3 = &b[endian > 0 ? 3 : 0];
1490
1491         ce = ct->c_cefile;
1492         if (ce->ce_fp) {
1493                 fseek(ce->ce_fp, 0L, SEEK_SET);
1494                 goto ready_to_go;
1495         }
1496
1497         if (ce->ce_file) {
1498                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1499                         content_error(ce->ce_file, ct,
1500                                         "unable to fopen for reading");
1501                         return NOTOK;
1502                 }
1503                 goto ready_to_go;
1504         }
1505
1506         if (*file == NULL) {
1507                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1508                 ce->ce_unlink = 1;
1509         } else {
1510                 ce->ce_file = mh_xstrdup(*file);
1511                 ce->ce_unlink = 0;
1512         }
1513
1514         /* sbeck@cise.ufl.edu -- handle suffixes */
1515         ci = &ct->c_ctinfo;
1516         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1517                         invo_name, ci->ci_type, ci->ci_subtype);
1518         cp = context_find(buffer);
1519         if (cp == NULL || *cp == '\0') {
1520                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1521                                 ci->ci_type);
1522                 cp = context_find(buffer);
1523         }
1524         if (cp != NULL && *cp != '\0') {
1525                 if (ce->ce_unlink) {
1526                         /*
1527                         ** Temporary file already exists, so we rename to
1528                         ** version with extension.
1529                         */
1530                         char *file_org = mh_xstrdup(ce->ce_file);
1531                         ce->ce_file = add(cp, ce->ce_file);
1532                         if (rename(file_org, ce->ce_file)) {
1533                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1534                                                 file_org);
1535                         }
1536                         mh_free0(&file_org);
1537
1538                 } else {
1539                         ce->ce_file = add(cp, ce->ce_file);
1540                 }
1541         }
1542
1543         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1544                 content_error(ce->ce_file, ct,
1545                                 "unable to fopen for reading/writing");
1546                 return NOTOK;
1547         }
1548
1549         if ((len = ct->c_end - ct->c_begin) < 0)
1550                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1551
1552         if (!ct->c_fp) {
1553                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1554                         content_error(ct->c_file, ct,
1555                                         "unable to open for reading");
1556                         return NOTOK;
1557                 }
1558                 own_ct_fp = 1;
1559         }
1560
1561         bitno = 18;
1562         bits = 0L;
1563         skip = 0;
1564
1565         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1566         while (len > 0) {
1567                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1568                 case NOTOK:
1569                         content_error(ct->c_file, ct, "error reading from");
1570                         goto clean_up;
1571
1572                 case OK:
1573                         content_error(NULL, ct, "premature eof");
1574                         goto clean_up;
1575
1576                 default:
1577                         if (cc > len)
1578                                 cc = len;
1579                         len -= cc;
1580
1581                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1582                                 switch (*cp) {
1583                                 default:
1584                                         if (isspace(*cp))
1585                                                 break;
1586                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1587                                                 if (debugsw) {
1588                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1589                                                 }
1590                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1591                                                 continue;
1592                                         }
1593
1594                                         bits |= value << bitno;
1595 test_end:
1596                                         if ((bitno -= 6) < 0) {
1597                                                 putc((char) *b1, ce->ce_fp);
1598                                                 if (skip < 2) {
1599                                                         putc((char) *b2, ce->ce_fp);
1600                                                         if (skip < 1) {
1601                                                                 putc((char) *b3, ce->ce_fp);
1602                                                         }
1603                                                 }
1604
1605                                                 if (ferror(ce->ce_fp)) {
1606                                                         content_error(ce->ce_file, ct,
1607                                                                                    "error writing to");
1608                                                         goto clean_up;
1609                                                 }
1610                                                 bitno = 18, bits = 0L, skip = 0;
1611                                         }
1612                                         break;
1613
1614                                 case '=':
1615                                         if (++skip > 3)
1616                                                 goto self_delimiting;
1617                                         goto test_end;
1618                                 }
1619                         }
1620                 }
1621         }
1622
1623         if (bitno != 18) {
1624                 if (debugsw)
1625                         fprintf(stderr, "premature ending (bitno %d)\n",
1626                                         bitno);
1627
1628                 content_error(NULL, ct, "invalid BASE64 encoding");
1629                 goto clean_up;
1630         }
1631
1632 self_delimiting:
1633         fseek(ct->c_fp, 0L, SEEK_SET);
1634
1635         if (fflush(ce->ce_fp)) {
1636                 content_error(ce->ce_file, ct, "error writing to");
1637                 goto clean_up;
1638         }
1639
1640         fseek(ce->ce_fp, 0L, SEEK_SET);
1641
1642 ready_to_go:
1643         *file = ce->ce_file;
1644         if (own_ct_fp) {
1645                 fclose(ct->c_fp);
1646                 ct->c_fp = NULL;
1647         }
1648         return fileno(ce->ce_fp);
1649
1650 clean_up:
1651         free_encoding(ct, 0);
1652         if (own_ct_fp) {
1653                 fclose(ct->c_fp);
1654                 ct->c_fp = NULL;
1655         }
1656         return NOTOK;
1657 }
1658
1659
1660 /*
1661 ** QUOTED PRINTABLE
1662 */
1663
/*
** Maps a 7-bit character code to the value of the hexadecimal digit
** it represents.  Entries not listed are zero, so codes that are no
** hexadecimal digits map to 0.
*/
static char hex2nib[0x80] = {
        /* '0' - '9' */
        [0x30] = 0x00, [0x31] = 0x01, [0x32] = 0x02, [0x33] = 0x03,
        [0x34] = 0x04, [0x35] = 0x05, [0x36] = 0x06, [0x37] = 0x07,
        [0x38] = 0x08, [0x39] = 0x09,

        /* 'A' - 'F' */
        [0x41] = 0x0A, [0x42] = 0x0B, [0x43] = 0x0C,
        [0x44] = 0x0D, [0x45] = 0x0E, [0x46] = 0x0F,

        /* 'a' - 'f' */
        [0x61] = 0x0a, [0x62] = 0x0b, [0x63] = 0x0c,
        [0x64] = 0x0d, [0x65] = 0x0e, [0x66] = 0x0f
};
1682
1683
1684 static int
1685 InitQuoted(CT ct)
1686 {
1687         return init_encoding(ct, openQuoted);
1688 }
1689
1690
1691 static int
1692 openQuoted(CT ct, char **file)
1693 {
1694         int cc, len, quoted, own_ct_fp = 0;
1695         unsigned char *cp, *ep;
1696         char buffer[BUFSIZ];
1697         unsigned char mask = 0;
1698         CE ce;
1699         /* sbeck -- handle suffixes */
1700         CI ci;
1701
1702         ce = ct->c_cefile;
1703         if (ce->ce_fp) {
1704                 fseek(ce->ce_fp, 0L, SEEK_SET);
1705                 goto ready_to_go;
1706         }
1707
1708         if (ce->ce_file) {
1709                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1710                         content_error(ce->ce_file, ct,
1711                                         "unable to fopen for reading");
1712                         return NOTOK;
1713                 }
1714                 goto ready_to_go;
1715         }
1716
1717         if (*file == NULL) {
1718                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1719                 ce->ce_unlink = 1;
1720         } else {
1721                 ce->ce_file = mh_xstrdup(*file);
1722                 ce->ce_unlink = 0;
1723         }
1724
1725         /* sbeck@cise.ufl.edu -- handle suffixes */
1726         ci = &ct->c_ctinfo;
1727         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1728                         invo_name, ci->ci_type, ci->ci_subtype);
1729         cp = context_find(buffer);
1730         if (cp == NULL || *cp == '\0') {
1731                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1732                                 ci->ci_type);
1733                 cp = context_find(buffer);
1734         }
1735         if (cp != NULL && *cp != '\0') {
1736                 if (ce->ce_unlink) {
1737                         /*
1738                         ** Temporary file already exists, so we rename to
1739                         ** version with extension.
1740                         */
1741                         char *file_org = mh_xstrdup(ce->ce_file);
1742                         ce->ce_file = add(cp, ce->ce_file);
1743                         if (rename(file_org, ce->ce_file)) {
1744                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1745                                                 file_org);
1746                         }
1747                         mh_free0(&file_org);
1748
1749                 } else {
1750                         ce->ce_file = add(cp, ce->ce_file);
1751                 }
1752         }
1753
1754         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1755                 content_error(ce->ce_file, ct,
1756                                 "unable to fopen for reading/writing");
1757                 return NOTOK;
1758         }
1759
1760         if ((len = ct->c_end - ct->c_begin) < 0)
1761                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1762
1763         if (!ct->c_fp) {
1764                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1765                         content_error(ct->c_file, ct,
1766                                         "unable to open for reading");
1767                         return NOTOK;
1768                 }
1769                 own_ct_fp = 1;
1770         }
1771
1772         quoted = 0;
1773
1774         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1775         while (len > 0) {
1776                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1777                         content_error(NULL, ct, "premature eof");
1778                         goto clean_up;
1779                 }
1780
1781                 if ((cc = strlen(buffer)) > len)
1782                         cc = len;
1783                 len -= cc;
1784
1785                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1786                         if (!isspace(*ep))
1787                                 break;
1788                 *++ep = '\n', ep++;
1789
1790                 for (; cp < ep; cp++) {
1791                         if (quoted > 0) {
1792                                 /* in an escape sequence */
1793                                 if (quoted == 1) {
1794                                         /* at byte 1 of an escape sequence */
1795                                         mask = hex2nib[*cp & 0x7f];
1796                                         /* next is byte 2 */
1797                                         quoted = 2;
1798                                 } else {
1799                                         /* at byte 2 of an escape sequence */
1800                                         mask <<= 4;
1801                                         mask |= hex2nib[*cp & 0x7f];
1802                                         putc(mask, ce->ce_fp);
1803                                         if (ferror(ce->ce_fp)) {
1804                                                 content_error(ce->ce_file, ct, "error writing to");
1805                                                 goto clean_up;
1806                                         }
1807                                         /*
1808                                         ** finished escape sequence; next may
1809                                         ** be literal or a new escape sequence
1810                                         */
1811                                         quoted = 0;
1812                                 }
1813                                 /* on to next byte */
1814                                 continue;
1815                         }
1816
1817                         /* not in an escape sequence */
1818                         if (*cp == '=') {
1819                                 /*
1820                                 ** starting an escape sequence,
1821                                 ** or invalid '='?
1822                                 */
1823                                 if (cp + 1 < ep && cp[1] == '\n') {
1824                                         /* "=\n" soft line break, eat the \n */
1825                                         cp++;
1826                                         continue;
1827                                 }
1828                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1829                                         /*
1830                                         ** We don't have 2 bytes left,
1831                                         ** so this is an invalid escape
1832                                         ** sequence; just show the raw bytes
1833                                         ** (below).
1834                                         */
1835                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1836                                         /*
1837                                         ** Next 2 bytes are hex digits,
1838                                         ** making this a valid escape
1839                                         ** sequence; let's decode it (above).
1840                                         */
1841                                         quoted = 1;
1842                                         continue;
1843                                 } else {
1844                                         /*
1845                                         ** One or both of the next 2 is
1846                                         ** out of range, making this an
1847                                         ** invalid escape sequence; just
1848                                         ** show the raw bytes (below).
1849                                         */
1850                                 }
1851                         }
1852
1853                         /* Just show the raw byte. */
1854                         putc(*cp, ce->ce_fp);
1855                         if (ferror(ce->ce_fp)) {
1856                                 content_error(ce->ce_file, ct,
1857                                                 "error writing to");
1858                                 goto clean_up;
1859                         }
1860                 }
1861         }
1862         if (quoted) {
1863                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1864                 goto clean_up;
1865         }
1866
1867         fseek(ct->c_fp, 0L, SEEK_SET);
1868
1869         if (fflush(ce->ce_fp)) {
1870                 content_error(ce->ce_file, ct, "error writing to");
1871                 goto clean_up;
1872         }
1873
1874         fseek(ce->ce_fp, 0L, SEEK_SET);
1875
1876 ready_to_go:
1877         *file = ce->ce_file;
1878         if (own_ct_fp) {
1879                 fclose(ct->c_fp);
1880                 ct->c_fp = NULL;
1881         }
1882         return fileno(ce->ce_fp);
1883
1884 clean_up:
1885         free_encoding(ct, 0);
1886         if (own_ct_fp) {
1887                 fclose(ct->c_fp);
1888                 ct->c_fp = NULL;
1889         }
1890         return NOTOK;
1891 }
1892
1893
1894 /*
1895 ** 7BIT
1896 */
1897
1898 static int
1899 Init7Bit(CT ct)
1900 {
1901         if (init_encoding(ct, open7Bit) == NOTOK)
1902                 return NOTOK;
1903
1904         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1905         return OK;
1906 }
1907
1908
1909 int
1910 open7Bit(CT ct, char **file)
1911 {
1912         int cc, fd, len, own_ct_fp = 0;
1913         char buffer[BUFSIZ];
1914         /* sbeck -- handle suffixes */
1915         char *cp;
1916         CI ci;
1917         CE ce;
1918
1919         ce = ct->c_cefile;
1920         if (ce->ce_fp) {
1921                 fseek(ce->ce_fp, 0L, SEEK_SET);
1922                 goto ready_to_go;
1923         }
1924
1925         if (ce->ce_file) {
1926                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1927                         content_error(ce->ce_file, ct,
1928                                         "unable to fopen for reading");
1929                         return NOTOK;
1930                 }
1931                 goto ready_to_go;
1932         }
1933
1934         if (*file == NULL) {
1935                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1936                 ce->ce_unlink = 1;
1937         } else {
1938                 ce->ce_file = mh_xstrdup(*file);
1939                 ce->ce_unlink = 0;
1940         }
1941
1942         /* sbeck@cise.ufl.edu -- handle suffixes */
1943         ci = &ct->c_ctinfo;
1944         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1945                         invo_name, ci->ci_type, ci->ci_subtype);
1946         cp = context_find(buffer);
1947         if (cp == NULL || *cp == '\0') {
1948                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1949                                 ci->ci_type);
1950                 cp = context_find(buffer);
1951         }
1952         if (cp != NULL && *cp != '\0') {
1953                 if (ce->ce_unlink) {
1954                         /*
1955                         ** Temporary file already exists, so we rename to
1956                         ** version with extension.
1957                         */
1958                         char *file_org = mh_xstrdup(ce->ce_file);
1959                         ce->ce_file = add(cp, ce->ce_file);
1960                         if (rename(file_org, ce->ce_file)) {
1961                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1962                                                 file_org);
1963                         }
1964                         mh_free0(&file_org);
1965
1966                 } else {
1967                         ce->ce_file = add(cp, ce->ce_file);
1968                 }
1969         }
1970
1971         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1972                 content_error(ce->ce_file, ct,
1973                                 "unable to fopen for reading/writing");
1974                 return NOTOK;
1975         }
1976
1977         if (ct->c_type == CT_MULTIPART) {
1978                 char **ap, **ep;
1979                 CI ci = &ct->c_ctinfo;
1980
1981                 len = 0;
1982                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1983                                 ci->ci_subtype);
1984                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1985                                 strlen(ci->ci_subtype);
1986                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1987                         putc(';', ce->ce_fp);
1988                         len++;
1989
1990                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1991                                         *ap, *ep);
1992
1993                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1994                                 fputs("\n\t", ce->ce_fp);
1995                                 len = 8;
1996                         } else {
1997                                 putc(' ', ce->ce_fp);
1998                                 len++;
1999                         }
2000                         fprintf(ce->ce_fp, "%s", buffer);
2001                         len += cc;
2002                 }
2003
2004                 if (ci->ci_comment) {
2005                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2006                                                 >= CPERLIN) {
2007                                 fputs("\n\t", ce->ce_fp);
2008                                 len = 8;
2009                         } else {
2010                                 putc(' ', ce->ce_fp);
2011                                 len++;
2012                         }
2013                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2014                         len += cc;
2015                 }
2016                 fprintf(ce->ce_fp, "\n");
2017                 if (ct->c_id)
2018                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2019                 if (ct->c_descr)
2020                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2021                 if (ct->c_dispo)
2022                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2023                 fprintf(ce->ce_fp, "\n");
2024         }
2025
2026         if ((len = ct->c_end - ct->c_begin) < 0)
2027                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2028
2029         if (!ct->c_fp) {
2030                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2031                         content_error(ct->c_file, ct,
2032                                         "unable to open for reading");
2033                         return NOTOK;
2034                 }
2035                 own_ct_fp = 1;
2036         }
2037
2038         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2039         while (len > 0)
2040                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2041                 case NOTOK:
2042                         content_error(ct->c_file, ct, "error reading from");
2043                         goto clean_up;
2044
2045                 case OK:
2046                         content_error(NULL, ct, "premature eof");
2047                         goto clean_up;
2048
2049                 default:
2050                         if (cc > len)
2051                                 cc = len;
2052                         len -= cc;
2053
2054                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2055                         if (ferror(ce->ce_fp)) {
2056                                 content_error(ce->ce_file, ct,
2057                                                 "error writing to");
2058                                 goto clean_up;
2059                         }
2060                 }
2061
2062         fseek(ct->c_fp, 0L, SEEK_SET);
2063
2064         if (fflush(ce->ce_fp)) {
2065                 content_error(ce->ce_file, ct, "error writing to");
2066                 goto clean_up;
2067         }
2068
2069         fseek(ce->ce_fp, 0L, SEEK_SET);
2070
2071 ready_to_go:
2072         *file = ce->ce_file;
2073         if (own_ct_fp) {
2074                 fclose(ct->c_fp);
2075                 ct->c_fp = NULL;
2076         }
2077         return fileno(ce->ce_fp);
2078
2079 clean_up:
2080         free_encoding(ct, 0);
2081         if (own_ct_fp) {
2082                 fclose(ct->c_fp);
2083                 ct->c_fp = NULL;
2084         }
2085         return NOTOK;
2086 }