Factor trim format function out
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/*
** Globals that are defined in other translation units.
*/
extern int debugsw;   /* when non-zero, parsed fields are echoed to stderr */
extern int endian;    /* mhmisc.c */
extern pid_t xpid;    /* mhshowsbr.c */

/*
** Directory in which temporary files are placed.  It has to be
** set by the caller before any routine of this file is used.
*/
char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Inspect a status word (presumably as returned by wait(2) -- confirm
** with the callers).  If its low seven bits equal SIGQUIT and the
** 0xff00 byte is not all ones, flush stdout and stderr and terminate
** the process with EX_SOFTWARE.  In every other case the status is
** handed back unchanged.
*/
int
pidcheck(int status)
{
	int high_byte = status & 0xff00;
	int low_bits = status & 0x007f;

	if (high_byte != 0xff00 && low_bits == SIGQUIT) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = mh_xstrdup(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD2;;) {
252                 switch (state = m_getfld2(state, &f, in)) {
253                 case LENERR2:
254                         state = FLD2;
255                         /* FALL */
256                 case FLD2:
257                         compnum++;
258
259                         /* add the header data to the list */
260                         add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
261
262                         ct->c_begin = ftell(in) + 1;
263                         continue;
264
265                 case BODY2:
266                         ct->c_begin = ftell(in) - strlen(f.value);
267                         break;
268
269                 case FILEEOF2:
270                         ct->c_begin = ftell(in);
271                         break;
272
273                 case FMTERR2:
274                         advise(NULL, "message format error in component #%d", compnum);
275                         state = FLD2;
276                         continue;
277
278                 case IOERR2:
279                         adios(EX_IOERR, "m_getfld2", "io error");
280
281                 default:
282                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
283                 }
284                 break;
285         }
286
287         /*
288         ** Read the content headers.  We will parse the
289         ** MIME related header fields into their various
290         ** structures and set internal flags related to
291         ** content type/subtype, etc.
292         */
293
294         hp = ct->c_first_hf;  /* start at first header field */
295         while (hp) {
296                 /* Get MIME-Version field */
297                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
298                         int ucmp;
299                         char c;
300                         unsigned char *cp, *dp;
301
302                         if (ct->c_vrsn) {
303                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
304                                 goto next_header;
305                         }
306                         ct->c_vrsn = mh_xstrdup(hp->value);
307
308                         /* Now, cleanup this field */
309                         cp = ct->c_vrsn;
310
311                         while (isspace(*cp))
312                                 cp++;
313                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
314                                 *dp++ = ' ';
315                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
316                                 if (!isspace(*dp))
317                                         break;
318                         *++dp = '\0';
319                         if (debugsw)
320                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
321
322                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
323                                 goto out;
324
325                         for (dp = cp; istoken(*dp); dp++)
326                                 continue;
327                         c = *dp;
328                         *dp = '\0';
329                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
330                         *dp = c;
331                         if (!ucmp) {
332                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
333                         }
334
335                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
336                         /* Get Content-Type field */
337                         struct str2init *s2i;
338                         CI ci = &ct->c_ctinfo;
339
340                         /* Check if we've already seen a Content-Type header */
341                         if (ct->c_ctline) {
342                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
343                                 goto next_header;
344                         }
345
346                         /* Parse the Content-Type field */
347                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
348                                 goto out;
349
350                         /*
351                         ** Set the Init function and the internal
352                         ** flag for this content type.
353                         */
354                         for (s2i = str2cts; s2i->si_key; s2i++)
355                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
356                                         break;
357                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
358                                 s2i++;
359                         ct->c_type = s2i->si_val;
360                         ct->c_ctinitfnx = s2i->si_init;
361
362                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
363                         /* Get Content-Transfer-Encoding field */
364                         char c;
365                         unsigned char *cp, *dp;
366                         struct str2init *s2i;
367
368                         /*
369                         ** Check if we've already seen the
370                         ** Content-Transfer-Encoding field
371                         */
372                         if (ct->c_celine) {
373                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
374                                 goto next_header;
375                         }
376
377                         /* get copy of this field */
378                         ct->c_celine = cp = mh_xstrdup(hp->value);
379
380                         while (isspace(*cp))
381                                 cp++;
382                         for (dp = cp; istoken(*dp); dp++)
383                                 continue;
384                         c = *dp;
385                         *dp = '\0';
386
387                         /*
388                         ** Find the internal flag and Init function
389                         ** for this transfer encoding.
390                         */
391                         for (s2i = str2ces; s2i->si_key; s2i++)
392                                 if (!mh_strcasecmp(cp, s2i->si_key))
393                                         break;
394                         if (!s2i->si_key && !uprf(cp, "X-"))
395                                 s2i++;
396                         *dp = c;
397                         ct->c_encoding = s2i->si_val;
398
399                         /* Call the Init function for this encoding */
400                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
401                                 goto out;
402
403                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
404                         /* Get Content-ID field */
405                         ct->c_id = add(hp->value, ct->c_id);
406
407                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
408                         /* Get Content-Description field */
409                         ct->c_descr = add(hp->value, ct->c_descr);
410
411                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
412                         /* Get Content-Disposition field */
413                         ct->c_dispo = add(hp->value, ct->c_dispo);
414                 }
415
416 next_header:
417                 hp = hp->next;  /* next header field */
418         }
419
420         /*
421         ** Check if we saw a Content-Type field.
422         ** If not, then assign a default value for
423         ** it, and the Init function.
424         */
425         if (!ct->c_ctline) {
426                 /*
427                 ** If we are inside a multipart/digest message,
428                 ** so default type is message/rfc822
429                 */
430                 if (toplevel < 0) {
431                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
432                                 goto out;
433                         ct->c_type = CT_MESSAGE;
434                         ct->c_ctinitfnx = InitMessage;
435                 } else {
436                         /*
437                         ** Else default type is text/plain
438                         */
439                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
440                                 goto out;
441                         ct->c_type = CT_TEXT;
442                         ct->c_ctinitfnx = InitText;
443                 }
444         }
445
446         /* Use default Transfer-Encoding, if necessary */
447         if (!ct->c_celine) {
448                 ct->c_encoding = CE_7BIT;
449                 Init7Bit(ct);
450         }
451
452         return ct;
453
454 out:
455         free_content(ct);
456         return NULL;
457 }
458
459
460 /*
461 ** small routine to add header field to list
462 */
463
464 int
465 add_header(CT ct, char *name, char *value)
466 {
467         HF hp;
468
469         /* allocate header field structure */
470         hp = mh_xcalloc(1, sizeof(*hp));
471
472         /* link data into header structure */
473         hp->name = name;
474         hp->value = value;
475         hp->next = NULL;
476
477         /* link header structure into the list */
478         if (ct->c_first_hf == NULL) {
479                 ct->c_first_hf = hp;  /* this is the first */
480                 ct->c_last_hf = hp;
481         } else {
482                 ct->c_last_hf->next = hp;  /* add it to the end */
483                 ct->c_last_hf = hp;
484         }
485
486         return 0;
487 }
488
489
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** Ownership: if the parameter gets inserted, buf is freed and a newly
** allocated, newline-terminated string is returned.  In all other
** cases (buf or value is NULL, or name= is already present) buf
** itself is returned, unmodified.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *newbuf = buf;

	/* Assume that name is non-null. */
	if (buf && value) {
		char *name_plus_equal = concat(name, "=", NULL);

		if (!strstr(buf, name_plus_equal)) {
			char *insertion;
			char *semicolon;
			char *prefix, *suffix;
			size_t len;

			/*
			** Trim trailing space, esp. newline.  Done by
			** length, as an empty buf has no last character
			** to start at.
			*/
			for (len = strlen(buf);
					len > 0 && isspace(buf[len - 1]); len--) {
				buf[len - 1] = '\0';
			}

			insertion = concat("; ", name, "=", "\"", value, "\"",
					NULL);

			/*
			** Insert at first semicolon, if any.
			** If none, append to end.
			*/
			prefix = mh_xstrdup(buf);
			if ((semicolon = strchr(prefix, ';'))) {
				/* save the tail, then cut prefix off there */
				suffix = concat(semicolon, NULL);
				*semicolon = '\0';
				newbuf = concat(prefix, insertion, suffix,
						"\n", NULL);
				mh_free0(&suffix);
			} else {
				/* Append to end. */
				newbuf = concat(buf, insertion, "\n", NULL);
			}

			mh_free0(&prefix);
			mh_free0(&insertion);
			mh_free0(&buf);
		}

		mh_free0(&name_plus_equal);
	}

	return newbuf;
}
547
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** A quoted string that lacks its closing quote is treated like
** "there isn't one", i.e. the entire value is returned.  (The search
** for the closing quote stops at the end of the string.)
**
** Ownership: the result is either `value' itself or a newly allocated
** string; the caller can tell the two apart by comparing the pointers.
*/
char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern = concat(name_suffix, "=\"", NULL);
	size_t pattern_len = strlen(pattern);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	mh_free0(&pattern);
	if (!match) {
		return value;
	}

	/* the quoted text starts right behind name_suffix=" */
	begin = match + pattern_len;
	end = strchr(begin, '"');
	if (!end) {
		/* unterminated quoted string */
		return value;
	}

	len = end - begin;
	extracted = mh_xcalloc(len + 1, sizeof(char));
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
580
581 /*
582 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
583 ** directives.  Fills in the information of the CTinfo structure.
584 */
585 int
586 get_ctinfo(unsigned char *cp, CT ct, int magic)
587 {
588         int i;
589         unsigned char *dp;
590         char **ap, **ep;
591         char c;
592         CI ci;
593
594         ci = &ct->c_ctinfo;
595         i = strlen(invo_name) + 2;
596
597         /* store copy of Content-Type line */
598         cp = ct->c_ctline = mh_xstrdup(cp);
599
600         while (isspace(*cp))  /* trim leading spaces */
601                 cp++;
602
603         /* change newlines to spaces */
604         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
605                 *dp++ = ' ';
606
607         /* trim trailing spaces */
608         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
609                 if (!isspace(*dp))
610                         break;
611         *++dp = '\0';
612
613         if (debugsw)
614                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
615
616         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
617                 return NOTOK;
618
619         for (dp = cp; istoken(*dp); dp++)
620                 continue;
621         c = *dp, *dp = '\0';
622         ci->ci_type = mh_xstrdup(cp);  /* store content type */
623         *dp = c, cp = dp;
624
625         if (!*ci->ci_type) {
626                 advise(NULL, "invalid %s: field in message %s (empty type)",
627                                 TYPE_FIELD, ct->c_file);
628                 return NOTOK;
629         }
630
631         /* down case the content type string */
632         for (dp = ci->ci_type; *dp; dp++)
633                 if (isalpha(*dp) && isupper(*dp))
634                         *dp = tolower(*dp);
635
636         while (isspace(*cp))
637                 cp++;
638
639         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
640                 return NOTOK;
641
642         if (*cp != '/') {
643                 if (!magic)
644                         ci->ci_subtype = mh_xstrdup("");
645                 goto magic_skip;
646         }
647
648         cp++;
649         while (isspace(*cp))
650                 cp++;
651
652         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
653                 return NOTOK;
654
655         for (dp = cp; istoken(*dp); dp++)
656                 continue;
657         c = *dp, *dp = '\0';
658         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
659         *dp = c, cp = dp;
660
661         if (!*ci->ci_subtype) {
662                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
663                 return NOTOK;
664         }
665
666         /* down case the content subtype string */
667         for (dp = ci->ci_subtype; *dp; dp++)
668                 if (isalpha(*dp) && isupper(*dp))
669                         *dp = tolower(*dp);
670
671 magic_skip:
672         while (isspace(*cp))
673                 cp++;
674
675         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
676                 return NOTOK;
677
678         /*
679         ** Parse attribute/value pairs given with Content-Type
680         */
681         ep = (ap = ci->ci_attrs) + NPARMS;
682         while (*cp == ';') {
683                 char *vp;
684                 unsigned char *up;
685
686                 if (ap >= ep) {
687                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
688                         return NOTOK;
689                 }
690
691                 cp++;
692                 while (isspace(*cp))
693                         cp++;
694
695                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
696                         return NOTOK;
697
698                 if (*cp == 0) {
699                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
700                         return OK;
701                 }
702
703                 /* down case the attribute name */
704                 for (dp = cp; istoken(*dp); dp++)
705                         if (isalpha(*dp) && isupper(*dp))
706                                 *dp = tolower(*dp);
707
708                 for (up = dp; isspace(*dp);)
709                         dp++;
710                 if (dp == cp || *dp != '=') {
711                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
712                         return NOTOK;
713                 }
714
715                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
716                 *vp = '\0';
717                 for (dp++; isspace(*dp);)
718                         dp++;
719
720                 /* now add the attribute value */
721                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
722
723                 if (*dp == '"') {
724                         for (cp = ++dp, dp = vp;;) {
725                                 switch (c = *cp++) {
726                                 case '\0':
727 bad_quote:
728                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
729                                         return NOTOK;
730
731                                 case '\\':
732                                         *dp++ = c;
733                                         if ((c = *cp++) == '\0')
734                                                 goto bad_quote;
735                                         /* else fall... */
736
737                                 default:
738                                         *dp++ = c;
739                                         continue;
740
741                                 case '"':
742                                         *dp = '\0';
743                                         break;
744                                 }
745                                 break;
746                         }
747                 } else {
748                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
749                                 continue;
750                         *dp = '\0';
751                 }
752                 if (!*vp) {
753                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
754                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
755                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
756                         continue;
757                 }
758                 ap++;
759
760                 while (isspace(*cp))
761                         cp++;
762
763                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
764                         return NOTOK;
765         }
766
767         /*
768         ** Get any <Content-Id> given in buffer
769         */
770         if (magic && *cp == '<') {
771                 if (ct->c_id) {
772                         mh_free0(&(ct->c_id));
773                 }
774                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
775                         advise(NULL, "invalid ID in message %s", ct->c_file);
776                         return NOTOK;
777                 }
778                 c = *dp;
779                 *dp = '\0';
780                 if (*ct->c_id)
781                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
782                 else
783                         ct->c_id = NULL;
784                 *dp++ = c;
785                 cp = dp;
786
787                 while (isspace(*cp))
788                         cp++;
789         }
790
791         /*
792         ** Get any [Content-Description] given in buffer.
793         */
794         if (magic && *cp == '[') {
795                 ct->c_descr = ++cp;
796                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
797                         if (*dp == ']')
798                                 break;
799                 if (dp < cp) {
800                         advise(NULL, "invalid description in message %s",
801                                         ct->c_file);
802                         ct->c_descr = NULL;
803                         return NOTOK;
804                 }
805
806                 c = *dp;
807                 *dp = '\0';
808                 if (*ct->c_descr)
809                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
810                 else
811                         ct->c_descr = NULL;
812                 *dp++ = c;
813                 cp = dp;
814
815                 while (isspace(*cp))
816                         cp++;
817         }
818
819         /*
820         ** Get any {Content-Disposition} given in buffer.
821         */
822         if (magic && *cp == '{') {
823                 ct->c_dispo = ++cp;
824                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
825                         if (*dp == '}')
826                                 break;
827                 if (dp < cp) {
828                         advise(NULL, "invalid disposition in message %s",
829                                         ct->c_file);
830                         ct->c_dispo = NULL;
831                         return NOTOK;
832                 }
833
834                 c = *dp;
835                 *dp = '\0';
836                 if (*ct->c_dispo)
837                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
838                 else
839                         ct->c_dispo = NULL;
840                 *dp++ = c;
841                 cp = dp;
842
843                 while (isspace(*cp))
844                         cp++;
845         }
846
847         /*
848         ** Check if anything is left over
849         */
850         if (*cp) {
851                 if (magic) {
852                         ci->ci_magic = mh_xstrdup(cp);
853
854                         /*
855                         ** If there is a Content-Disposition header and
856                         ** it doesn't have a *filename=, extract it from
857                         ** the magic contents.  The mhbasename call skips
858                         ** any leading directory components.
859                         */
860                         if (ct->c_dispo)
861                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
862                         } else
863                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
864         }
865
866         return OK;
867 }
868
869
870 static int
871 get_comment(CT ct, unsigned char **ap, int istype)
872 {
873         int i;
874         char *bp;
875         unsigned char *cp;
876         char c, buffer[BUFSIZ], *dp;
877         CI ci;
878
879         ci = &ct->c_ctinfo;
880         cp = *ap;
881         bp = buffer;
882         cp++;
883
884         for (i = 0;;) {
885                 switch (c = *cp++) {
886                 case '\0':
887 invalid:
888                 advise(NULL, "invalid comment in message %s's %s: field",
889                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
890                 return NOTOK;
891
892                 case '\\':
893                         *bp++ = c;
894                         if ((c = *cp++) == '\0')
895                                 goto invalid;
896                         *bp++ = c;
897                         continue;
898
899                 case '(':
900                         i++;
901                         /* and fall... */
902                 default:
903                         *bp++ = c;
904                         continue;
905
906                 case ')':
907                         if (--i < 0)
908                                 break;
909                         *bp++ = c;
910                         continue;
911                 }
912                 break;
913         }
914         *bp = '\0';
915
916         if (istype) {
917                 if ((dp = ci->ci_comment)) {
918                         ci->ci_comment = concat(dp, " ", buffer, NULL);
919                         mh_free0(&dp);
920                 } else {
921                         ci->ci_comment = mh_xstrdup(buffer);
922                 }
923         }
924
925         while (isspace(*cp))
926                 cp++;
927
928         *ap = cp;
929         return OK;
930 }
931
932
933 /*
934 ** CONTENTS
935 **
936 ** Handles content types audio, image, and video.
937 ** There's not much to do right here.
938 */
939
940 static int
941 InitGeneric(CT ct)
942 {
943         return OK;  /* not much to do here */
944 }
945
946
947 /*
948 ** TEXT
949 */
950
951 static int
952 InitText(CT ct)
953 {
954         char **ap, **ep;
955         struct k2v *kv;
956         struct text *t;
957         CI ci = &ct->c_ctinfo;
958
959         /* check for missing subtype */
960         if (!*ci->ci_subtype)
961                 ci->ci_subtype = add("plain", ci->ci_subtype);
962
963         /* match subtype */
964         for (kv = SubText; kv->kv_key; kv++)
965                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
966                         break;
967         ct->c_subtype = kv->kv_value;
968
969         /* allocate text character set structure */
970         t = mh_xcalloc(1, sizeof(*t));
971         ct->c_ctparams = (void *) t;
972
973         /* scan for charset parameter */
974         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
975                 if (!mh_strcasecmp(*ap, "charset"))
976                         break;
977
978         /* check if content specified a character set */
979         if (*ap) {
980                 /* store its name */
981                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
982                 /* match character set or set to CHARSET_UNKNOWN */
983                 for (kv = Charset; kv->kv_key; kv++) {
984                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
985                                 break;
986                         }
987                 }
988                 t->tx_charset = kv->kv_value;
989         } else {
990                 t->tx_charset = CHARSET_UNSPECIFIED;
991         }
992
993         return OK;
994 }
995
996
997 /*
998 ** MULTIPART
999 */
1000
1001 static int
1002 InitMultiPart(CT ct)
1003 {
1004         int inout;
1005         long last, pos;
1006         unsigned char *cp, *dp;
1007         char **ap, **ep;
1008         char *bp, buffer[BUFSIZ];
1009         struct multipart *m;
1010         struct k2v *kv;
1011         struct part *part, **next;
1012         CI ci = &ct->c_ctinfo;
1013         CT p;
1014         FILE *fp;
1015
1016         /*
1017         ** The encoding for multipart messages must be either
1018         ** 7bit, 8bit, or binary (per RFC2045).
1019         */
1020         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1021                 && ct->c_encoding != CE_BINARY) {
1022                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1023                 ct->c_encoding = CE_7BIT;
1024         }
1025
1026         /* match subtype */
1027         for (kv = SubMultiPart; kv->kv_key; kv++)
1028                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1029                         break;
1030         ct->c_subtype = kv->kv_value;
1031
1032         /*
1033         ** Check for "boundary" parameter, which is
1034         ** required for multipart messages.
1035         */
1036         bp = 0;
1037         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1038                 if (!mh_strcasecmp(*ap, "boundary")) {
1039                         bp = *ep;
1040                         break;
1041                 }
1042         }
1043
1044         /* complain if boundary parameter is missing */
1045         if (!*ap) {
1046                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1047                 return NOTOK;
1048         }
1049
1050         /* allocate primary structure for multipart info */
1051         m = mh_xcalloc(1, sizeof(*m));
1052         ct->c_ctparams = (void *) m;
1053
1054         /* check if boundary parameter contains only whitespace characters */
1055         for (cp = bp; isspace(*cp); cp++)
1056                 continue;
1057         if (!*cp) {
1058                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1059                 return NOTOK;
1060         }
1061
1062         /* remove trailing whitespace from boundary parameter */
1063         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1064                 if (!isspace(*dp))
1065                         break;
1066         *++dp = '\0';
1067
1068         /* record boundary separators */
1069         m->mp_start = concat(bp, "\n", NULL);
1070         m->mp_stop = concat(bp, "--\n", NULL);
1071
1072         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1073                 advise(ct->c_file, "unable to open for reading");
1074                 return NOTOK;
1075         }
1076
1077         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1078         last = ct->c_end;
1079         next = &m->mp_parts;
1080         part = NULL;
1081         inout = 1;
1082
1083         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1084                 if (pos > last)
1085                         break;
1086
1087                 pos += strlen(buffer);
1088                 if (buffer[0] != '-' || buffer[1] != '-')
1089                         continue;
1090                 if (inout) {
1091                         if (strcmp(buffer + 2, m->mp_start)!=0)
1092                                 continue;
1093 next_part:
1094                         part = mh_xcalloc(1, sizeof(*part));
1095                         *next = part;
1096                         next = &part->mp_next;
1097
1098                         if (!(p = get_content(fp, ct->c_file,
1099                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1100                                 ct->c_fp = NULL;
1101                                 return NOTOK;
1102                         }
1103                         p->c_fp = NULL;
1104                         part->mp_part = p;
1105                         pos = p->c_begin;
1106                         fseek(fp, pos, SEEK_SET);
1107                         inout = 0;
1108                 } else {
1109                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1110                                 inout = 1;
1111 end_part:
1112                                 p = part->mp_part;
1113                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1114                                 if (p->c_end < p->c_begin)
1115                                         p->c_begin = p->c_end;
1116                                 if (inout)
1117                                         goto next_part;
1118                                 goto last_part;
1119                         } else {
1120                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1121                                         goto end_part;
1122                         }
1123                 }
1124         }
1125
1126         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1127         if (!inout && part) {
1128                 p = part->mp_part;
1129                 p->c_end = ct->c_end;
1130
1131                 if (p->c_begin >= p->c_end) {
1132                         for (next = &m->mp_parts; *next != part;
1133                                 next = &((*next)->mp_next))
1134                                 continue;
1135                         *next = NULL;
1136                         free_content(p);
1137                         mh_free0(&part);
1138                 }
1139         }
1140
1141 last_part:
1142         /* reverse the order of the parts for multipart/alternative */
1143         if (ct->c_subtype == MULTI_ALTERNATE)
1144                 reverse_parts(ct);
1145
1146         /*
1147         ** label all subparts with part number, and
1148         ** then initialize the content of the subpart.
1149         */
1150         {
1151                 int partnum;
1152                 char *pp;
1153                 char partnam[BUFSIZ];
1154
1155                 if (ct->c_partno) {
1156                         snprintf(partnam, sizeof(partnam), "%s.",
1157                                         ct->c_partno);
1158                         pp = partnam + strlen(partnam);
1159                 } else {
1160                         pp = partnam;
1161                 }
1162
1163                 for (part = m->mp_parts, partnum = 1; part;
1164                         part = part->mp_next, partnum++) {
1165                         p = part->mp_part;
1166
1167                         sprintf(pp, "%d", partnum);
1168                         p->c_partno = mh_xstrdup(partnam);
1169
1170                         /* initialize the content of the subparts */
1171                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1172                                 fclose(ct->c_fp);
1173                                 ct->c_fp = NULL;
1174                                 return NOTOK;
1175                         }
1176                 }
1177         }
1178
1179         fclose(ct->c_fp);
1180         ct->c_fp = NULL;
1181         return OK;
1182 }
1183
1184
1185 /*
1186 ** reverse the order of the parts of a multipart
1187 */
1188
1189 static void
1190 reverse_parts(CT ct)
1191 {
1192         int i;
1193         struct multipart *m;
1194         struct part **base, **bmp, **next, *part;
1195
1196         m = (struct multipart *) ct->c_ctparams;
1197
1198         /* if only one part, just return */
1199         if (!m->mp_parts || !m->mp_parts->mp_next)
1200                 return;
1201
1202         /* count number of parts */
1203         i = 0;
1204         for (part = m->mp_parts; part; part = part->mp_next)
1205                 i++;
1206
1207         /* allocate array of pointers to the parts */
1208         base = mh_xcalloc(i + 1, sizeof(*base));
1209         bmp = base;
1210
1211         /* point at all the parts */
1212         for (part = m->mp_parts; part; part = part->mp_next)
1213                 *bmp++ = part;
1214         *bmp = NULL;
1215
1216         /* reverse the order of the parts */
1217         next = &m->mp_parts;
1218         for (bmp--; bmp >= base; bmp--) {
1219                 part = *bmp;
1220                 *next = part;
1221                 next = &part->mp_next;
1222         }
1223         *next = NULL;
1224
1225         /* free array of pointers */
1226         mh_free0(&base);
1227 }
1228
1229
1230 /*
1231 ** MESSAGE
1232 */
1233
1234 static int
1235 InitMessage(CT ct)
1236 {
1237         struct k2v *kv;
1238         CI ci = &ct->c_ctinfo;
1239
1240         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1241                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1242                 return NOTOK;
1243         }
1244
1245         /* check for missing subtype */
1246         if (!*ci->ci_subtype)
1247                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1248
1249         /* match subtype */
1250         for (kv = SubMessage; kv->kv_key; kv++)
1251                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1252                         break;
1253         ct->c_subtype = kv->kv_value;
1254
1255         switch (ct->c_subtype) {
1256         case MESSAGE_RFC822:
1257                 break;
1258
1259         case MESSAGE_PARTIAL:
1260                 {
1261                 char **ap, **ep;
1262                 struct partial *p;
1263
1264                 p = mh_xcalloc(1, sizeof(*p));
1265                 ct->c_ctparams = (void *) p;
1266
1267                 /*
1268                 ** scan for parameters "id", "number",
1269                 ** and "total"
1270                 */
1271                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1272                         if (!mh_strcasecmp(*ap, "id")) {
1273                                 p->pm_partid = mh_xstrdup(*ep);
1274                                 continue;
1275                         }
1276                         if (!mh_strcasecmp(*ap, "number")) {
1277                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1278 invalid_param:
1279                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1280                                         return NOTOK;
1281                                 }
1282                                 continue;
1283                         }
1284                         if (!mh_strcasecmp(*ap, "total")) {
1285                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1286                                                 p->pm_maxno < 1)
1287                                         goto invalid_param;
1288                                 continue;
1289                         }
1290                 }
1291
1292                 if (!p->pm_partid || !p->pm_partno
1293                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1294                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1295                         return NOTOK;
1296                 }
1297                 }
1298                 break;
1299
1300         case MESSAGE_EXTERNAL:
1301                 {
1302                 CT p;
1303                 FILE *fp;
1304
1305                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1306                         advise(ct->c_file, "unable to open for reading");
1307                         return NOTOK;
1308                 }
1309
1310                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1311
1312                 if (!(p = get_content(fp, ct->c_file, 0))) {
1313                         ct->c_fp = NULL;
1314                         return NOTOK;
1315                 }
1316
1317                 p->c_fp = NULL;
1318                 p->c_end = p->c_begin;
1319
1320                 fclose(ct->c_fp);
1321                 ct->c_fp = NULL;
1322
1323                 switch (p->c_type) {
1324                 case CT_MULTIPART:
1325                         break;
1326
1327                 case CT_MESSAGE:
1328                         if (p->c_subtype != MESSAGE_RFC822)
1329                                 break;
1330                         /* else fall... */
1331                 default:
1332                         if (p->c_ctinitfnx)
1333                                 (*p->c_ctinitfnx) (p);
1334                         break;
1335                 }
1336                 }
1337                 break;
1338
1339         default:
1340                 break;
1341         }
1342
1343         return OK;
1344 }
1345
1346
1347 /*
1348 ** APPLICATION
1349 */
1350
1351 static int
1352 InitApplication(CT ct)
1353 {
1354         struct k2v *kv;
1355         CI ci = &ct->c_ctinfo;
1356
1357         /* match subtype */
1358         for (kv = SubApplication; kv->kv_key; kv++)
1359                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1360                         break;
1361         ct->c_subtype = kv->kv_value;
1362
1363         return OK;
1364 }
1365
1366
1367 /*
1368 ** TRANSFER ENCODINGS
1369 */
1370
1371 static int
1372 init_encoding(CT ct, OpenCEFunc openfnx)
1373 {
1374         CE ce;
1375
1376         ce = mh_xcalloc(1, sizeof(*ce));
1377
1378         ct->c_cefile     = ce;
1379         ct->c_ceopenfnx  = openfnx;
1380         ct->c_ceclosefnx = close_encoding;
1381         ct->c_cesizefnx  = size_encoding;
1382
1383         return OK;
1384 }
1385
1386
1387 void
1388 close_encoding(CT ct)
1389 {
1390         CE ce;
1391
1392         if (!(ce = ct->c_cefile))
1393                 return;
1394
1395         if (ce->ce_fp) {
1396                 fclose(ce->ce_fp);
1397                 ce->ce_fp = NULL;
1398         }
1399 }
1400
1401
1402 static unsigned long
1403 size_encoding(CT ct)
1404 {
1405         int fd;
1406         unsigned long size;
1407         char *file;
1408         CE ce;
1409         struct stat st;
1410
1411         if (!(ce = ct->c_cefile))
1412                 return (ct->c_end - ct->c_begin);
1413
1414         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1415                 return (long) st.st_size;
1416
1417         if (ce->ce_file) {
1418                 if (stat(ce->ce_file, &st) != NOTOK)
1419                         return (long) st.st_size;
1420                 else
1421                         return 0L;
1422         }
1423
1424         if (ct->c_encoding == CE_EXTERNAL)
1425                 return (ct->c_end - ct->c_begin);
1426
1427         file = NULL;
1428         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1429                 return (ct->c_end - ct->c_begin);
1430
1431         if (fstat(fd, &st) != NOTOK)
1432                 size = (long) st.st_size;
1433         else
1434                 size = 0L;
1435
1436         (*ct->c_ceclosefnx) (ct);
1437         return size;
1438 }
1439
1440
1441 /*
1442 ** BASE64
1443 */
1444
/*
** Lookup table from a 7-bit character code to its 6-bit base64
** value: 'A'-'Z' map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to
** 0x34-0x3d, '+' to 0x3e and '/' to 0x3f.  Every other character
** maps to 0xff.  The table is never modified, hence const.
*/
static const unsigned char b642nib[0x80] = {
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
        0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
        0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
        0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1463
1464
1465 static int
1466 InitBase64(CT ct)
1467 {
1468         return init_encoding(ct, openBase64);
1469 }
1470
1471
1472 static int
1473 openBase64(CT ct, char **file)
1474 {
1475         int bitno, cc;
1476         int fd, len, skip, own_ct_fp = 0;
1477         unsigned long bits;
1478         unsigned char value, *b, *b1, *b2, *b3;
1479         unsigned char *cp, *ep;
1480         char buffer[BUFSIZ];
1481         /* sbeck -- handle suffixes */
1482         CI ci;
1483         CE ce;
1484
1485         b  = (unsigned char *) &bits;
1486         b1 = &b[endian > 0 ? 1 : 2];
1487         b2 = &b[endian > 0 ? 2 : 1];
1488         b3 = &b[endian > 0 ? 3 : 0];
1489
1490         ce = ct->c_cefile;
1491         if (ce->ce_fp) {
1492                 fseek(ce->ce_fp, 0L, SEEK_SET);
1493                 goto ready_to_go;
1494         }
1495
1496         if (ce->ce_file) {
1497                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1498                         content_error(ce->ce_file, ct,
1499                                         "unable to fopen for reading");
1500                         return NOTOK;
1501                 }
1502                 goto ready_to_go;
1503         }
1504
1505         if (*file == NULL) {
1506                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1507                 ce->ce_unlink = 1;
1508         } else {
1509                 ce->ce_file = mh_xstrdup(*file);
1510                 ce->ce_unlink = 0;
1511         }
1512
1513         /* sbeck@cise.ufl.edu -- handle suffixes */
1514         ci = &ct->c_ctinfo;
1515         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1516                         invo_name, ci->ci_type, ci->ci_subtype);
1517         cp = context_find(buffer);
1518         if (cp == NULL || *cp == '\0') {
1519                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1520                                 ci->ci_type);
1521                 cp = context_find(buffer);
1522         }
1523         if (cp != NULL && *cp != '\0') {
1524                 if (ce->ce_unlink) {
1525                         /*
1526                         ** Temporary file already exists, so we rename to
1527                         ** version with extension.
1528                         */
1529                         char *file_org = mh_xstrdup(ce->ce_file);
1530                         ce->ce_file = add(cp, ce->ce_file);
1531                         if (rename(file_org, ce->ce_file)) {
1532                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1533                                                 file_org);
1534                         }
1535                         mh_free0(&file_org);
1536
1537                 } else {
1538                         ce->ce_file = add(cp, ce->ce_file);
1539                 }
1540         }
1541
1542         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1543                 content_error(ce->ce_file, ct,
1544                                 "unable to fopen for reading/writing");
1545                 return NOTOK;
1546         }
1547
1548         if ((len = ct->c_end - ct->c_begin) < 0)
1549                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1550
1551         if (!ct->c_fp) {
1552                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1553                         content_error(ct->c_file, ct,
1554                                         "unable to open for reading");
1555                         return NOTOK;
1556                 }
1557                 own_ct_fp = 1;
1558         }
1559
1560         bitno = 18;
1561         bits = 0L;
1562         skip = 0;
1563
1564         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1565         while (len > 0) {
1566                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1567                 case NOTOK:
1568                         content_error(ct->c_file, ct, "error reading from");
1569                         goto clean_up;
1570
1571                 case OK:
1572                         content_error(NULL, ct, "premature eof");
1573                         goto clean_up;
1574
1575                 default:
1576                         if (cc > len)
1577                                 cc = len;
1578                         len -= cc;
1579
1580                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1581                                 switch (*cp) {
1582                                 default:
1583                                         if (isspace(*cp))
1584                                                 break;
1585                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1586                                                 if (debugsw) {
1587                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1588                                                 }
1589                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1590                                                 continue;
1591                                         }
1592
1593                                         bits |= value << bitno;
1594 test_end:
1595                                         if ((bitno -= 6) < 0) {
1596                                                 putc((char) *b1, ce->ce_fp);
1597                                                 if (skip < 2) {
1598                                                         putc((char) *b2, ce->ce_fp);
1599                                                         if (skip < 1) {
1600                                                                 putc((char) *b3, ce->ce_fp);
1601                                                         }
1602                                                 }
1603
1604                                                 if (ferror(ce->ce_fp)) {
1605                                                         content_error(ce->ce_file, ct,
1606                                                                                    "error writing to");
1607                                                         goto clean_up;
1608                                                 }
1609                                                 bitno = 18, bits = 0L, skip = 0;
1610                                         }
1611                                         break;
1612
1613                                 case '=':
1614                                         if (++skip > 3)
1615                                                 goto self_delimiting;
1616                                         goto test_end;
1617                                 }
1618                         }
1619                 }
1620         }
1621
1622         if (bitno != 18) {
1623                 if (debugsw)
1624                         fprintf(stderr, "premature ending (bitno %d)\n",
1625                                         bitno);
1626
1627                 content_error(NULL, ct, "invalid BASE64 encoding");
1628                 goto clean_up;
1629         }
1630
1631 self_delimiting:
1632         fseek(ct->c_fp, 0L, SEEK_SET);
1633
1634         if (fflush(ce->ce_fp)) {
1635                 content_error(ce->ce_file, ct, "error writing to");
1636                 goto clean_up;
1637         }
1638
1639         fseek(ce->ce_fp, 0L, SEEK_SET);
1640
1641 ready_to_go:
1642         *file = ce->ce_file;
1643         if (own_ct_fp) {
1644                 fclose(ct->c_fp);
1645                 ct->c_fp = NULL;
1646         }
1647         return fileno(ce->ce_fp);
1648
1649 clean_up:
1650         free_encoding(ct, 0);
1651         if (own_ct_fp) {
1652                 fclose(ct->c_fp);
1653                 ct->c_fp = NULL;
1654         }
1655         return NOTOK;
1656 }
1657
1658
1659 /*
1660 ** QUOTED PRINTABLE
1661 */
1662
/*
** Lookup table from a 7-bit character code to the value of that
** character as a hexadecimal digit: '0'-'9' map to 0-9, and both
** 'A'-'F' and 'a'-'f' map to 10-15.  Every other character maps to
** 0, so the table cannot tell an invalid digit from '0'.  The table
** is never modified, hence const.
*/
static const char hex2nib[0x80] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1681
1682
1683 static int
1684 InitQuoted(CT ct)
1685 {
1686         return init_encoding(ct, openQuoted);
1687 }
1688
1689
1690 static int
1691 openQuoted(CT ct, char **file)
1692 {
1693         int cc, len, quoted, own_ct_fp = 0;
1694         unsigned char *cp, *ep;
1695         char buffer[BUFSIZ];
1696         unsigned char mask = 0;
1697         CE ce;
1698         /* sbeck -- handle suffixes */
1699         CI ci;
1700
1701         ce = ct->c_cefile;
1702         if (ce->ce_fp) {
1703                 fseek(ce->ce_fp, 0L, SEEK_SET);
1704                 goto ready_to_go;
1705         }
1706
1707         if (ce->ce_file) {
1708                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1709                         content_error(ce->ce_file, ct,
1710                                         "unable to fopen for reading");
1711                         return NOTOK;
1712                 }
1713                 goto ready_to_go;
1714         }
1715
1716         if (*file == NULL) {
1717                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1718                 ce->ce_unlink = 1;
1719         } else {
1720                 ce->ce_file = mh_xstrdup(*file);
1721                 ce->ce_unlink = 0;
1722         }
1723
1724         /* sbeck@cise.ufl.edu -- handle suffixes */
1725         ci = &ct->c_ctinfo;
1726         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1727                         invo_name, ci->ci_type, ci->ci_subtype);
1728         cp = context_find(buffer);
1729         if (cp == NULL || *cp == '\0') {
1730                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1731                                 ci->ci_type);
1732                 cp = context_find(buffer);
1733         }
1734         if (cp != NULL && *cp != '\0') {
1735                 if (ce->ce_unlink) {
1736                         /*
1737                         ** Temporary file already exists, so we rename to
1738                         ** version with extension.
1739                         */
1740                         char *file_org = mh_xstrdup(ce->ce_file);
1741                         ce->ce_file = add(cp, ce->ce_file);
1742                         if (rename(file_org, ce->ce_file)) {
1743                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1744                                                 file_org);
1745                         }
1746                         mh_free0(&file_org);
1747
1748                 } else {
1749                         ce->ce_file = add(cp, ce->ce_file);
1750                 }
1751         }
1752
1753         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1754                 content_error(ce->ce_file, ct,
1755                                 "unable to fopen for reading/writing");
1756                 return NOTOK;
1757         }
1758
1759         if ((len = ct->c_end - ct->c_begin) < 0)
1760                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1761
1762         if (!ct->c_fp) {
1763                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1764                         content_error(ct->c_file, ct,
1765                                         "unable to open for reading");
1766                         return NOTOK;
1767                 }
1768                 own_ct_fp = 1;
1769         }
1770
1771         quoted = 0;
1772
1773         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1774         while (len > 0) {
1775                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1776                         content_error(NULL, ct, "premature eof");
1777                         goto clean_up;
1778                 }
1779
1780                 if ((cc = strlen(buffer)) > len)
1781                         cc = len;
1782                 len -= cc;
1783
1784                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1785                         if (!isspace(*ep))
1786                                 break;
1787                 *++ep = '\n', ep++;
1788
1789                 for (; cp < ep; cp++) {
1790                         if (quoted > 0) {
1791                                 /* in an escape sequence */
1792                                 if (quoted == 1) {
1793                                         /* at byte 1 of an escape sequence */
1794                                         mask = hex2nib[*cp & 0x7f];
1795                                         /* next is byte 2 */
1796                                         quoted = 2;
1797                                 } else {
1798                                         /* at byte 2 of an escape sequence */
1799                                         mask <<= 4;
1800                                         mask |= hex2nib[*cp & 0x7f];
1801                                         putc(mask, ce->ce_fp);
1802                                         if (ferror(ce->ce_fp)) {
1803                                                 content_error(ce->ce_file, ct, "error writing to");
1804                                                 goto clean_up;
1805                                         }
1806                                         /*
1807                                         ** finished escape sequence; next may
1808                                         ** be literal or a new escape sequence
1809                                         */
1810                                         quoted = 0;
1811                                 }
1812                                 /* on to next byte */
1813                                 continue;
1814                         }
1815
1816                         /* not in an escape sequence */
1817                         if (*cp == '=') {
1818                                 /*
1819                                 ** starting an escape sequence,
1820                                 ** or invalid '='?
1821                                 */
1822                                 if (cp + 1 < ep && cp[1] == '\n') {
1823                                         /* "=\n" soft line break, eat the \n */
1824                                         cp++;
1825                                         continue;
1826                                 }
1827                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1828                                         /*
1829                                         ** We don't have 2 bytes left,
1830                                         ** so this is an invalid escape
1831                                         ** sequence; just show the raw bytes
1832                                         ** (below).
1833                                         */
1834                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1835                                         /*
1836                                         ** Next 2 bytes are hex digits,
1837                                         ** making this a valid escape
1838                                         ** sequence; let's decode it (above).
1839                                         */
1840                                         quoted = 1;
1841                                         continue;
1842                                 } else {
1843                                         /*
1844                                         ** One or both of the next 2 is
1845                                         ** out of range, making this an
1846                                         ** invalid escape sequence; just
1847                                         ** show the raw bytes (below).
1848                                         */
1849                                 }
1850                         }
1851
1852                         /* Just show the raw byte. */
1853                         putc(*cp, ce->ce_fp);
1854                         if (ferror(ce->ce_fp)) {
1855                                 content_error(ce->ce_file, ct,
1856                                                 "error writing to");
1857                                 goto clean_up;
1858                         }
1859                 }
1860         }
1861         if (quoted) {
1862                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1863                 goto clean_up;
1864         }
1865
1866         fseek(ct->c_fp, 0L, SEEK_SET);
1867
1868         if (fflush(ce->ce_fp)) {
1869                 content_error(ce->ce_file, ct, "error writing to");
1870                 goto clean_up;
1871         }
1872
1873         fseek(ce->ce_fp, 0L, SEEK_SET);
1874
1875 ready_to_go:
1876         *file = ce->ce_file;
1877         if (own_ct_fp) {
1878                 fclose(ct->c_fp);
1879                 ct->c_fp = NULL;
1880         }
1881         return fileno(ce->ce_fp);
1882
1883 clean_up:
1884         free_encoding(ct, 0);
1885         if (own_ct_fp) {
1886                 fclose(ct->c_fp);
1887                 ct->c_fp = NULL;
1888         }
1889         return NOTOK;
1890 }
1891
1892
1893 /*
1894 ** 7BIT
1895 */
1896
1897 static int
1898 Init7Bit(CT ct)
1899 {
1900         if (init_encoding(ct, open7Bit) == NOTOK)
1901                 return NOTOK;
1902
1903         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1904         return OK;
1905 }
1906
1907
1908 int
1909 open7Bit(CT ct, char **file)
1910 {
1911         int cc, fd, len, own_ct_fp = 0;
1912         char buffer[BUFSIZ];
1913         /* sbeck -- handle suffixes */
1914         char *cp;
1915         CI ci;
1916         CE ce;
1917
1918         ce = ct->c_cefile;
1919         if (ce->ce_fp) {
1920                 fseek(ce->ce_fp, 0L, SEEK_SET);
1921                 goto ready_to_go;
1922         }
1923
1924         if (ce->ce_file) {
1925                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1926                         content_error(ce->ce_file, ct,
1927                                         "unable to fopen for reading");
1928                         return NOTOK;
1929                 }
1930                 goto ready_to_go;
1931         }
1932
1933         if (*file == NULL) {
1934                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1935                 ce->ce_unlink = 1;
1936         } else {
1937                 ce->ce_file = mh_xstrdup(*file);
1938                 ce->ce_unlink = 0;
1939         }
1940
1941         /* sbeck@cise.ufl.edu -- handle suffixes */
1942         ci = &ct->c_ctinfo;
1943         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1944                         invo_name, ci->ci_type, ci->ci_subtype);
1945         cp = context_find(buffer);
1946         if (cp == NULL || *cp == '\0') {
1947                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1948                                 ci->ci_type);
1949                 cp = context_find(buffer);
1950         }
1951         if (cp != NULL && *cp != '\0') {
1952                 if (ce->ce_unlink) {
1953                         /*
1954                         ** Temporary file already exists, so we rename to
1955                         ** version with extension.
1956                         */
1957                         char *file_org = mh_xstrdup(ce->ce_file);
1958                         ce->ce_file = add(cp, ce->ce_file);
1959                         if (rename(file_org, ce->ce_file)) {
1960                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1961                                                 file_org);
1962                         }
1963                         mh_free0(&file_org);
1964
1965                 } else {
1966                         ce->ce_file = add(cp, ce->ce_file);
1967                 }
1968         }
1969
1970         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1971                 content_error(ce->ce_file, ct,
1972                                 "unable to fopen for reading/writing");
1973                 return NOTOK;
1974         }
1975
1976         if (ct->c_type == CT_MULTIPART) {
1977                 char **ap, **ep;
1978                 CI ci = &ct->c_ctinfo;
1979
1980                 len = 0;
1981                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1982                                 ci->ci_subtype);
1983                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1984                                 strlen(ci->ci_subtype);
1985                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1986                         putc(';', ce->ce_fp);
1987                         len++;
1988
1989                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1990                                         *ap, *ep);
1991
1992                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1993                                 fputs("\n\t", ce->ce_fp);
1994                                 len = 8;
1995                         } else {
1996                                 putc(' ', ce->ce_fp);
1997                                 len++;
1998                         }
1999                         fprintf(ce->ce_fp, "%s", buffer);
2000                         len += cc;
2001                 }
2002
2003                 if (ci->ci_comment) {
2004                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2005                                                 >= CPERLIN) {
2006                                 fputs("\n\t", ce->ce_fp);
2007                                 len = 8;
2008                         } else {
2009                                 putc(' ', ce->ce_fp);
2010                                 len++;
2011                         }
2012                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2013                         len += cc;
2014                 }
2015                 fprintf(ce->ce_fp, "\n");
2016                 if (ct->c_id)
2017                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2018                 if (ct->c_descr)
2019                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2020                 if (ct->c_dispo)
2021                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2022                 fprintf(ce->ce_fp, "\n");
2023         }
2024
2025         if ((len = ct->c_end - ct->c_begin) < 0)
2026                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2027
2028         if (!ct->c_fp) {
2029                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2030                         content_error(ct->c_file, ct,
2031                                         "unable to open for reading");
2032                         return NOTOK;
2033                 }
2034                 own_ct_fp = 1;
2035         }
2036
2037         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2038         while (len > 0)
2039                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2040                 case NOTOK:
2041                         content_error(ct->c_file, ct, "error reading from");
2042                         goto clean_up;
2043
2044                 case OK:
2045                         content_error(NULL, ct, "premature eof");
2046                         goto clean_up;
2047
2048                 default:
2049                         if (cc > len)
2050                                 cc = len;
2051                         len -= cc;
2052
2053                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2054                         if (ferror(ce->ce_fp)) {
2055                                 content_error(ce->ce_file, ct,
2056                                                 "error writing to");
2057                                 goto clean_up;
2058                         }
2059                 }
2060
2061         fseek(ct->c_fp, 0L, SEEK_SET);
2062
2063         if (fflush(ce->ce_fp)) {
2064                 content_error(ce->ce_file, ct, "error writing to");
2065                 goto clean_up;
2066         }
2067
2068         fseek(ce->ce_fp, 0L, SEEK_SET);
2069
2070 ready_to_go:
2071         *file = ce->ce_file;
2072         if (own_ct_fp) {
2073                 fclose(ct->c_fp);
2074                 ct->c_fp = NULL;
2075         }
2076         return fileno(ce->ce_fp);
2077
2078 clean_up:
2079         free_encoding(ct, 0);
2080         if (own_ct_fp) {
2081                 fclose(ct->c_fp);
2082                 ct->c_fp = NULL;
2083         }
2084         return NOTOK;
2085 }