refile: Never change the current folder
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/*
** Debug switch.  When non-zero, the header parsers in this file echo
** each cleaned-up MIME-Version and Content-Type value to stderr.
*/
extern int debugsw;

extern int endian;  /* mhmisc.c */

extern pid_t xpid;  /* mhshowsbr.c  */

/*
** Directory to place temp files.  This must
** be set before these routines are called.
*/
char *tmp;
33
/*
** Structures for TEXT messages
**
** Pairs each known text subtype name with its TEXT_ constant.
** The NULL-keyed entry terminates the table and carries the
** "unknown" value.
*/
struct k2v SubText[] = {
	{ "plain", TEXT_PLAIN },
	{ "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
	{ "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
	{ NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
43
/*
** Pairs each known charset name with its CHARSET_ constant.
** The NULL-keyed entry terminates the table and carries the
** "unknown" value.
*/
struct k2v Charset[] = {
	{ "us-ascii",   CHARSET_USASCII },
	{ "iso-8859-1", CHARSET_LATIN },
	{ NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
49
/*
** Structures for MULTIPART messages
**
** Pairs each known multipart subtype name with its MULTI_ constant.
** The NULL-keyed entry terminates the table and carries the
** "unknown" value.
*/
struct k2v SubMultiPart[] = {
	{ "mixed",       MULTI_MIXED },
	{ "alternative", MULTI_ALTERNATE },
	{ "digest",      MULTI_DIGEST },
	{ "parallel",    MULTI_PARALLEL },
	{ NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
60
/*
** Structures for MESSAGE messages
**
** Pairs each known message subtype name with its MESSAGE_ constant.
** The NULL-keyed entry terminates the table and carries the
** "unknown" value.
*/
struct k2v SubMessage[] = {
	{ "rfc822",        MESSAGE_RFC822 },
	{ "partial",       MESSAGE_PARTIAL },
	{ "external-body", MESSAGE_EXTERNAL },
	{ NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
70
/*
** Structure for APPLICATION messages
**
** Pairs each known application subtype name with its APPLICATION_
** constant.  The NULL-keyed entry terminates the table and carries
** the "unknown" value.
*/
struct k2v SubApplication[] = {
	{ "octet-stream", APPLICATION_OCTETS },
	{ "postscript",   APPLICATION_POSTSCRIPT },
	{ NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
79
80
/* mhmisc.c */
int part_ok(CT, int);
int type_ok(CT, int);
int make_intermediates(char *);
void content_error(char *, CT, char *, ...);

/* mhfree.c */
void free_content(CT);
void free_encoding(CT, int);

/*
** static prototypes
**
** The Init functions are referenced by the str2cts[] and str2ces[]
** dispatch tables below, so they must be declared before those tables.
*/
static CT get_content(FILE *, char *, int);
static int get_comment(CT, unsigned char **, int);

static int InitGeneric(CT);
static int InitText(CT);
static int InitMultiPart(CT);
static void reverse_parts(CT);
static int InitMessage(CT);
static int InitApplication(CT);
static int init_encoding(CT, OpenCEFunc);
static unsigned long size_encoding(CT);
static int InitBase64(CT);
static int openBase64(CT, char **);
static int InitQuoted(CT);
static int openQuoted(CT, char **);
static int Init7Bit(CT);
110
/*
** Content type dispatch table: type name, CT_ constant, Init function.
**
** get_content() scans it by si_key and stops at the first NULL key
** (the CT_EXTENSION entry).  If the type was not found and uprf()
** rejects the "X-" prefix test, it steps one entry further to
** CT_UNKNOWN -- hence the two trailing sentinels and their order.
** NOTE(review): uprf() is presumably a case-insensitive prefix
** match; confirm against its definition.
*/
struct str2init str2cts[] = {
	{ "application", CT_APPLICATION, InitApplication },
	{ "audio",       CT_AUDIO,       InitGeneric },
	{ "image",       CT_IMAGE,       InitGeneric },
	{ "message",     CT_MESSAGE,     InitMessage },
	{ "multipart",   CT_MULTIPART,   InitMultiPart },
	{ "text",        CT_TEXT,        InitText },
	{ "video",       CT_VIDEO,       InitGeneric },
	{ NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
	{ NULL,          CT_UNKNOWN,     NULL },
};
122
/*
** Content-Transfer-Encoding dispatch table: encoding name, CE_
** constant, Init function.
**
** get_content() scans it the same way as str2cts[]: the first NULL
** key (CE_EXTENSION) ends the search, and an unknown encoding that
** fails the "X-" prefix test moves on to CE_UNKNOWN.  The 8bit, 7bit
** and binary encodings all share Init7Bit.
*/
struct str2init str2ces[] = {
	{ "base64",           CE_BASE64,    InitBase64 },
	{ "quoted-printable", CE_QUOTED,    InitQuoted },
	{ "8bit",             CE_8BIT,      Init7Bit },
	{ "7bit",             CE_7BIT,      Init7Bit },
	{ "binary",           CE_BINARY,    Init7Bit },
	{ NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
	{ NULL,               CE_UNKNOWN,    NULL },
};
132
133
/*
** Inspect a child's wait status.  If the low seven bits equal SIGQUIT
** and the second byte is not 0xff, flush stdout and stderr and exit
** with EX_SOFTWARE.  In every other case the status is handed back
** unchanged.
*/
int
pidcheck(int status)
{
	int high_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_bits_are_sigquit = ((status & 0x007f) == SIGQUIT);

	if (!high_byte_all_ones && low_bits_are_sigquit) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
/*
** Main routine for reading/parsing the headers
** of a message content.
**
** toplevel =  1   # we are at the top level of the message
** toplevel =  0   # we are inside message type or multipart type
**                 # other than multipart/digest
** toplevel = -1   # we are inside multipart/digest
** NB: on failure we will fclose(in)!
**     (NOTE(review): the only cleanup visible here is free_content();
**     presumably that is what closes ct->c_fp -- confirm in mhfree.c.)
**
** in:   stream positioned at the start of the content's header
** file: name of the file being read; a copy is kept in ct->c_file
**
** Returns a newly allocated content structure with the header list,
** c_begin, the content type info and the transfer encoding filled
** in, or NULL if a MIME header could not be parsed.  A malformed
** header block is reported through adios().
*/

static CT
get_content(FILE *in, char *file, int toplevel)
{
	int compnum, state;
	char buf[BUFSIZ], name[NAMESZ];
	char *np, *vp;
	CT ct;
	HF hp;

	/* allocate the content structure */
	ct = mh_xcalloc(1, sizeof(*ct));

	ct->c_fp = in;
	ct->c_file = mh_xstrdup(file);
	ct->c_begin = ftell(ct->c_fp) + 1;

	/*
	** Parse the header fields for this
	** content into a linked list.
	*/
	for (compnum = 1, state = FLD;;) {
		switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
		case FLD:
		case FLDPLUS:
			compnum++;

			/* get copies of the buffers */
			np = mh_xstrdup(name);
			vp = mh_xstrdup(buf);

			/* if necessary, get rest of field */
			while (state == FLDPLUS) {
				state = m_getfld(state, name, buf,
						sizeof(buf), in);
				vp = add(buf, vp);  /* add to previous value */
			}

			/* Now add the header data to the list */
			add_header(ct, np, vp);

			/* keep c_begin moving along while headers are read */
			ct->c_begin = ftell(in) + 1;
			continue;

		case BODY:
			/* buf already holds body text; back up over it */
			ct->c_begin = ftell(in) - strlen(buf);
			break;

		case FILEEOF:
			ct->c_begin = ftell(in);
			break;

		case LENERR:
		case FMTERR:
			/*
			** NOTE(review): no break here; this relies on
			** adios() not returning -- confirm.
			*/
			adios(EX_DATAERR, NULL, "message format error in component #%d",
					compnum);

		default:
			adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
		}

		/* break out of the loop */
		break;
	}

	/*
	** Read the content headers.  We will parse the
	** MIME related header fields into their various
	** structures and set internal flags related to
	** content type/subtype, etc.
	*/

	hp = ct->c_first_hf;  /* start at first header field */
	while (hp) {
		/* Get MIME-Version field */
		if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
			int ucmp;
			char c;
			unsigned char *cp, *dp;

			/* only the first occurrence is used */
			if (ct->c_vrsn) {
				advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
				goto next_header;
			}
			ct->c_vrsn = mh_xstrdup(hp->value);

			/* Now, cleanup this field */
			cp = ct->c_vrsn;

			/* skip leading space, turn newlines into blanks, cut trailing space */
			while (isspace(*cp))
				cp++;
			for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
				*dp++ = ' ';
			for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
				if (!isspace(*dp))
					break;
			*++dp = '\0';
			if (debugsw)
				fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);

			if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
				goto out;

			/*
			** Temporarily terminate the leading token so it
			** can be compared against the expected version.
			*/
			for (dp = cp; istoken(*dp); dp++)
				continue;
			c = *dp;
			*dp = '\0';
			ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
			*dp = c;
			/* an unexpected version is only warned about */
			if (!ucmp) {
				admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
			}

		} else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
			/* Get Content-Type field */
			struct str2init *s2i;
			CI ci = &ct->c_ctinfo;

			/* Check if we've already seen a Content-Type header */
			if (ct->c_ctline) {
				advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
				goto next_header;
			}

			/* Parse the Content-Type field */
			if (get_ctinfo(hp->value, ct, 0) == NOTOK)
				goto out;

			/*
			** Set the Init function and the internal
			** flag for this content type.
			**
			** The search stops on the first NULL key of
			** str2cts[]; the step below then moves on to the
			** second sentinel when the "X-" test fails.
			*/
			for (s2i = str2cts; s2i->si_key; s2i++)
				if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
					break;
			if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
				s2i++;
			ct->c_type = s2i->si_val;
			ct->c_ctinitfnx = s2i->si_init;

		} else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
			/* Get Content-Transfer-Encoding field */
			char c;
			unsigned char *cp, *dp;
			struct str2init *s2i;

			/*
			** Check if we've already seen the
			** Content-Transfer-Encoding field
			*/
			if (ct->c_celine) {
				advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
				goto next_header;
			}

			/* get copy of this field */
			ct->c_celine = cp = mh_xstrdup(hp->value);

			/* isolate the encoding token; the byte is restored below */
			while (isspace(*cp))
				cp++;
			for (dp = cp; istoken(*dp); dp++)
				continue;
			c = *dp;
			*dp = '\0';

			/*
			** Find the internal flag and Init function
			** for this transfer encoding.
			*/
			for (s2i = str2ces; s2i->si_key; s2i++)
				if (!mh_strcasecmp(cp, s2i->si_key))
					break;
			if (!s2i->si_key && !uprf(cp, "X-"))
				s2i++;
			*dp = c;
			ct->c_encoding = s2i->si_val;

			/* Call the Init function for this encoding */
			if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
				goto out;

		} else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
			/* Get Content-ID field */
			ct->c_id = add(hp->value, ct->c_id);

		} else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
			/* Get Content-Description field */
			ct->c_descr = add(hp->value, ct->c_descr);

		} else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
			/* Get Content-Disposition field */
			ct->c_dispo = add(hp->value, ct->c_dispo);
		}

next_header:
		hp = hp->next;  /* next header field */
	}

	/*
	** Check if we saw a Content-Type field.
	** If not, then assign a default value for
	** it, and the Init function.
	*/
	if (!ct->c_ctline) {
		/*
		** If we are inside a multipart/digest message,
		** so default type is message/rfc822
		*/
		if (toplevel < 0) {
			if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
				goto out;
			ct->c_type = CT_MESSAGE;
			ct->c_ctinitfnx = InitMessage;
		} else {
			/*
			** Else default type is text/plain
			*/
			if (get_ctinfo("text/plain", ct, 0) == NOTOK)
				goto out;
			ct->c_type = CT_TEXT;
			ct->c_ctinitfnx = InitText;
		}
	}

	/* Use default Transfer-Encoding, if necessary */
	if (!ct->c_celine) {
		ct->c_encoding = CE_7BIT;
		Init7Bit(ct);
	}

	return ct;

out:
	/* parse failure: release everything collected so far */
	free_content(ct);
	return NULL;
}
466
467
468 /*
469 ** small routine to add header field to list
470 */
471
472 int
473 add_header(CT ct, char *name, char *value)
474 {
475         HF hp;
476
477         /* allocate header field structure */
478         hp = mh_xcalloc(1, sizeof(*hp));
479
480         /* link data into header structure */
481         hp->name = name;
482         hp->value = value;
483         hp->next = NULL;
484
485         /* link header structure into the list */
486         if (ct->c_first_hf == NULL) {
487                 ct->c_first_hf = hp;  /* this is the first */
488                 ct->c_last_hf = hp;
489         } else {
490                 ct->c_last_hf->next = hp;  /* add it to the end */
491                 ct->c_last_hf = hp;
492         }
493
494         return 0;
495 }
496
497
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** buf:   heap-allocated header value; may be NULL
** name:  parameter name to look for; must not be NULL
** value: parameter value to insert; may be NULL
**
** Returns buf itself if nothing had to be inserted (buf or value is
** NULL, or name= is already present).  Otherwise buf is freed and a
** newly allocated, newline-terminated string is returned, so the
** caller must replace its pointer with the return value.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
	char *newbuf = buf;

	/* Assume that name is non-null. */
	if (buf && value) {
		char *name_plus_equal = concat(name, "=", NULL);

		if (!strstr(buf, name_plus_equal)) {
			char *insertion;
			unsigned char *cp;
			char *prefix, *suffix;
			size_t len;

			/*
			** Trim trailing space, esp. newline.  Working
			** with a length keeps this safe for an empty buf,
			** where there is no last character to point at.
			*/
			for (len = strlen(buf);
					 len > 0 && isspace(buf[len - 1]); --len) {
				buf[len - 1] = '\0';
			}

			insertion = concat("; ", name, "=", "\"", value, "\"",
					NULL);

			/*
			** Insert at first semicolon, if any.
			** If none, append to end.
			*/
			prefix = mh_xstrdup(buf);
			if ((cp = strchr(prefix, ';'))) {
				/* split the copy: prefix keeps what precedes the ';' */
				suffix = concat(cp, NULL);
				*cp = '\0';
				newbuf = concat(prefix, insertion, suffix,
						"\n", NULL);
				mh_free0(&suffix);
			} else {
				/* Append to end. */
				newbuf = concat(buf, insertion, "\n", NULL);
			}

			mh_free0(&prefix);
			mh_free0(&insertion);
			/* the old value has been replaced by newbuf */
			mh_free0(&buf);
		}

		mh_free0(&name_plus_equal);
	}

	return newbuf;
}
555
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** The result is either value itself (nothing matched, or the quoted
** string is never closed) or a newly allocated copy of the text
** between the quotes.  Callers can tell the two apart by comparing
** the result with value.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
	char *extracted_name_value = value;
	char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
	char *name_suffix_equals = strstr(value, name_suffix_plus_quote);

	/* the match points into value, so the pattern can go already */
	mh_free0(&name_suffix_plus_quote);
	if (name_suffix_equals) {
		char *name_suffix_begin;
		char *name_suffix_end;

		/*
		** Find first \".  The matched pattern contains one,
		** so this search cannot fail.
		*/
		name_suffix_begin = strchr(name_suffix_equals, '"') + 1;

		/*
		** Find second \".  It may be missing in malformed
		** input; then fall back to the entire value instead of
		** scanning past the end of the string.
		*/
		name_suffix_end = strchr(name_suffix_begin, '"');
		if (name_suffix_end) {
			size_t len = name_suffix_end - name_suffix_begin;

			extracted_name_value = mh_xcalloc(len + 1, sizeof(char));
			memcpy(extracted_name_value, name_suffix_begin, len);
			extracted_name_value[len] = '\0';
		}
	}

	return extracted_name_value;
}
588
589 /*
590 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
591 ** directives.  Fills in the information of the CTinfo structure.
592 */
593 int
594 get_ctinfo(unsigned char *cp, CT ct, int magic)
595 {
596         int i;
597         unsigned char *dp;
598         char **ap, **ep;
599         char c;
600         CI ci;
601
602         ci = &ct->c_ctinfo;
603         i = strlen(invo_name) + 2;
604
605         /* store copy of Content-Type line */
606         cp = ct->c_ctline = mh_xstrdup(cp);
607
608         while (isspace(*cp))  /* trim leading spaces */
609                 cp++;
610
611         /* change newlines to spaces */
612         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
613                 *dp++ = ' ';
614
615         /* trim trailing spaces */
616         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
617                 if (!isspace(*dp))
618                         break;
619         *++dp = '\0';
620
621         if (debugsw)
622                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
623
624         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
625                 return NOTOK;
626
627         for (dp = cp; istoken(*dp); dp++)
628                 continue;
629         c = *dp, *dp = '\0';
630         ci->ci_type = mh_xstrdup(cp);  /* store content type */
631         *dp = c, cp = dp;
632
633         if (!*ci->ci_type) {
634                 advise(NULL, "invalid %s: field in message %s (empty type)",
635                                 TYPE_FIELD, ct->c_file);
636                 return NOTOK;
637         }
638
639         /* down case the content type string */
640         for (dp = ci->ci_type; *dp; dp++)
641                 if (isalpha(*dp) && isupper(*dp))
642                         *dp = tolower(*dp);
643
644         while (isspace(*cp))
645                 cp++;
646
647         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
648                 return NOTOK;
649
650         if (*cp != '/') {
651                 if (!magic)
652                         ci->ci_subtype = mh_xstrdup("");
653                 goto magic_skip;
654         }
655
656         cp++;
657         while (isspace(*cp))
658                 cp++;
659
660         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
661                 return NOTOK;
662
663         for (dp = cp; istoken(*dp); dp++)
664                 continue;
665         c = *dp, *dp = '\0';
666         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
667         *dp = c, cp = dp;
668
669         if (!*ci->ci_subtype) {
670                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
671                 return NOTOK;
672         }
673
674         /* down case the content subtype string */
675         for (dp = ci->ci_subtype; *dp; dp++)
676                 if (isalpha(*dp) && isupper(*dp))
677                         *dp = tolower(*dp);
678
679 magic_skip:
680         while (isspace(*cp))
681                 cp++;
682
683         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
684                 return NOTOK;
685
686         /*
687         ** Parse attribute/value pairs given with Content-Type
688         */
689         ep = (ap = ci->ci_attrs) + NPARMS;
690         while (*cp == ';') {
691                 char *vp;
692                 unsigned char *up;
693
694                 if (ap >= ep) {
695                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
696                         return NOTOK;
697                 }
698
699                 cp++;
700                 while (isspace(*cp))
701                         cp++;
702
703                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
704                         return NOTOK;
705
706                 if (*cp == 0) {
707                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
708                         return OK;
709                 }
710
711                 /* down case the attribute name */
712                 for (dp = cp; istoken(*dp); dp++)
713                         if (isalpha(*dp) && isupper(*dp))
714                                 *dp = tolower(*dp);
715
716                 for (up = dp; isspace(*dp);)
717                         dp++;
718                 if (dp == cp || *dp != '=') {
719                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
720                         return NOTOK;
721                 }
722
723                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
724                 *vp = '\0';
725                 for (dp++; isspace(*dp);)
726                         dp++;
727
728                 /* now add the attribute value */
729                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
730
731                 if (*dp == '"') {
732                         for (cp = ++dp, dp = vp;;) {
733                                 switch (c = *cp++) {
734                                 case '\0':
735 bad_quote:
736                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
737                                         return NOTOK;
738
739                                 case '\\':
740                                         *dp++ = c;
741                                         if ((c = *cp++) == '\0')
742                                                 goto bad_quote;
743                                         /* else fall... */
744
745                                 default:
746                                         *dp++ = c;
747                                         continue;
748
749                                 case '"':
750                                         *dp = '\0';
751                                         break;
752                                 }
753                                 break;
754                         }
755                 } else {
756                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
757                                 continue;
758                         *dp = '\0';
759                 }
760                 if (!*vp) {
761                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
762                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
763                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
764                         continue;
765                 }
766                 ap++;
767
768                 while (isspace(*cp))
769                         cp++;
770
771                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
772                         return NOTOK;
773         }
774
775         /*
776         ** Get any <Content-Id> given in buffer
777         */
778         if (magic && *cp == '<') {
779                 if (ct->c_id) {
780                         mh_free0(&(ct->c_id));
781                 }
782                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
783                         advise(NULL, "invalid ID in message %s", ct->c_file);
784                         return NOTOK;
785                 }
786                 c = *dp;
787                 *dp = '\0';
788                 if (*ct->c_id)
789                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
790                 else
791                         ct->c_id = NULL;
792                 *dp++ = c;
793                 cp = dp;
794
795                 while (isspace(*cp))
796                         cp++;
797         }
798
799         /*
800         ** Get any [Content-Description] given in buffer.
801         */
802         if (magic && *cp == '[') {
803                 ct->c_descr = ++cp;
804                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
805                         if (*dp == ']')
806                                 break;
807                 if (dp < cp) {
808                         advise(NULL, "invalid description in message %s",
809                                         ct->c_file);
810                         ct->c_descr = NULL;
811                         return NOTOK;
812                 }
813
814                 c = *dp;
815                 *dp = '\0';
816                 if (*ct->c_descr)
817                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
818                 else
819                         ct->c_descr = NULL;
820                 *dp++ = c;
821                 cp = dp;
822
823                 while (isspace(*cp))
824                         cp++;
825         }
826
827         /*
828         ** Get any {Content-Disposition} given in buffer.
829         */
830         if (magic && *cp == '{') {
831                 ct->c_dispo = ++cp;
832                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
833                         if (*dp == '}')
834                                 break;
835                 if (dp < cp) {
836                         advise(NULL, "invalid disposition in message %s",
837                                         ct->c_file);
838                         ct->c_dispo = NULL;
839                         return NOTOK;
840                 }
841
842                 c = *dp;
843                 *dp = '\0';
844                 if (*ct->c_dispo)
845                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
846                 else
847                         ct->c_dispo = NULL;
848                 *dp++ = c;
849                 cp = dp;
850
851                 while (isspace(*cp))
852                         cp++;
853         }
854
855         /*
856         ** Check if anything is left over
857         */
858         if (*cp) {
859                 if (magic) {
860                         ci->ci_magic = mh_xstrdup(cp);
861
862                         /*
863                         ** If there is a Content-Disposition header and
864                         ** it doesn't have a *filename=, extract it from
865                         ** the magic contents.  The mhbasename call skips
866                         ** any leading directory components.
867                         */
868                         if (ct->c_dispo)
869                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
870                         } else
871                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
872         }
873
874         return OK;
875 }
876
877
878 static int
879 get_comment(CT ct, unsigned char **ap, int istype)
880 {
881         int i;
882         char *bp;
883         unsigned char *cp;
884         char c, buffer[BUFSIZ], *dp;
885         CI ci;
886
887         ci = &ct->c_ctinfo;
888         cp = *ap;
889         bp = buffer;
890         cp++;
891
892         for (i = 0;;) {
893                 switch (c = *cp++) {
894                 case '\0':
895 invalid:
896                 advise(NULL, "invalid comment in message %s's %s: field",
897                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
898                 return NOTOK;
899
900                 case '\\':
901                         *bp++ = c;
902                         if ((c = *cp++) == '\0')
903                                 goto invalid;
904                         *bp++ = c;
905                         continue;
906
907                 case '(':
908                         i++;
909                         /* and fall... */
910                 default:
911                         *bp++ = c;
912                         continue;
913
914                 case ')':
915                         if (--i < 0)
916                                 break;
917                         *bp++ = c;
918                         continue;
919                 }
920                 break;
921         }
922         *bp = '\0';
923
924         if (istype) {
925                 if ((dp = ci->ci_comment)) {
926                         ci->ci_comment = concat(dp, " ", buffer, NULL);
927                         mh_free0(&dp);
928                 } else {
929                         ci->ci_comment = mh_xstrdup(buffer);
930                 }
931         }
932
933         while (isspace(*cp))
934                 cp++;
935
936         *ap = cp;
937         return OK;
938 }
939
940
941 /*
942 ** CONTENTS
943 **
944 ** Handles content types audio, image, and video.
945 ** There's not much to do right here.
946 */
947
948 static int
949 InitGeneric(CT ct)
950 {
951         return OK;  /* not much to do here */
952 }
953
954
955 /*
956 ** TEXT
957 */
958
959 static int
960 InitText(CT ct)
961 {
962         char **ap, **ep;
963         struct k2v *kv;
964         struct text *t;
965         CI ci = &ct->c_ctinfo;
966
967         /* check for missing subtype */
968         if (!*ci->ci_subtype)
969                 ci->ci_subtype = add("plain", ci->ci_subtype);
970
971         /* match subtype */
972         for (kv = SubText; kv->kv_key; kv++)
973                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
974                         break;
975         ct->c_subtype = kv->kv_value;
976
977         /* allocate text character set structure */
978         t = mh_xcalloc(1, sizeof(*t));
979         ct->c_ctparams = (void *) t;
980
981         /* scan for charset parameter */
982         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
983                 if (!mh_strcasecmp(*ap, "charset"))
984                         break;
985
986         /* check if content specified a character set */
987         if (*ap) {
988                 /* store its name */
989                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
990                 /* match character set or set to CHARSET_UNKNOWN */
991                 for (kv = Charset; kv->kv_key; kv++) {
992                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
993                                 break;
994                         }
995                 }
996                 t->tx_charset = kv->kv_value;
997         } else {
998                 t->tx_charset = CHARSET_UNSPECIFIED;
999         }
1000
1001         return OK;
1002 }
1003
1004
1005 /*
1006 ** MULTIPART
1007 */
1008
1009 static int
1010 InitMultiPart(CT ct)
1011 {
1012         int inout;
1013         long last, pos;
1014         unsigned char *cp, *dp;
1015         char **ap, **ep;
1016         char *bp, buffer[BUFSIZ];
1017         struct multipart *m;
1018         struct k2v *kv;
1019         struct part *part, **next;
1020         CI ci = &ct->c_ctinfo;
1021         CT p;
1022         FILE *fp;
1023
1024         /*
1025         ** The encoding for multipart messages must be either
1026         ** 7bit, 8bit, or binary (per RFC2045).
1027         */
1028         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1029                 && ct->c_encoding != CE_BINARY) {
1030                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1031                 ct->c_encoding = CE_7BIT;
1032         }
1033
1034         /* match subtype */
1035         for (kv = SubMultiPart; kv->kv_key; kv++)
1036                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1037                         break;
1038         ct->c_subtype = kv->kv_value;
1039
1040         /*
1041         ** Check for "boundary" parameter, which is
1042         ** required for multipart messages.
1043         */
1044         bp = 0;
1045         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1046                 if (!mh_strcasecmp(*ap, "boundary")) {
1047                         bp = *ep;
1048                         break;
1049                 }
1050         }
1051
1052         /* complain if boundary parameter is missing */
1053         if (!*ap) {
1054                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1055                 return NOTOK;
1056         }
1057
1058         /* allocate primary structure for multipart info */
1059         m = mh_xcalloc(1, sizeof(*m));
1060         ct->c_ctparams = (void *) m;
1061
1062         /* check if boundary parameter contains only whitespace characters */
1063         for (cp = bp; isspace(*cp); cp++)
1064                 continue;
1065         if (!*cp) {
1066                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1067                 return NOTOK;
1068         }
1069
1070         /* remove trailing whitespace from boundary parameter */
1071         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1072                 if (!isspace(*dp))
1073                         break;
1074         *++dp = '\0';
1075
1076         /* record boundary separators */
1077         m->mp_start = concat(bp, "\n", NULL);
1078         m->mp_stop = concat(bp, "--\n", NULL);
1079
1080         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1081                 advise(ct->c_file, "unable to open for reading");
1082                 return NOTOK;
1083         }
1084
1085         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1086         last = ct->c_end;
1087         next = &m->mp_parts;
1088         part = NULL;
1089         inout = 1;
1090
1091         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1092                 if (pos > last)
1093                         break;
1094
1095                 pos += strlen(buffer);
1096                 if (buffer[0] != '-' || buffer[1] != '-')
1097                         continue;
1098                 if (inout) {
1099                         if (strcmp(buffer + 2, m->mp_start)!=0)
1100                                 continue;
1101 next_part:
1102                         part = mh_xcalloc(1, sizeof(*part));
1103                         *next = part;
1104                         next = &part->mp_next;
1105
1106                         if (!(p = get_content(fp, ct->c_file,
1107                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1108                                 ct->c_fp = NULL;
1109                                 return NOTOK;
1110                         }
1111                         p->c_fp = NULL;
1112                         part->mp_part = p;
1113                         pos = p->c_begin;
1114                         fseek(fp, pos, SEEK_SET);
1115                         inout = 0;
1116                 } else {
1117                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1118                                 inout = 1;
1119 end_part:
1120                                 p = part->mp_part;
1121                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1122                                 if (p->c_end < p->c_begin)
1123                                         p->c_begin = p->c_end;
1124                                 if (inout)
1125                                         goto next_part;
1126                                 goto last_part;
1127                         } else {
1128                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1129                                         goto end_part;
1130                         }
1131                 }
1132         }
1133
1134         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1135         if (!inout && part) {
1136                 p = part->mp_part;
1137                 p->c_end = ct->c_end;
1138
1139                 if (p->c_begin >= p->c_end) {
1140                         for (next = &m->mp_parts; *next != part;
1141                                 next = &((*next)->mp_next))
1142                                 continue;
1143                         *next = NULL;
1144                         free_content(p);
1145                         mh_free0(&part);
1146                 }
1147         }
1148
1149 last_part:
1150         /* reverse the order of the parts for multipart/alternative */
1151         if (ct->c_subtype == MULTI_ALTERNATE)
1152                 reverse_parts(ct);
1153
1154         /*
1155         ** label all subparts with part number, and
1156         ** then initialize the content of the subpart.
1157         */
1158         {
1159                 int partnum;
1160                 char *pp;
1161                 char partnam[BUFSIZ];
1162
1163                 if (ct->c_partno) {
1164                         snprintf(partnam, sizeof(partnam), "%s.",
1165                                         ct->c_partno);
1166                         pp = partnam + strlen(partnam);
1167                 } else {
1168                         pp = partnam;
1169                 }
1170
1171                 for (part = m->mp_parts, partnum = 1; part;
1172                         part = part->mp_next, partnum++) {
1173                         p = part->mp_part;
1174
1175                         sprintf(pp, "%d", partnum);
1176                         p->c_partno = mh_xstrdup(partnam);
1177
1178                         /* initialize the content of the subparts */
1179                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1180                                 fclose(ct->c_fp);
1181                                 ct->c_fp = NULL;
1182                                 return NOTOK;
1183                         }
1184                 }
1185         }
1186
1187         fclose(ct->c_fp);
1188         ct->c_fp = NULL;
1189         return OK;
1190 }
1191
1192
1193 /*
1194 ** reverse the order of the parts of a multipart
1195 */
1196
1197 static void
1198 reverse_parts(CT ct)
1199 {
1200         int i;
1201         struct multipart *m;
1202         struct part **base, **bmp, **next, *part;
1203
1204         m = (struct multipart *) ct->c_ctparams;
1205
1206         /* if only one part, just return */
1207         if (!m->mp_parts || !m->mp_parts->mp_next)
1208                 return;
1209
1210         /* count number of parts */
1211         i = 0;
1212         for (part = m->mp_parts; part; part = part->mp_next)
1213                 i++;
1214
1215         /* allocate array of pointers to the parts */
1216         base = mh_xcalloc(i + 1, sizeof(*base));
1217         bmp = base;
1218
1219         /* point at all the parts */
1220         for (part = m->mp_parts; part; part = part->mp_next)
1221                 *bmp++ = part;
1222         *bmp = NULL;
1223
1224         /* reverse the order of the parts */
1225         next = &m->mp_parts;
1226         for (bmp--; bmp >= base; bmp--) {
1227                 part = *bmp;
1228                 *next = part;
1229                 next = &part->mp_next;
1230         }
1231         *next = NULL;
1232
1233         /* free array of pointers */
1234         mh_free0(&base);
1235 }
1236
1237
1238 /*
1239 ** MESSAGE
1240 */
1241
1242 static int
1243 InitMessage(CT ct)
1244 {
1245         struct k2v *kv;
1246         CI ci = &ct->c_ctinfo;
1247
1248         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1249                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1250                 return NOTOK;
1251         }
1252
1253         /* check for missing subtype */
1254         if (!*ci->ci_subtype)
1255                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1256
1257         /* match subtype */
1258         for (kv = SubMessage; kv->kv_key; kv++)
1259                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1260                         break;
1261         ct->c_subtype = kv->kv_value;
1262
1263         switch (ct->c_subtype) {
1264         case MESSAGE_RFC822:
1265                 break;
1266
1267         case MESSAGE_PARTIAL:
1268                 {
1269                 char **ap, **ep;
1270                 struct partial *p;
1271
1272                 p = mh_xcalloc(1, sizeof(*p));
1273                 ct->c_ctparams = (void *) p;
1274
1275                 /*
1276                 ** scan for parameters "id", "number",
1277                 ** and "total"
1278                 */
1279                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1280                         if (!mh_strcasecmp(*ap, "id")) {
1281                                 p->pm_partid = mh_xstrdup(*ep);
1282                                 continue;
1283                         }
1284                         if (!mh_strcasecmp(*ap, "number")) {
1285                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1286 invalid_param:
1287                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1288                                         return NOTOK;
1289                                 }
1290                                 continue;
1291                         }
1292                         if (!mh_strcasecmp(*ap, "total")) {
1293                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1294                                                 p->pm_maxno < 1)
1295                                         goto invalid_param;
1296                                 continue;
1297                         }
1298                 }
1299
1300                 if (!p->pm_partid || !p->pm_partno
1301                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1302                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1303                         return NOTOK;
1304                 }
1305                 }
1306                 break;
1307
1308         case MESSAGE_EXTERNAL:
1309                 {
1310                 CT p;
1311                 FILE *fp;
1312
1313                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1314                         advise(ct->c_file, "unable to open for reading");
1315                         return NOTOK;
1316                 }
1317
1318                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1319
1320                 if (!(p = get_content(fp, ct->c_file, 0))) {
1321                         ct->c_fp = NULL;
1322                         return NOTOK;
1323                 }
1324
1325                 p->c_fp = NULL;
1326                 p->c_end = p->c_begin;
1327
1328                 fclose(ct->c_fp);
1329                 ct->c_fp = NULL;
1330
1331                 switch (p->c_type) {
1332                 case CT_MULTIPART:
1333                         break;
1334
1335                 case CT_MESSAGE:
1336                         if (p->c_subtype != MESSAGE_RFC822)
1337                                 break;
1338                         /* else fall... */
1339                 default:
1340                         if (p->c_ctinitfnx)
1341                                 (*p->c_ctinitfnx) (p);
1342                         break;
1343                 }
1344                 }
1345                 break;
1346
1347         default:
1348                 break;
1349         }
1350
1351         return OK;
1352 }
1353
1354
1355 /*
1356 ** APPLICATION
1357 */
1358
1359 static int
1360 InitApplication(CT ct)
1361 {
1362         struct k2v *kv;
1363         CI ci = &ct->c_ctinfo;
1364
1365         /* match subtype */
1366         for (kv = SubApplication; kv->kv_key; kv++)
1367                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1368                         break;
1369         ct->c_subtype = kv->kv_value;
1370
1371         return OK;
1372 }
1373
1374
1375 /*
1376 ** TRANSFER ENCODINGS
1377 */
1378
1379 static int
1380 init_encoding(CT ct, OpenCEFunc openfnx)
1381 {
1382         CE ce;
1383
1384         ce = mh_xcalloc(1, sizeof(*ce));
1385
1386         ct->c_cefile     = ce;
1387         ct->c_ceopenfnx  = openfnx;
1388         ct->c_ceclosefnx = close_encoding;
1389         ct->c_cesizefnx  = size_encoding;
1390
1391         return OK;
1392 }
1393
1394
1395 void
1396 close_encoding(CT ct)
1397 {
1398         CE ce;
1399
1400         if (!(ce = ct->c_cefile))
1401                 return;
1402
1403         if (ce->ce_fp) {
1404                 fclose(ce->ce_fp);
1405                 ce->ce_fp = NULL;
1406         }
1407 }
1408
1409
1410 static unsigned long
1411 size_encoding(CT ct)
1412 {
1413         int fd;
1414         unsigned long size;
1415         char *file;
1416         CE ce;
1417         struct stat st;
1418
1419         if (!(ce = ct->c_cefile))
1420                 return (ct->c_end - ct->c_begin);
1421
1422         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1423                 return (long) st.st_size;
1424
1425         if (ce->ce_file) {
1426                 if (stat(ce->ce_file, &st) != NOTOK)
1427                         return (long) st.st_size;
1428                 else
1429                         return 0L;
1430         }
1431
1432         if (ct->c_encoding == CE_EXTERNAL)
1433                 return (ct->c_end - ct->c_begin);
1434
1435         file = NULL;
1436         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1437                 return (ct->c_end - ct->c_begin);
1438
1439         if (fstat(fd, &st) != NOTOK)
1440                 size = (long) st.st_size;
1441         else
1442                 size = 0L;
1443
1444         (*ct->c_ceclosefnx) (ct);
1445         return size;
1446 }
1447
1448
1449 /*
1450 ** BASE64
1451 */
1452
/*
** Lookup table from a 7-bit character code to its 6-bit BASE64
** value.  Characters outside the BASE64 alphabet map to 0xff.
** Each row covers sixteen character codes.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' and '/' */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0' through '9' */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x4f: 'A' through 'O' */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	/* 0x50 - 0x5f: 'P' through 'Z' */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x6f: 'a' through 'o' */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	/* 0x70 - 0x7f: 'p' through 'z' */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1471
1472
1473 static int
1474 InitBase64(CT ct)
1475 {
1476         return init_encoding(ct, openBase64);
1477 }
1478
1479
1480 static int
1481 openBase64(CT ct, char **file)
1482 {
1483         int bitno, cc;
1484         int fd, len, skip, own_ct_fp = 0;
1485         unsigned long bits;
1486         unsigned char value, *b, *b1, *b2, *b3;
1487         unsigned char *cp, *ep;
1488         char buffer[BUFSIZ];
1489         /* sbeck -- handle suffixes */
1490         CI ci;
1491         CE ce;
1492
1493         b  = (unsigned char *) &bits;
1494         b1 = &b[endian > 0 ? 1 : 2];
1495         b2 = &b[endian > 0 ? 2 : 1];
1496         b3 = &b[endian > 0 ? 3 : 0];
1497
1498         ce = ct->c_cefile;
1499         if (ce->ce_fp) {
1500                 fseek(ce->ce_fp, 0L, SEEK_SET);
1501                 goto ready_to_go;
1502         }
1503
1504         if (ce->ce_file) {
1505                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1506                         content_error(ce->ce_file, ct,
1507                                         "unable to fopen for reading");
1508                         return NOTOK;
1509                 }
1510                 goto ready_to_go;
1511         }
1512
1513         if (*file == NULL) {
1514                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1515                 ce->ce_unlink = 1;
1516         } else {
1517                 ce->ce_file = mh_xstrdup(*file);
1518                 ce->ce_unlink = 0;
1519         }
1520
1521         /* sbeck@cise.ufl.edu -- handle suffixes */
1522         ci = &ct->c_ctinfo;
1523         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1524                         invo_name, ci->ci_type, ci->ci_subtype);
1525         cp = context_find(buffer);
1526         if (cp == NULL || *cp == '\0') {
1527                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1528                                 ci->ci_type);
1529                 cp = context_find(buffer);
1530         }
1531         if (cp != NULL && *cp != '\0') {
1532                 if (ce->ce_unlink) {
1533                         /*
1534                         ** Temporary file already exists, so we rename to
1535                         ** version with extension.
1536                         */
1537                         char *file_org = mh_xstrdup(ce->ce_file);
1538                         ce->ce_file = add(cp, ce->ce_file);
1539                         if (rename(file_org, ce->ce_file)) {
1540                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1541                                                 file_org);
1542                         }
1543                         mh_free0(&file_org);
1544
1545                 } else {
1546                         ce->ce_file = add(cp, ce->ce_file);
1547                 }
1548         }
1549
1550         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1551                 content_error(ce->ce_file, ct,
1552                                 "unable to fopen for reading/writing");
1553                 return NOTOK;
1554         }
1555
1556         if ((len = ct->c_end - ct->c_begin) < 0)
1557                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1558
1559         if (!ct->c_fp) {
1560                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1561                         content_error(ct->c_file, ct,
1562                                         "unable to open for reading");
1563                         return NOTOK;
1564                 }
1565                 own_ct_fp = 1;
1566         }
1567
1568         bitno = 18;
1569         bits = 0L;
1570         skip = 0;
1571
1572         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1573         while (len > 0) {
1574                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1575                 case NOTOK:
1576                         content_error(ct->c_file, ct, "error reading from");
1577                         goto clean_up;
1578
1579                 case OK:
1580                         content_error(NULL, ct, "premature eof");
1581                         goto clean_up;
1582
1583                 default:
1584                         if (cc > len)
1585                                 cc = len;
1586                         len -= cc;
1587
1588                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1589                                 switch (*cp) {
1590                                 default:
1591                                         if (isspace(*cp))
1592                                                 break;
1593                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1594                                                 if (debugsw) {
1595                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1596                                                 }
1597                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1598                                                 continue;
1599                                         }
1600
1601                                         bits |= value << bitno;
1602 test_end:
1603                                         if ((bitno -= 6) < 0) {
1604                                                 putc((char) *b1, ce->ce_fp);
1605                                                 if (skip < 2) {
1606                                                         putc((char) *b2, ce->ce_fp);
1607                                                         if (skip < 1) {
1608                                                                 putc((char) *b3, ce->ce_fp);
1609                                                         }
1610                                                 }
1611
1612                                                 if (ferror(ce->ce_fp)) {
1613                                                         content_error(ce->ce_file, ct,
1614                                                                                    "error writing to");
1615                                                         goto clean_up;
1616                                                 }
1617                                                 bitno = 18, bits = 0L, skip = 0;
1618                                         }
1619                                         break;
1620
1621                                 case '=':
1622                                         if (++skip > 3)
1623                                                 goto self_delimiting;
1624                                         goto test_end;
1625                                 }
1626                         }
1627                 }
1628         }
1629
1630         if (bitno != 18) {
1631                 if (debugsw)
1632                         fprintf(stderr, "premature ending (bitno %d)\n",
1633                                         bitno);
1634
1635                 content_error(NULL, ct, "invalid BASE64 encoding");
1636                 goto clean_up;
1637         }
1638
1639 self_delimiting:
1640         fseek(ct->c_fp, 0L, SEEK_SET);
1641
1642         if (fflush(ce->ce_fp)) {
1643                 content_error(ce->ce_file, ct, "error writing to");
1644                 goto clean_up;
1645         }
1646
1647         fseek(ce->ce_fp, 0L, SEEK_SET);
1648
1649 ready_to_go:
1650         *file = ce->ce_file;
1651         if (own_ct_fp) {
1652                 fclose(ct->c_fp);
1653                 ct->c_fp = NULL;
1654         }
1655         return fileno(ce->ce_fp);
1656
1657 clean_up:
1658         free_encoding(ct, 0);
1659         if (own_ct_fp) {
1660                 fclose(ct->c_fp);
1661                 ct->c_fp = NULL;
1662         }
1663         return NOTOK;
1664 }
1665
1666
1667 /*
1668 ** QUOTED PRINTABLE
1669 */
1670
/*
** Lookup table from a 7-bit character code to the value of that
** character as a hexadecimal digit.  Both upper and lower case
** letters are covered; every other character maps to 0.
** Each row covers sixteen character codes.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x10 - 0x1f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x20 - 0x2f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0' through '9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x4f: 'A' through 'F' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x50 - 0x5f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x6f: 'a' through 'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x70 - 0x7f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1689
1690
1691 static int
1692 InitQuoted(CT ct)
1693 {
1694         return init_encoding(ct, openQuoted);
1695 }
1696
1697
1698 static int
1699 openQuoted(CT ct, char **file)
1700 {
1701         int cc, len, quoted, own_ct_fp = 0;
1702         unsigned char *cp, *ep;
1703         char buffer[BUFSIZ];
1704         unsigned char mask = 0;
1705         CE ce;
1706         /* sbeck -- handle suffixes */
1707         CI ci;
1708
1709         ce = ct->c_cefile;
1710         if (ce->ce_fp) {
1711                 fseek(ce->ce_fp, 0L, SEEK_SET);
1712                 goto ready_to_go;
1713         }
1714
1715         if (ce->ce_file) {
1716                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1717                         content_error(ce->ce_file, ct,
1718                                         "unable to fopen for reading");
1719                         return NOTOK;
1720                 }
1721                 goto ready_to_go;
1722         }
1723
1724         if (*file == NULL) {
1725                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1726                 ce->ce_unlink = 1;
1727         } else {
1728                 ce->ce_file = mh_xstrdup(*file);
1729                 ce->ce_unlink = 0;
1730         }
1731
1732         /* sbeck@cise.ufl.edu -- handle suffixes */
1733         ci = &ct->c_ctinfo;
1734         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1735                         invo_name, ci->ci_type, ci->ci_subtype);
1736         cp = context_find(buffer);
1737         if (cp == NULL || *cp == '\0') {
1738                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1739                                 ci->ci_type);
1740                 cp = context_find(buffer);
1741         }
1742         if (cp != NULL && *cp != '\0') {
1743                 if (ce->ce_unlink) {
1744                         /*
1745                         ** Temporary file already exists, so we rename to
1746                         ** version with extension.
1747                         */
1748                         char *file_org = mh_xstrdup(ce->ce_file);
1749                         ce->ce_file = add(cp, ce->ce_file);
1750                         if (rename(file_org, ce->ce_file)) {
1751                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1752                                                 file_org);
1753                         }
1754                         mh_free0(&file_org);
1755
1756                 } else {
1757                         ce->ce_file = add(cp, ce->ce_file);
1758                 }
1759         }
1760
1761         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1762                 content_error(ce->ce_file, ct,
1763                                 "unable to fopen for reading/writing");
1764                 return NOTOK;
1765         }
1766
1767         if ((len = ct->c_end - ct->c_begin) < 0)
1768                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1769
1770         if (!ct->c_fp) {
1771                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1772                         content_error(ct->c_file, ct,
1773                                         "unable to open for reading");
1774                         return NOTOK;
1775                 }
1776                 own_ct_fp = 1;
1777         }
1778
1779         quoted = 0;
1780
1781         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1782         while (len > 0) {
1783                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1784                         content_error(NULL, ct, "premature eof");
1785                         goto clean_up;
1786                 }
1787
1788                 if ((cc = strlen(buffer)) > len)
1789                         cc = len;
1790                 len -= cc;
1791
1792                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1793                         if (!isspace(*ep))
1794                                 break;
1795                 *++ep = '\n', ep++;
1796
1797                 for (; cp < ep; cp++) {
1798                         if (quoted > 0) {
1799                                 /* in an escape sequence */
1800                                 if (quoted == 1) {
1801                                         /* at byte 1 of an escape sequence */
1802                                         mask = hex2nib[*cp & 0x7f];
1803                                         /* next is byte 2 */
1804                                         quoted = 2;
1805                                 } else {
1806                                         /* at byte 2 of an escape sequence */
1807                                         mask <<= 4;
1808                                         mask |= hex2nib[*cp & 0x7f];
1809                                         putc(mask, ce->ce_fp);
1810                                         if (ferror(ce->ce_fp)) {
1811                                                 content_error(ce->ce_file, ct, "error writing to");
1812                                                 goto clean_up;
1813                                         }
1814                                         /*
1815                                         ** finished escape sequence; next may
1816                                         ** be literal or a new escape sequence
1817                                         */
1818                                         quoted = 0;
1819                                 }
1820                                 /* on to next byte */
1821                                 continue;
1822                         }
1823
1824                         /* not in an escape sequence */
1825                         if (*cp == '=') {
1826                                 /*
1827                                 ** starting an escape sequence,
1828                                 ** or invalid '='?
1829                                 */
1830                                 if (cp + 1 < ep && cp[1] == '\n') {
1831                                         /* "=\n" soft line break, eat the \n */
1832                                         cp++;
1833                                         continue;
1834                                 }
1835                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1836                                         /*
1837                                         ** We don't have 2 bytes left,
1838                                         ** so this is an invalid escape
1839                                         ** sequence; just show the raw bytes
1840                                         ** (below).
1841                                         */
1842                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1843                                         /*
1844                                         ** Next 2 bytes are hex digits,
1845                                         ** making this a valid escape
1846                                         ** sequence; let's decode it (above).
1847                                         */
1848                                         quoted = 1;
1849                                         continue;
1850                                 } else {
1851                                         /*
1852                                         ** One or both of the next 2 is
1853                                         ** out of range, making this an
1854                                         ** invalid escape sequence; just
1855                                         ** show the raw bytes (below).
1856                                         */
1857                                 }
1858                         }
1859
1860                         /* Just show the raw byte. */
1861                         putc(*cp, ce->ce_fp);
1862                         if (ferror(ce->ce_fp)) {
1863                                 content_error(ce->ce_file, ct,
1864                                                 "error writing to");
1865                                 goto clean_up;
1866                         }
1867                 }
1868         }
1869         if (quoted) {
1870                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1871                 goto clean_up;
1872         }
1873
1874         fseek(ct->c_fp, 0L, SEEK_SET);
1875
1876         if (fflush(ce->ce_fp)) {
1877                 content_error(ce->ce_file, ct, "error writing to");
1878                 goto clean_up;
1879         }
1880
1881         fseek(ce->ce_fp, 0L, SEEK_SET);
1882
1883 ready_to_go:
1884         *file = ce->ce_file;
1885         if (own_ct_fp) {
1886                 fclose(ct->c_fp);
1887                 ct->c_fp = NULL;
1888         }
1889         return fileno(ce->ce_fp);
1890
1891 clean_up:
1892         free_encoding(ct, 0);
1893         if (own_ct_fp) {
1894                 fclose(ct->c_fp);
1895                 ct->c_fp = NULL;
1896         }
1897         return NOTOK;
1898 }
1899
1900
1901 /*
1902 ** 7BIT
1903 */
1904
1905 static int
1906 Init7Bit(CT ct)
1907 {
1908         if (init_encoding(ct, open7Bit) == NOTOK)
1909                 return NOTOK;
1910
1911         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1912         return OK;
1913 }
1914
1915
1916 int
1917 open7Bit(CT ct, char **file)
1918 {
1919         int cc, fd, len, own_ct_fp = 0;
1920         char buffer[BUFSIZ];
1921         /* sbeck -- handle suffixes */
1922         char *cp;
1923         CI ci;
1924         CE ce;
1925
1926         ce = ct->c_cefile;
1927         if (ce->ce_fp) {
1928                 fseek(ce->ce_fp, 0L, SEEK_SET);
1929                 goto ready_to_go;
1930         }
1931
1932         if (ce->ce_file) {
1933                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1934                         content_error(ce->ce_file, ct,
1935                                         "unable to fopen for reading");
1936                         return NOTOK;
1937                 }
1938                 goto ready_to_go;
1939         }
1940
1941         if (*file == NULL) {
1942                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1943                 ce->ce_unlink = 1;
1944         } else {
1945                 ce->ce_file = mh_xstrdup(*file);
1946                 ce->ce_unlink = 0;
1947         }
1948
1949         /* sbeck@cise.ufl.edu -- handle suffixes */
1950         ci = &ct->c_ctinfo;
1951         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1952                         invo_name, ci->ci_type, ci->ci_subtype);
1953         cp = context_find(buffer);
1954         if (cp == NULL || *cp == '\0') {
1955                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1956                                 ci->ci_type);
1957                 cp = context_find(buffer);
1958         }
1959         if (cp != NULL && *cp != '\0') {
1960                 if (ce->ce_unlink) {
1961                         /*
1962                         ** Temporary file already exists, so we rename to
1963                         ** version with extension.
1964                         */
1965                         char *file_org = mh_xstrdup(ce->ce_file);
1966                         ce->ce_file = add(cp, ce->ce_file);
1967                         if (rename(file_org, ce->ce_file)) {
1968                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1969                                                 file_org);
1970                         }
1971                         mh_free0(&file_org);
1972
1973                 } else {
1974                         ce->ce_file = add(cp, ce->ce_file);
1975                 }
1976         }
1977
1978         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1979                 content_error(ce->ce_file, ct,
1980                                 "unable to fopen for reading/writing");
1981                 return NOTOK;
1982         }
1983
1984         if (ct->c_type == CT_MULTIPART) {
1985                 char **ap, **ep;
1986                 CI ci = &ct->c_ctinfo;
1987
1988                 len = 0;
1989                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1990                                 ci->ci_subtype);
1991                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1992                                 strlen(ci->ci_subtype);
1993                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1994                         putc(';', ce->ce_fp);
1995                         len++;
1996
1997                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1998                                         *ap, *ep);
1999
2000                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2001                                 fputs("\n\t", ce->ce_fp);
2002                                 len = 8;
2003                         } else {
2004                                 putc(' ', ce->ce_fp);
2005                                 len++;
2006                         }
2007                         fprintf(ce->ce_fp, "%s", buffer);
2008                         len += cc;
2009                 }
2010
2011                 if (ci->ci_comment) {
2012                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2013                                                 >= CPERLIN) {
2014                                 fputs("\n\t", ce->ce_fp);
2015                                 len = 8;
2016                         } else {
2017                                 putc(' ', ce->ce_fp);
2018                                 len++;
2019                         }
2020                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2021                         len += cc;
2022                 }
2023                 fprintf(ce->ce_fp, "\n");
2024                 if (ct->c_id)
2025                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2026                 if (ct->c_descr)
2027                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2028                 if (ct->c_dispo)
2029                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2030                 fprintf(ce->ce_fp, "\n");
2031         }
2032
2033         if ((len = ct->c_end - ct->c_begin) < 0)
2034                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2035
2036         if (!ct->c_fp) {
2037                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2038                         content_error(ct->c_file, ct,
2039                                         "unable to open for reading");
2040                         return NOTOK;
2041                 }
2042                 own_ct_fp = 1;
2043         }
2044
2045         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2046         while (len > 0)
2047                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2048                 case NOTOK:
2049                         content_error(ct->c_file, ct, "error reading from");
2050                         goto clean_up;
2051
2052                 case OK:
2053                         content_error(NULL, ct, "premature eof");
2054                         goto clean_up;
2055
2056                 default:
2057                         if (cc > len)
2058                                 cc = len;
2059                         len -= cc;
2060
2061                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2062                         if (ferror(ce->ce_fp)) {
2063                                 content_error(ce->ce_file, ct,
2064                                                 "error writing to");
2065                                 goto clean_up;
2066                         }
2067                 }
2068
2069         fseek(ct->c_fp, 0L, SEEK_SET);
2070
2071         if (fflush(ce->ce_fp)) {
2072                 content_error(ce->ce_file, ct, "error writing to");
2073                 goto clean_up;
2074         }
2075
2076         fseek(ce->ce_fp, 0L, SEEK_SET);
2077
2078 ready_to_go:
2079         *file = ce->ce_file;
2080         if (own_ct_fp) {
2081                 fclose(ct->c_fp);
2082                 ct->c_fp = NULL;
2083         }
2084         return fileno(ce->ce_fp);
2085
2086 clean_up:
2087         free_encoding(ct, 0);
2088         if (own_ct_fp) {
2089                 fclose(ct->c_fp);
2090                 ct->c_fp = NULL;
2091         }
2092         return NOTOK;
2093 }