a478b4271f98ff3c4a31aa40ba3d9220fc772e18
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/*
** Debug switch, defined elsewhere.  When non-zero, the parsers in
** this file echo the cleaned-up header field values to stderr.
*/
extern int debugsw;

extern int endian;  /* mhmisc.c */

extern pid_t xpid;  /* mhshowsbr.c  */

/*
** Directory to place temp files.  This must
** be set before these routines are called.
*/
char *tmp;
33
/*
** Structures for TEXT messages
**
** Maps a text subtype name to its TEXT_* constant.  The entry with
** the NULL key terminates the table and carries the value used for
** unrecognized names (the lookup code is outside this part of the
** file -- presumably a linear scan up to the NULL key).
*/
struct k2v SubText[] = {
        { "plain", TEXT_PLAIN },
        { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
        { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
        { NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
43
/*
** Maps a charset name to its CHARSET_* constant.  The NULL-keyed
** entry terminates the table and holds the value for any other name.
*/
struct k2v Charset[] = {
        { "us-ascii",   CHARSET_USASCII },
        { "iso-8859-1", CHARSET_LATIN },
        { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
49
/*
** Structures for MULTIPART messages
**
** Maps a multipart subtype name to its MULTI_* constant.  The
** NULL-keyed entry terminates the table and holds the value for
** any other subtype.
*/
struct k2v SubMultiPart[] = {
        { "mixed",       MULTI_MIXED },
        { "alternative", MULTI_ALTERNATE },
        { "digest",      MULTI_DIGEST },
        { "parallel",    MULTI_PARALLEL },
        { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
60
/*
** Structures for MESSAGE messages
**
** Maps a message subtype name to its MESSAGE_* constant.  The
** NULL-keyed entry terminates the table and holds the value for
** any other subtype.
*/
struct k2v SubMessage[] = {
        { "rfc822",        MESSAGE_RFC822 },
        { "partial",       MESSAGE_PARTIAL },
        { "external-body", MESSAGE_EXTERNAL },
        { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
70
/*
** Structure for APPLICATION messages
**
** Maps an application subtype name to its APPLICATION_* constant.
** The NULL-keyed entry terminates the table and holds the value
** for any other subtype.
*/
struct k2v SubApplication[] = {
        { "octet-stream", APPLICATION_OCTETS },
        { "postscript",   APPLICATION_POSTSCRIPT },
        { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
79
80
/* mhmisc.c */
int part_ok(CT, int);
int type_ok(CT, int);
int make_intermediates(char *);
void content_error(char *, CT, char *, ...);

/* mhfree.c */
void free_content(CT);
void free_encoding(CT, int);

/*
** static prototypes
*/

/* header parsing */
static CT get_content(FILE *, char *, int);
static int get_comment(CT, unsigned char **, int);

/* per-content-type init functions, referenced from str2cts[] */
static int InitGeneric(CT);
static int InitText(CT);
static int InitMultiPart(CT);
static void reverse_parts(CT);
static int InitMessage(CT);
static int InitApplication(CT);

/* per-transfer-encoding init functions, referenced from str2ces[] */
static int init_encoding(CT, OpenCEFunc);
static unsigned long size_encoding(CT);
static int InitBase64(CT);
static int openBase64(CT, char **);
static int InitQuoted(CT);
static int openQuoted(CT, char **);
static int Init7Bit(CT);
110
/*
** Content type table: type name, CT_* constant, and Init function.
**
** get_content() scans the named entries with a case-insensitive
** compare.  If none matches it lands on the first NULL-keyed entry
** (CT_EXTENSION) and stays there unless uprf(type, "X-") returns
** zero, in which case it steps on to the second one (CT_UNKNOWN).
** That is why both NULL entries must stay last and in this order.
*/
struct str2init str2cts[] = {
        { "application", CT_APPLICATION, InitApplication },
        { "audio",       CT_AUDIO,       InitGeneric },
        { "image",       CT_IMAGE,       InitGeneric },
        { "message",     CT_MESSAGE,     InitMessage },
        { "multipart",   CT_MULTIPART,   InitMultiPart },
        { "text",        CT_TEXT,        InitText },
        { "video",       CT_VIDEO,       InitGeneric },
        { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
        { NULL,          CT_UNKNOWN,     NULL },
};
122
/*
** Content-Transfer-Encoding table: encoding name, CE_* constant,
** and Init function.  It is searched by get_content() in the same
** way as str2cts[]: no match leaves CE_EXTENSION, or CE_UNKNOWN
** when uprf(name, "X-") returns zero.  8bit, 7bit and binary all
** share Init7Bit.
*/
struct str2init str2ces[] = {
        { "base64",           CE_BASE64,    InitBase64 },
        { "quoted-printable", CE_QUOTED,    InitQuoted },
        { "8bit",             CE_8BIT,      Init7Bit },
        { "7bit",             CE_7BIT,      Init7Bit },
        { "binary",           CE_BINARY,    Init7Bit },
        { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
        { NULL,               CE_UNKNOWN,    NULL },
};
132
133
/*
** Examine a status word (presumably one obtained from wait()).
** The status is handed back unchanged, except when its low seven
** bits equal SIGQUIT and its second byte is not 0xff: then stdout
** and stderr are flushed and the process exits with EX_SOFTWARE.
*/
int
pidcheck(int status)
{
        int high_byte_set = ((status & 0xff00) == 0xff00);
        int low_bits_sigquit = ((status & 0x007f) == SIGQUIT);

        if (!high_byte_set && low_bits_sigquit) {
                fflush(stdout);
                fflush(stderr);
                exit(EX_SOFTWARE);
        }

        return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = mh_xstrdup(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD2;;) {
252                 switch (state = m_getfld2(state, &f, in)) {
253                 case LENERR2:
254                         state = FLD2;
255                         /* FALL */
256                 case FLD2:
257                         if (compnum == 1) {
258                                 ct->crlf = f.value[f.valuelen-2] == '\r';
259                         }
260                         compnum++;
261
262                         /* add the header data to the list */
263                         add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
264
265                         ct->c_begin = ftell(in) + 1;
266                         continue;
267
268                 case BODY2:
269                         ct->c_begin = ftell(in) - strlen(f.value);
270                         break;
271
272                 case FILEEOF2:
273                         ct->c_begin = ftell(in);
274                         break;
275
276                 case FMTERR2:
277                         advise(NULL, "message format error in component #%d", compnum);
278                         state = FLD2;
279                         continue;
280
281                 case IOERR2:
282                         adios(EX_IOERR, "m_getfld2", "io error");
283
284                 default:
285                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
286                 }
287                 break;
288         }
289
290         /*
291         ** Read the content headers.  We will parse the
292         ** MIME related header fields into their various
293         ** structures and set internal flags related to
294         ** content type/subtype, etc.
295         */
296
297         hp = ct->c_first_hf;  /* start at first header field */
298         while (hp) {
299                 /* Get MIME-Version field */
300                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
301                         int ucmp;
302                         char c;
303                         unsigned char *cp, *dp;
304
305                         if (ct->c_vrsn) {
306                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
307                                 goto next_header;
308                         }
309                         ct->c_vrsn = mh_xstrdup(hp->value);
310
311                         /* Now, cleanup this field */
312                         cp = ct->c_vrsn;
313
314                         while (isspace(*cp))
315                                 cp++;
316                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
317                                 *dp++ = ' ';
318                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
319                                 if (!isspace(*dp))
320                                         break;
321                         *++dp = '\0';
322                         if (debugsw)
323                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
324
325                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
326                                 goto out;
327
328                         for (dp = cp; istoken(*dp); dp++)
329                                 continue;
330                         c = *dp;
331                         *dp = '\0';
332                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
333                         *dp = c;
334                         if (!ucmp) {
335                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
336                         }
337
338                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
339                         /* Get Content-Type field */
340                         struct str2init *s2i;
341                         CI ci = &ct->c_ctinfo;
342
343                         /* Check if we've already seen a Content-Type header */
344                         if (ct->c_ctline) {
345                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
346                                 goto next_header;
347                         }
348
349                         /* Parse the Content-Type field */
350                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
351                                 goto out;
352
353                         /*
354                         ** Set the Init function and the internal
355                         ** flag for this content type.
356                         */
357                         for (s2i = str2cts; s2i->si_key; s2i++)
358                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
359                                         break;
360                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
361                                 s2i++;
362                         ct->c_type = s2i->si_val;
363                         ct->c_ctinitfnx = s2i->si_init;
364
365                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
366                         /* Get Content-Transfer-Encoding field */
367                         char c;
368                         unsigned char *cp, *dp;
369                         struct str2init *s2i;
370
371                         /*
372                         ** Check if we've already seen the
373                         ** Content-Transfer-Encoding field
374                         */
375                         if (ct->c_celine) {
376                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
377                                 goto next_header;
378                         }
379
380                         /* get copy of this field */
381                         ct->c_celine = cp = mh_xstrdup(hp->value);
382
383                         while (isspace(*cp))
384                                 cp++;
385                         for (dp = cp; istoken(*dp); dp++)
386                                 continue;
387                         c = *dp;
388                         *dp = '\0';
389
390                         /*
391                         ** Find the internal flag and Init function
392                         ** for this transfer encoding.
393                         */
394                         for (s2i = str2ces; s2i->si_key; s2i++)
395                                 if (!mh_strcasecmp(cp, s2i->si_key))
396                                         break;
397                         if (!s2i->si_key && !uprf(cp, "X-"))
398                                 s2i++;
399                         *dp = c;
400                         ct->c_encoding = s2i->si_val;
401
402                         /* Call the Init function for this encoding */
403                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
404                                 goto out;
405
406                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
407                         /* Get Content-ID field */
408                         ct->c_id = add(hp->value, ct->c_id);
409
410                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
411                         /* Get Content-Description field */
412                         ct->c_descr = add(hp->value, ct->c_descr);
413
414                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
415                         /* Get Content-Disposition field */
416                         ct->c_dispo = add(hp->value, ct->c_dispo);
417                 }
418
419 next_header:
420                 hp = hp->next;  /* next header field */
421         }
422
423         /*
424         ** Check if we saw a Content-Type field.
425         ** If not, then assign a default value for
426         ** it, and the Init function.
427         */
428         if (!ct->c_ctline) {
429                 /*
430                 ** If we are inside a multipart/digest message,
431                 ** so default type is message/rfc822
432                 */
433                 if (toplevel < 0) {
434                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
435                                 goto out;
436                         ct->c_type = CT_MESSAGE;
437                         ct->c_ctinitfnx = InitMessage;
438                 } else {
439                         /*
440                         ** Else default type is text/plain
441                         */
442                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
443                                 goto out;
444                         ct->c_type = CT_TEXT;
445                         ct->c_ctinitfnx = InitText;
446                 }
447         }
448
449         /* Use default Transfer-Encoding, if necessary */
450         if (!ct->c_celine) {
451                 ct->c_encoding = CE_7BIT;
452                 Init7Bit(ct);
453         }
454
455         return ct;
456
457 out:
458         free_content(ct);
459         return NULL;
460 }
461
462
463 /*
464 ** small routine to add header field to list
465 */
466
467 int
468 add_header(CT ct, char *name, char *value)
469 {
470         HF hp;
471
472         /* allocate header field structure */
473         hp = mh_xcalloc(1, sizeof(*hp));
474
475         /* link data into header structure */
476         hp->name = name;
477         hp->value = value;
478         hp->next = NULL;
479
480         /* link header structure into the list */
481         if (ct->c_first_hf == NULL) {
482                 ct->c_first_hf = hp;  /* this is the first */
483                 ct->c_last_hf = hp;
484         } else {
485                 ct->c_last_hf->next = hp;  /* add it to the end */
486                 ct->c_last_hf = hp;
487         }
488
489         return 0;
490 }
491
492
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** The search for `name=' is case sensitive.  If buf or value is
** NULL, or buf already contains `name=', buf is returned untouched.
** Otherwise buf is freed and a newly allocated, newline-terminated
** string is returned in its place.  `value' is only read.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
        char *newbuf = (char *)buf;
        char *name_plus_equal;
        char *insertion;
        char *semi;
        size_t len;

        /* Assume that name is non-null. */
        if (!buf || !value)
                return newbuf;

        name_plus_equal = concat(name, "=", NULL);
        if (strstr((char *)buf, name_plus_equal)) {
                /* already present, nothing to do */
                mh_free0(&name_plus_equal);
                return newbuf;
        }
        mh_free0(&name_plus_equal);

        /*
        ** Trim trailing space, esp. newline.  Working with a length
        ** keeps this safe for empty and all-whitespace buffers.
        */
        len = strlen((char *)buf);
        while (len > 0 && isspace(buf[len - 1]))
                buf[--len] = '\0';

        insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

        /*
        ** Insert at first semicolon, if any.
        ** If none, append to end.
        */
        semi = strchr((char *)buf, ';');
        if (semi) {
                char *prefix = mh_xstrdup((char *)buf);

                /* cut the copy right before the semicolon */
                prefix[semi - (char *)buf] = '\0';
                newbuf = concat(prefix, insertion, semi, "\n", NULL);
                mh_free0(&prefix);
        } else {
                /* Append to end. */
                newbuf = concat((char *)buf, insertion, "\n", NULL);
        }

        mh_free0(&insertion);
        mh_free0(&buf);

        return newbuf;
}
550
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** A quoted string that is never closed is treated like "not found",
** i.e. the entire value is returned.
**
** The result is either `value' itself (also when value is NULL) or
** a newly allocated string; callers can tell the two apart by
** comparing the returned pointer with `value'.
*/
char *
extract_name_value(char *name_suffix, char *value)
{
        char *name_suffix_plus_quote;
        char *match;
        char *begin, *end;
        char *extracted;
        size_t len;

        if (!value) {
                return value;
        }

        name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
        match = strstr(value, name_suffix_plus_quote);
        mh_free0(&name_suffix_plus_quote);
        if (!match) {
                return value;
        }

        /* The opening quote is part of the match, so it exists. */
        begin = strchr(match, '"') + 1;

        /* The closing quote may be missing in malformed input. */
        end = strchr(begin, '"');
        if (!end) {
                return value;
        }

        len = end - begin;
        extracted = mh_xcalloc(len + 1, sizeof(char));
        memcpy(extracted, begin, len);
        extracted[len] = '\0';

        return extracted;
}
589
590 /*
591 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
592 ** directives.  Fills in the information of the CTinfo structure.
593 */
594 int
595 get_ctinfo(unsigned char *cp, CT ct, int magic)
596 {
597         int i;
598         unsigned char *dp;
599         char **ap, **ep;
600         char c;
601         CI ci;
602
603         ci = &ct->c_ctinfo;
604         i = strlen(invo_name) + 2;
605
606         /* store copy of Content-Type line */
607         cp = ct->c_ctline = mh_xstrdup(cp);
608
609         while (isspace(*cp))  /* trim leading spaces */
610                 cp++;
611
612         /* change newlines to spaces */
613         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
614                 *dp++ = ' ';
615
616         /* trim trailing spaces */
617         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
618                 if (!isspace(*dp))
619                         break;
620         *++dp = '\0';
621
622         if (debugsw)
623                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
624
625         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
626                 return NOTOK;
627
628         for (dp = cp; istoken(*dp); dp++)
629                 continue;
630         c = *dp, *dp = '\0';
631         ci->ci_type = mh_xstrdup(cp);  /* store content type */
632         *dp = c, cp = dp;
633
634         if (!*ci->ci_type) {
635                 advise(NULL, "invalid %s: field in message %s (empty type)",
636                                 TYPE_FIELD, ct->c_file);
637                 return NOTOK;
638         }
639
640         /* down case the content type string */
641         for (dp = ci->ci_type; *dp; dp++)
642                 if (isalpha(*dp) && isupper(*dp))
643                         *dp = tolower(*dp);
644
645         while (isspace(*cp))
646                 cp++;
647
648         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
649                 return NOTOK;
650
651         if (*cp != '/') {
652                 if (!magic)
653                         ci->ci_subtype = mh_xstrdup("");
654                 goto magic_skip;
655         }
656
657         cp++;
658         while (isspace(*cp))
659                 cp++;
660
661         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
662                 return NOTOK;
663
664         for (dp = cp; istoken(*dp); dp++)
665                 continue;
666         c = *dp, *dp = '\0';
667         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
668         *dp = c, cp = dp;
669
670         if (!*ci->ci_subtype) {
671                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
672                 return NOTOK;
673         }
674
675         /* down case the content subtype string */
676         for (dp = ci->ci_subtype; *dp; dp++)
677                 if (isalpha(*dp) && isupper(*dp))
678                         *dp = tolower(*dp);
679
680 magic_skip:
681         while (isspace(*cp))
682                 cp++;
683
684         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
685                 return NOTOK;
686
687         /*
688         ** Parse attribute/value pairs given with Content-Type
689         */
690         ep = (ap = ci->ci_attrs) + NPARMS;
691         while (*cp == ';') {
692                 char *vp;
693                 unsigned char *up;
694
695                 if (ap >= ep) {
696                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
697                         return NOTOK;
698                 }
699
700                 cp++;
701                 while (isspace(*cp))
702                         cp++;
703
704                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
705                         return NOTOK;
706
707                 if (*cp == 0) {
708                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
709                         return OK;
710                 }
711
712                 /* down case the attribute name */
713                 for (dp = cp; istoken(*dp); dp++)
714                         if (isalpha(*dp) && isupper(*dp))
715                                 *dp = tolower(*dp);
716
717                 for (up = dp; isspace(*dp);)
718                         dp++;
719                 if (dp == cp || *dp != '=') {
720                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
721                         return NOTOK;
722                 }
723
724                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
725                 *vp = '\0';
726                 for (dp++; isspace(*dp);)
727                         dp++;
728
729                 /* now add the attribute value */
730                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
731
732                 if (*dp == '"') {
733                         for (cp = ++dp, dp = vp;;) {
734                                 switch (c = *cp++) {
735                                 case '\0':
736 bad_quote:
737                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
738                                         return NOTOK;
739
740                                 case '\\':
741                                         *dp++ = c;
742                                         if ((c = *cp++) == '\0')
743                                                 goto bad_quote;
744                                         /* else fall... */
745
746                                 default:
747                                         *dp++ = c;
748                                         continue;
749
750                                 case '"':
751                                         *dp = '\0';
752                                         break;
753                                 }
754                                 break;
755                         }
756                 } else {
757                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
758                                 continue;
759                         *dp = '\0';
760                 }
761                 if (!*vp) {
762                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
763                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
764                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
765                         continue;
766                 }
767                 ap++;
768
769                 while (isspace(*cp))
770                         cp++;
771
772                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
773                         return NOTOK;
774         }
775
776         /*
777         ** Get any <Content-Id> given in buffer
778         */
779         if (magic && *cp == '<') {
780                 if (ct->c_id) {
781                         mh_free0(&(ct->c_id));
782                 }
783                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784                         advise(NULL, "invalid ID in message %s", ct->c_file);
785                         return NOTOK;
786                 }
787                 c = *dp;
788                 *dp = '\0';
789                 if (*ct->c_id)
790                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
791                 else
792                         ct->c_id = NULL;
793                 *dp++ = c;
794                 cp = dp;
795
796                 while (isspace(*cp))
797                         cp++;
798         }
799
800         /*
801         ** Get any [Content-Description] given in buffer.
802         */
803         if (magic && *cp == '[') {
804                 ct->c_descr = ++cp;
805                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
806                         if (*dp == ']')
807                                 break;
808                 if (dp < cp) {
809                         advise(NULL, "invalid description in message %s",
810                                         ct->c_file);
811                         ct->c_descr = NULL;
812                         return NOTOK;
813                 }
814
815                 c = *dp;
816                 *dp = '\0';
817                 if (*ct->c_descr)
818                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
819                 else
820                         ct->c_descr = NULL;
821                 *dp++ = c;
822                 cp = dp;
823
824                 while (isspace(*cp))
825                         cp++;
826         }
827
828         /*
829         ** Get any {Content-Disposition} given in buffer.
830         */
831         if (magic && *cp == '{') {
832                 ct->c_dispo = ++cp;
833                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
834                         if (*dp == '}')
835                                 break;
836                 if (dp < cp) {
837                         advise(NULL, "invalid disposition in message %s",
838                                         ct->c_file);
839                         ct->c_dispo = NULL;
840                         return NOTOK;
841                 }
842
843                 c = *dp;
844                 *dp = '\0';
845                 if (*ct->c_dispo)
846                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
847                 else
848                         ct->c_dispo = NULL;
849                 *dp++ = c;
850                 cp = dp;
851
852                 while (isspace(*cp))
853                         cp++;
854         }
855
856         /*
857         ** Check if anything is left over
858         */
859         if (*cp) {
860                 if (magic) {
861                         ci->ci_magic = mh_xstrdup(cp);
862
863                         /*
864                         ** If there is a Content-Disposition header and
865                         ** it doesn't have a *filename=, extract it from
866                         ** the magic contents.  The mhbasename call skips
867                         ** any leading directory components.
868                         */
869                         if (ct->c_dispo)
870                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
871                         } else
872                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
873         }
874
875         return OK;
876 }
877
878
879 static int
880 get_comment(CT ct, unsigned char **ap, int istype)
881 {
882         int i;
883         char *bp;
884         unsigned char *cp;
885         char c, buffer[BUFSIZ], *dp;
886         CI ci;
887
888         ci = &ct->c_ctinfo;
889         cp = *ap;
890         bp = buffer;
891         cp++;
892
893         for (i = 0;;) {
894                 switch (c = *cp++) {
895                 case '\0':
896 invalid:
897                 advise(NULL, "invalid comment in message %s's %s: field",
898                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
899                 return NOTOK;
900
901                 case '\\':
902                         *bp++ = c;
903                         if ((c = *cp++) == '\0')
904                                 goto invalid;
905                         *bp++ = c;
906                         continue;
907
908                 case '(':
909                         i++;
910                         /* and fall... */
911                 default:
912                         *bp++ = c;
913                         continue;
914
915                 case ')':
916                         if (--i < 0)
917                                 break;
918                         *bp++ = c;
919                         continue;
920                 }
921                 break;
922         }
923         *bp = '\0';
924
925         if (istype) {
926                 if ((dp = ci->ci_comment)) {
927                         ci->ci_comment = concat(dp, " ", buffer, NULL);
928                         mh_free0(&dp);
929                 } else {
930                         ci->ci_comment = mh_xstrdup(buffer);
931                 }
932         }
933
934         while (isspace(*cp))
935                 cp++;
936
937         *ap = cp;
938         return OK;
939 }
940
941
942 /*
943 ** CONTENTS
944 **
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
947 */
948
949 static int
950 InitGeneric(CT ct)
951 {
952         return OK;  /* not much to do here */
953 }
954
955
956 /*
957 ** TEXT
958 */
959
960 static int
961 InitText(CT ct)
962 {
963         char **ap, **ep;
964         struct k2v *kv;
965         struct text *t;
966         CI ci = &ct->c_ctinfo;
967
968         /* check for missing subtype */
969         if (!*ci->ci_subtype)
970                 ci->ci_subtype = add("plain", ci->ci_subtype);
971
972         /* match subtype */
973         for (kv = SubText; kv->kv_key; kv++)
974                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
975                         break;
976         ct->c_subtype = kv->kv_value;
977
978         /* allocate text character set structure */
979         t = mh_xcalloc(1, sizeof(*t));
980         ct->c_ctparams = (void *) t;
981
982         /* scan for charset parameter */
983         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
984                 if (!mh_strcasecmp(*ap, "charset"))
985                         break;
986
987         /* check if content specified a character set */
988         if (*ap) {
989                 /* store its name */
990                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
991                 /* match character set or set to CHARSET_UNKNOWN */
992                 for (kv = Charset; kv->kv_key; kv++) {
993                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
994                                 break;
995                         }
996                 }
997                 t->tx_charset = kv->kv_value;
998         } else {
999                 t->tx_charset = CHARSET_UNSPECIFIED;
1000         }
1001
1002         return OK;
1003 }
1004
1005
1006 /*
1007 ** MULTIPART
1008 */
1009
1010 static int
1011 InitMultiPart(CT ct)
1012 {
1013         int inout;
1014         long last, pos;
1015         unsigned char *cp, *dp;
1016         char **ap, **ep;
1017         char *bp, buffer[BUFSIZ];
1018         struct multipart *m;
1019         struct k2v *kv;
1020         struct part *part, **next;
1021         CI ci = &ct->c_ctinfo;
1022         CT p;
1023         FILE *fp;
1024
1025         /*
1026         ** The encoding for multipart messages must be either
1027         ** 7bit, 8bit, or binary (per RFC2045).
1028         */
1029         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1030                 && ct->c_encoding != CE_BINARY) {
1031                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1032                 ct->c_encoding = CE_7BIT;
1033         }
1034
1035         /* match subtype */
1036         for (kv = SubMultiPart; kv->kv_key; kv++)
1037                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1038                         break;
1039         ct->c_subtype = kv->kv_value;
1040
1041         /*
1042         ** Check for "boundary" parameter, which is
1043         ** required for multipart messages.
1044         */
1045         bp = 0;
1046         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1047                 if (!mh_strcasecmp(*ap, "boundary")) {
1048                         bp = *ep;
1049                         break;
1050                 }
1051         }
1052
1053         /* complain if boundary parameter is missing */
1054         if (!*ap) {
1055                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1056                 return NOTOK;
1057         }
1058
1059         /* allocate primary structure for multipart info */
1060         m = mh_xcalloc(1, sizeof(*m));
1061         ct->c_ctparams = (void *) m;
1062
1063         /* check if boundary parameter contains only whitespace characters */
1064         for (cp = bp; isspace(*cp); cp++)
1065                 continue;
1066         if (!*cp) {
1067                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1068                 return NOTOK;
1069         }
1070
1071         /* remove trailing whitespace from boundary parameter */
1072         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1073                 if (!isspace(*dp))
1074                         break;
1075         *++dp = '\0';
1076
1077         /* record boundary separators */
1078         if (!ct->crlf) {
1079                 m->mp_start = concat(bp, "\n", NULL);
1080                 m->mp_stop = concat(bp, "--\n", NULL);
1081         } else {
1082                 m->mp_start = concat(bp, "\r\n", NULL);
1083                 m->mp_stop = concat(bp, "--\r\n", NULL);
1084         }
1085
1086
1087         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1088                 advise(ct->c_file, "unable to open for reading");
1089                 return NOTOK;
1090         }
1091
1092         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1093         last = ct->c_end;
1094         next = &m->mp_parts;
1095         part = NULL;
1096         inout = 1;
1097
1098         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1099                 if (pos > last)
1100                         break;
1101
1102                 pos += strlen(buffer);
1103                 if (buffer[0] != '-' || buffer[1] != '-')
1104                         continue;
1105                 if (inout) {
1106                         if (strcmp(buffer + 2, m->mp_start)!=0)
1107                                 continue;
1108 next_part:
1109                         part = mh_xcalloc(1, sizeof(*part));
1110                         *next = part;
1111                         next = &part->mp_next;
1112
1113                         if (!(p = get_content(fp, ct->c_file,
1114                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1115                                 ct->c_fp = NULL;
1116                                 return NOTOK;
1117                         }
1118                         p->c_fp = NULL;
1119                         part->mp_part = p;
1120                         pos = p->c_begin;
1121                         fseek(fp, pos, SEEK_SET);
1122                         inout = 0;
1123                 } else {
1124                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1125                                 inout = 1;
1126 end_part:
1127                                 p = part->mp_part;
1128                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1129                                 if (p->c_end < p->c_begin)
1130                                         p->c_begin = p->c_end;
1131                                 if (inout)
1132                                         goto next_part;
1133                                 goto last_part;
1134                         } else {
1135                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1136                                         goto end_part;
1137                         }
1138                 }
1139         }
1140
1141         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1142         if (!inout && part) {
1143                 p = part->mp_part;
1144                 p->c_end = ct->c_end;
1145
1146                 if (p->c_begin >= p->c_end) {
1147                         for (next = &m->mp_parts; *next != part;
1148                                 next = &((*next)->mp_next))
1149                                 continue;
1150                         *next = NULL;
1151                         free_content(p);
1152                         mh_free0(&part);
1153                 }
1154         }
1155
1156 last_part:
1157         /* reverse the order of the parts for multipart/alternative */
1158         if (ct->c_subtype == MULTI_ALTERNATE)
1159                 reverse_parts(ct);
1160
1161         /*
1162         ** label all subparts with part number, and
1163         ** then initialize the content of the subpart.
1164         */
1165         {
1166                 int partnum;
1167                 char *pp;
1168                 char partnam[BUFSIZ];
1169
1170                 if (ct->c_partno) {
1171                         snprintf(partnam, sizeof(partnam), "%s.",
1172                                         ct->c_partno);
1173                         pp = partnam + strlen(partnam);
1174                 } else {
1175                         pp = partnam;
1176                 }
1177
1178                 for (part = m->mp_parts, partnum = 1; part;
1179                         part = part->mp_next, partnum++) {
1180                         p = part->mp_part;
1181
1182                         sprintf(pp, "%d", partnum);
1183                         p->c_partno = mh_xstrdup(partnam);
1184
1185                         /* initialize the content of the subparts */
1186                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1187                                 fclose(ct->c_fp);
1188                                 ct->c_fp = NULL;
1189                                 return NOTOK;
1190                         }
1191                 }
1192         }
1193
1194         fclose(ct->c_fp);
1195         ct->c_fp = NULL;
1196         return OK;
1197 }
1198
1199
1200 /*
1201 ** reverse the order of the parts of a multipart
1202 */
1203
1204 static void
1205 reverse_parts(CT ct)
1206 {
1207         int i;
1208         struct multipart *m;
1209         struct part **base, **bmp, **next, *part;
1210
1211         m = (struct multipart *) ct->c_ctparams;
1212
1213         /* if only one part, just return */
1214         if (!m->mp_parts || !m->mp_parts->mp_next)
1215                 return;
1216
1217         /* count number of parts */
1218         i = 0;
1219         for (part = m->mp_parts; part; part = part->mp_next)
1220                 i++;
1221
1222         /* allocate array of pointers to the parts */
1223         base = mh_xcalloc(i + 1, sizeof(*base));
1224         bmp = base;
1225
1226         /* point at all the parts */
1227         for (part = m->mp_parts; part; part = part->mp_next)
1228                 *bmp++ = part;
1229         *bmp = NULL;
1230
1231         /* reverse the order of the parts */
1232         next = &m->mp_parts;
1233         for (bmp--; bmp >= base; bmp--) {
1234                 part = *bmp;
1235                 *next = part;
1236                 next = &part->mp_next;
1237         }
1238         *next = NULL;
1239
1240         /* free array of pointers */
1241         mh_free0(&base);
1242 }
1243
1244
1245 /*
1246 ** MESSAGE
1247 */
1248
1249 static int
1250 InitMessage(CT ct)
1251 {
1252         struct k2v *kv;
1253         CI ci = &ct->c_ctinfo;
1254
1255         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT) && (ct->c_encoding != CE_BINARY)) {
1256                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1257                 ct->c_encoding = CE_7BIT;
1258         }
1259
1260         /* check for missing subtype */
1261         if (!*ci->ci_subtype)
1262                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1263
1264         /* match subtype */
1265         for (kv = SubMessage; kv->kv_key; kv++)
1266                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1267                         break;
1268         ct->c_subtype = kv->kv_value;
1269
1270         switch (ct->c_subtype) {
1271         case MESSAGE_RFC822:
1272                 break;
1273
1274         case MESSAGE_PARTIAL:
1275                 {
1276                 char **ap, **ep;
1277                 struct partial *p;
1278
1279                 p = mh_xcalloc(1, sizeof(*p));
1280                 ct->c_ctparams = (void *) p;
1281
1282                 /*
1283                 ** scan for parameters "id", "number",
1284                 ** and "total"
1285                 */
1286                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1287                         if (!mh_strcasecmp(*ap, "id")) {
1288                                 p->pm_partid = mh_xstrdup(*ep);
1289                                 continue;
1290                         }
1291                         if (!mh_strcasecmp(*ap, "number")) {
1292                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1293 invalid_param:
1294                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1295                                         return NOTOK;
1296                                 }
1297                                 continue;
1298                         }
1299                         if (!mh_strcasecmp(*ap, "total")) {
1300                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1301                                                 p->pm_maxno < 1)
1302                                         goto invalid_param;
1303                                 continue;
1304                         }
1305                 }
1306
1307                 if (!p->pm_partid || !p->pm_partno
1308                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1309                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1310                         return NOTOK;
1311                 }
1312                 }
1313                 break;
1314
1315         case MESSAGE_EXTERNAL:
1316                 {
1317                 CT p;
1318                 FILE *fp;
1319
1320                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1321                         advise(ct->c_file, "unable to open for reading");
1322                         return NOTOK;
1323                 }
1324
1325                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1326
1327                 if (!(p = get_content(fp, ct->c_file, 0))) {
1328                         ct->c_fp = NULL;
1329                         return NOTOK;
1330                 }
1331
1332                 p->c_fp = NULL;
1333                 p->c_end = p->c_begin;
1334
1335                 fclose(ct->c_fp);
1336                 ct->c_fp = NULL;
1337
1338                 switch (p->c_type) {
1339                 case CT_MULTIPART:
1340                         break;
1341
1342                 case CT_MESSAGE:
1343                         if (p->c_subtype != MESSAGE_RFC822)
1344                                 break;
1345                         /* else fall... */
1346                 default:
1347                         if (p->c_ctinitfnx)
1348                                 (*p->c_ctinitfnx) (p);
1349                         break;
1350                 }
1351                 }
1352                 break;
1353
1354         default:
1355                 break;
1356         }
1357
1358         return OK;
1359 }
1360
1361
1362 /*
1363 ** APPLICATION
1364 */
1365
1366 static int
1367 InitApplication(CT ct)
1368 {
1369         struct k2v *kv;
1370         CI ci = &ct->c_ctinfo;
1371
1372         /* match subtype */
1373         for (kv = SubApplication; kv->kv_key; kv++)
1374                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1375                         break;
1376         ct->c_subtype = kv->kv_value;
1377
1378         return OK;
1379 }
1380
1381
1382 /*
1383 ** TRANSFER ENCODINGS
1384 */
1385
1386 static int
1387 init_encoding(CT ct, OpenCEFunc openfnx)
1388 {
1389         CE ce;
1390
1391         ce = mh_xcalloc(1, sizeof(*ce));
1392
1393         ct->c_cefile     = ce;
1394         ct->c_ceopenfnx  = openfnx;
1395         ct->c_ceclosefnx = close_encoding;
1396         ct->c_cesizefnx  = size_encoding;
1397
1398         return OK;
1399 }
1400
1401
1402 void
1403 close_encoding(CT ct)
1404 {
1405         CE ce;
1406
1407         if (!(ce = ct->c_cefile))
1408                 return;
1409
1410         if (ce->ce_fp) {
1411                 fclose(ce->ce_fp);
1412                 ce->ce_fp = NULL;
1413         }
1414 }
1415
1416
1417 static unsigned long
1418 size_encoding(CT ct)
1419 {
1420         int fd;
1421         unsigned long size;
1422         char *file;
1423         CE ce;
1424         struct stat st;
1425
1426         if (!(ce = ct->c_cefile))
1427                 return (ct->c_end - ct->c_begin);
1428
1429         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1430                 return (long) st.st_size;
1431
1432         if (ce->ce_file) {
1433                 if (stat(ce->ce_file, &st) != NOTOK)
1434                         return (long) st.st_size;
1435                 else
1436                         return 0L;
1437         }
1438
1439         if (ct->c_encoding == CE_EXTERNAL)
1440                 return (ct->c_end - ct->c_begin);
1441
1442         file = NULL;
1443         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1444                 return (ct->c_end - ct->c_begin);
1445
1446         if (fstat(fd, &st) != NOTOK)
1447                 size = (long) st.st_size;
1448         else
1449                 size = 0L;
1450
1451         (*ct->c_ceclosefnx) (ct);
1452         return size;
1453 }
1454
1455
1456 /*
1457 ** BASE64
1458 */
1459
/*
** Lookup table from a 7-bit character code to its 6-bit base64 value:
** 'A'-'Z' give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives
** 62 and '/' gives 63.  Every other code holds 0xff.  One row covers
** sixteen consecutive codes.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 - 0x0f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 - 0x1f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20 - 0x2f: '+' at 0x2b, '/' at 0x2f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63,
        /* 0x30 - 0x3f: '0' - '9' */
          52,   53,   54,   55,   56,   57,   58,   59,   60,   61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 - 0x4f: 'A' - 'O' from 0x41 on */
        0xff,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,
        /* 0x50 - 0x5f: 'P' - 'Z' */
          15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 - 0x6f: 'a' - 'o' from 0x61 on */
        0xff,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
        /* 0x70 - 0x7f: 'p' - 'z' */
          41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff
};
1478
1479
1480 static int
1481 InitBase64(CT ct)
1482 {
1483         return init_encoding(ct, openBase64);
1484 }
1485
1486
1487 static int
1488 openBase64(CT ct, char **file)
1489 {
1490         int bitno, cc;
1491         int fd, len, skip, own_ct_fp = 0;
1492         unsigned long bits;
1493         unsigned char value, *b, *b1, *b2, *b3;
1494         unsigned char *cp, *ep;
1495         char buffer[BUFSIZ];
1496         /* sbeck -- handle suffixes */
1497         CI ci;
1498         CE ce;
1499
1500         b  = (unsigned char *) &bits;
1501         b1 = &b[endian > 0 ? 1 : 2];
1502         b2 = &b[endian > 0 ? 2 : 1];
1503         b3 = &b[endian > 0 ? 3 : 0];
1504
1505         ce = ct->c_cefile;
1506         if (ce->ce_fp) {
1507                 fseek(ce->ce_fp, 0L, SEEK_SET);
1508                 goto ready_to_go;
1509         }
1510
1511         if (ce->ce_file) {
1512                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1513                         content_error(ce->ce_file, ct,
1514                                         "unable to fopen for reading");
1515                         return NOTOK;
1516                 }
1517                 goto ready_to_go;
1518         }
1519
1520         if (*file == NULL) {
1521                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1522                 ce->ce_unlink = 1;
1523         } else {
1524                 ce->ce_file = mh_xstrdup(*file);
1525                 ce->ce_unlink = 0;
1526         }
1527
1528         /* sbeck@cise.ufl.edu -- handle suffixes */
1529         ci = &ct->c_ctinfo;
1530         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1531                         invo_name, ci->ci_type, ci->ci_subtype);
1532         cp = context_find(buffer);
1533         if (cp == NULL || *cp == '\0') {
1534                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1535                                 ci->ci_type);
1536                 cp = context_find(buffer);
1537         }
1538         if (cp != NULL && *cp != '\0') {
1539                 if (ce->ce_unlink) {
1540                         /*
1541                         ** Temporary file already exists, so we rename to
1542                         ** version with extension.
1543                         */
1544                         char *file_org = mh_xstrdup(ce->ce_file);
1545                         ce->ce_file = add(cp, ce->ce_file);
1546                         if (rename(file_org, ce->ce_file)) {
1547                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1548                                                 file_org);
1549                         }
1550                         mh_free0(&file_org);
1551
1552                 } else {
1553                         ce->ce_file = add(cp, ce->ce_file);
1554                 }
1555         }
1556
1557         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1558                 content_error(ce->ce_file, ct,
1559                                 "unable to fopen for reading/writing");
1560                 return NOTOK;
1561         }
1562
1563         if ((len = ct->c_end - ct->c_begin) < 0)
1564                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1565
1566         if (!ct->c_fp) {
1567                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1568                         content_error(ct->c_file, ct,
1569                                         "unable to open for reading");
1570                         return NOTOK;
1571                 }
1572                 own_ct_fp = 1;
1573         }
1574
1575         bitno = 18;
1576         bits = 0L;
1577         skip = 0;
1578
1579         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1580         while (len > 0) {
1581                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1582                 case NOTOK:
1583                         content_error(ct->c_file, ct, "error reading from");
1584                         goto clean_up;
1585
1586                 case OK:
1587                         content_error(NULL, ct, "premature eof");
1588                         goto clean_up;
1589
1590                 default:
1591                         if (cc > len)
1592                                 cc = len;
1593                         len -= cc;
1594
1595                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1596                                 switch (*cp) {
1597                                 default:
1598                                         if (isspace(*cp))
1599                                                 break;
1600                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1601                                                 if (debugsw) {
1602                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1603                                                 }
1604                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1605                                                 continue;
1606                                         }
1607
1608                                         bits |= value << bitno;
1609 test_end:
1610                                         if ((bitno -= 6) < 0) {
1611                                                 putc((char) *b1, ce->ce_fp);
1612                                                 if (skip < 2) {
1613                                                         putc((char) *b2, ce->ce_fp);
1614                                                         if (skip < 1) {
1615                                                                 putc((char) *b3, ce->ce_fp);
1616                                                         }
1617                                                 }
1618
1619                                                 if (ferror(ce->ce_fp)) {
1620                                                         content_error(ce->ce_file, ct,
1621                                                                                    "error writing to");
1622                                                         goto clean_up;
1623                                                 }
1624                                                 bitno = 18, bits = 0L, skip = 0;
1625                                         }
1626                                         break;
1627
1628                                 case '=':
1629                                         if (++skip > 3)
1630                                                 goto self_delimiting;
1631                                         goto test_end;
1632                                 }
1633                         }
1634                 }
1635         }
1636
1637         if (bitno != 18) {
1638                 if (debugsw)
1639                         fprintf(stderr, "premature ending (bitno %d)\n",
1640                                         bitno);
1641
1642                 content_error(NULL, ct, "invalid BASE64 encoding");
1643                 goto clean_up;
1644         }
1645
1646 self_delimiting:
1647         fseek(ct->c_fp, 0L, SEEK_SET);
1648
1649         if (fflush(ce->ce_fp)) {
1650                 content_error(ce->ce_file, ct, "error writing to");
1651                 goto clean_up;
1652         }
1653
1654         fseek(ce->ce_fp, 0L, SEEK_SET);
1655
1656 ready_to_go:
1657         *file = ce->ce_file;
1658         if (own_ct_fp) {
1659                 fclose(ct->c_fp);
1660                 ct->c_fp = NULL;
1661         }
1662         return fileno(ce->ce_fp);
1663
1664 clean_up:
1665         free_encoding(ct, 0);
1666         if (own_ct_fp) {
1667                 fclose(ct->c_fp);
1668                 ct->c_fp = NULL;
1669         }
1670         return NOTOK;
1671 }
1672
1673
1674 /*
1675 ** QUOTED PRINTABLE
1676 */
1677
/*
** Lookup table from a 7-bit character code to the value of that
** character as a hexadecimal digit: '0'-'9' give 0-9, 'A'-'F' and
** 'a'-'f' give 10-15.  Every other code holds 0.  One row covers
** sixteen consecutive codes.
*/
static char hex2nib[0x80] = {
        /* 0x00 - 0x0f */
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x10 - 0x1f */
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x20 - 0x2f */
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x30 - 0x3f: '0' - '9' */
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
        /* 0x40 - 0x4f: 'A' - 'F' from 0x41 on */
         0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x50 - 0x5f */
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x60 - 0x6f: 'a' - 'f' from 0x61 on */
         0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x70 - 0x7f */
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
1696
1697
1698 static int
1699 InitQuoted(CT ct)
1700 {
1701         return init_encoding(ct, openQuoted);
1702 }
1703
1704
1705 static int
1706 openQuoted(CT ct, char **file)
1707 {
1708         int cc, len, quoted, own_ct_fp = 0;
1709         unsigned char *cp, *ep;
1710         char buffer[BUFSIZ];
1711         unsigned char mask = 0;
1712         CE ce;
1713         /* sbeck -- handle suffixes */
1714         CI ci;
1715
1716         ce = ct->c_cefile;
1717         if (ce->ce_fp) {
1718                 fseek(ce->ce_fp, 0L, SEEK_SET);
1719                 goto ready_to_go;
1720         }
1721
1722         if (ce->ce_file) {
1723                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1724                         content_error(ce->ce_file, ct,
1725                                         "unable to fopen for reading");
1726                         return NOTOK;
1727                 }
1728                 goto ready_to_go;
1729         }
1730
1731         if (*file == NULL) {
1732                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1733                 ce->ce_unlink = 1;
1734         } else {
1735                 ce->ce_file = mh_xstrdup(*file);
1736                 ce->ce_unlink = 0;
1737         }
1738
1739         /* sbeck@cise.ufl.edu -- handle suffixes */
1740         ci = &ct->c_ctinfo;
1741         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1742                         invo_name, ci->ci_type, ci->ci_subtype);
1743         cp = context_find(buffer);
1744         if (cp == NULL || *cp == '\0') {
1745                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1746                                 ci->ci_type);
1747                 cp = context_find(buffer);
1748         }
1749         if (cp != NULL && *cp != '\0') {
1750                 if (ce->ce_unlink) {
1751                         /*
1752                         ** Temporary file already exists, so we rename to
1753                         ** version with extension.
1754                         */
1755                         char *file_org = mh_xstrdup(ce->ce_file);
1756                         ce->ce_file = add(cp, ce->ce_file);
1757                         if (rename(file_org, ce->ce_file)) {
1758                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1759                                                 file_org);
1760                         }
1761                         mh_free0(&file_org);
1762
1763                 } else {
1764                         ce->ce_file = add(cp, ce->ce_file);
1765                 }
1766         }
1767
1768         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1769                 content_error(ce->ce_file, ct,
1770                                 "unable to fopen for reading/writing");
1771                 return NOTOK;
1772         }
1773
1774         if ((len = ct->c_end - ct->c_begin) < 0)
1775                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1776
1777         if (!ct->c_fp) {
1778                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1779                         content_error(ct->c_file, ct,
1780                                         "unable to open for reading");
1781                         return NOTOK;
1782                 }
1783                 own_ct_fp = 1;
1784         }
1785
1786         quoted = 0;
1787
1788         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1789         while (len > 0) {
1790                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1791                         content_error(NULL, ct, "premature eof");
1792                         goto clean_up;
1793                 }
1794
1795                 if ((cc = strlen(buffer)) > len)
1796                         cc = len;
1797                 len -= cc;
1798
1799                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1800                         if (!isspace(*ep))
1801                                 break;
1802                 *++ep = '\n', ep++;
1803
1804                 for (; cp < ep; cp++) {
1805                         if (quoted > 0) {
1806                                 /* in an escape sequence */
1807                                 if (quoted == 1) {
1808                                         /* at byte 1 of an escape sequence */
1809                                         mask = hex2nib[*cp & 0x7f];
1810                                         /* next is byte 2 */
1811                                         quoted = 2;
1812                                 } else {
1813                                         /* at byte 2 of an escape sequence */
1814                                         mask <<= 4;
1815                                         mask |= hex2nib[*cp & 0x7f];
1816                                         putc(mask, ce->ce_fp);
1817                                         if (ferror(ce->ce_fp)) {
1818                                                 content_error(ce->ce_file, ct, "error writing to");
1819                                                 goto clean_up;
1820                                         }
1821                                         /*
1822                                         ** finished escape sequence; next may
1823                                         ** be literal or a new escape sequence
1824                                         */
1825                                         quoted = 0;
1826                                 }
1827                                 /* on to next byte */
1828                                 continue;
1829                         }
1830
1831                         /* not in an escape sequence */
1832                         if (*cp == '=') {
1833                                 /*
1834                                 ** starting an escape sequence,
1835                                 ** or invalid '='?
1836                                 */
1837                                 if (cp + 1 < ep && cp[1] == '\n') {
1838                                         /* "=\n" soft line break, eat the \n */
1839                                         cp++;
1840                                         continue;
1841                                 }
1842                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1843                                         /*
1844                                         ** We don't have 2 bytes left,
1845                                         ** so this is an invalid escape
1846                                         ** sequence; just show the raw bytes
1847                                         ** (below).
1848                                         */
1849                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1850                                         /*
1851                                         ** Next 2 bytes are hex digits,
1852                                         ** making this a valid escape
1853                                         ** sequence; let's decode it (above).
1854                                         */
1855                                         quoted = 1;
1856                                         continue;
1857                                 } else {
1858                                         /*
1859                                         ** One or both of the next 2 is
1860                                         ** out of range, making this an
1861                                         ** invalid escape sequence; just
1862                                         ** show the raw bytes (below).
1863                                         */
1864                                 }
1865                         }
1866
1867                         /* Just show the raw byte. */
1868                         putc(*cp, ce->ce_fp);
1869                         if (ferror(ce->ce_fp)) {
1870                                 content_error(ce->ce_file, ct,
1871                                                 "error writing to");
1872                                 goto clean_up;
1873                         }
1874                 }
1875         }
1876         if (quoted) {
1877                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1878                 goto clean_up;
1879         }
1880
1881         fseek(ct->c_fp, 0L, SEEK_SET);
1882
1883         if (fflush(ce->ce_fp)) {
1884                 content_error(ce->ce_file, ct, "error writing to");
1885                 goto clean_up;
1886         }
1887
1888         fseek(ce->ce_fp, 0L, SEEK_SET);
1889
1890 ready_to_go:
1891         *file = ce->ce_file;
1892         if (own_ct_fp) {
1893                 fclose(ct->c_fp);
1894                 ct->c_fp = NULL;
1895         }
1896         return fileno(ce->ce_fp);
1897
1898 clean_up:
1899         free_encoding(ct, 0);
1900         if (own_ct_fp) {
1901                 fclose(ct->c_fp);
1902                 ct->c_fp = NULL;
1903         }
1904         return NOTOK;
1905 }
1906
1907
1908 /*
1909 ** 7BIT
1910 */
1911
1912 static int
1913 Init7Bit(CT ct)
1914 {
1915         if (init_encoding(ct, open7Bit) == NOTOK)
1916                 return NOTOK;
1917
1918         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1919         return OK;
1920 }
1921
1922
1923 int
1924 open7Bit(CT ct, char **file)
1925 {
1926         int cc, fd, len, own_ct_fp = 0;
1927         char buffer[BUFSIZ];
1928         /* sbeck -- handle suffixes */
1929         char *cp;
1930         CI ci;
1931         CE ce;
1932
1933         ce = ct->c_cefile;
1934         if (ce->ce_fp) {
1935                 fseek(ce->ce_fp, 0L, SEEK_SET);
1936                 goto ready_to_go;
1937         }
1938
1939         if (ce->ce_file) {
1940                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1941                         content_error(ce->ce_file, ct,
1942                                         "unable to fopen for reading");
1943                         return NOTOK;
1944                 }
1945                 goto ready_to_go;
1946         }
1947
1948         if (*file == NULL) {
1949                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1950                 ce->ce_unlink = 1;
1951         } else {
1952                 ce->ce_file = mh_xstrdup(*file);
1953                 ce->ce_unlink = 0;
1954         }
1955
1956         /* sbeck@cise.ufl.edu -- handle suffixes */
1957         ci = &ct->c_ctinfo;
1958         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1959                         invo_name, ci->ci_type, ci->ci_subtype);
1960         cp = context_find(buffer);
1961         if (cp == NULL || *cp == '\0') {
1962                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1963                                 ci->ci_type);
1964                 cp = context_find(buffer);
1965         }
1966         if (cp != NULL && *cp != '\0') {
1967                 if (ce->ce_unlink) {
1968                         /*
1969                         ** Temporary file already exists, so we rename to
1970                         ** version with extension.
1971                         */
1972                         char *file_org = mh_xstrdup(ce->ce_file);
1973                         ce->ce_file = add(cp, ce->ce_file);
1974                         if (rename(file_org, ce->ce_file)) {
1975                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1976                                                 file_org);
1977                         }
1978                         mh_free0(&file_org);
1979
1980                 } else {
1981                         ce->ce_file = add(cp, ce->ce_file);
1982                 }
1983         }
1984
1985         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1986                 content_error(ce->ce_file, ct,
1987                                 "unable to fopen for reading/writing");
1988                 return NOTOK;
1989         }
1990
1991         if (ct->c_type == CT_MULTIPART) {
1992                 char **ap, **ep;
1993                 CI ci = &ct->c_ctinfo;
1994
1995                 len = 0;
1996                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1997                                 ci->ci_subtype);
1998                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1999                                 strlen(ci->ci_subtype);
2000                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2001                         putc(';', ce->ce_fp);
2002                         len++;
2003
2004                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2005                                         *ap, *ep);
2006
2007                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2008                                 fputs("\n\t", ce->ce_fp);
2009                                 len = 8;
2010                         } else {
2011                                 putc(' ', ce->ce_fp);
2012                                 len++;
2013                         }
2014                         fprintf(ce->ce_fp, "%s", buffer);
2015                         len += cc;
2016                 }
2017
2018                 if (ci->ci_comment) {
2019                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2020                                                 >= CPERLIN) {
2021                                 fputs("\n\t", ce->ce_fp);
2022                                 len = 8;
2023                         } else {
2024                                 putc(' ', ce->ce_fp);
2025                                 len++;
2026                         }
2027                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2028                         len += cc;
2029                 }
2030                 fprintf(ce->ce_fp, "\n");
2031                 if (ct->c_id)
2032                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2033                 if (ct->c_descr)
2034                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2035                 if (ct->c_dispo)
2036                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2037                 fprintf(ce->ce_fp, "\n");
2038         }
2039
2040         if ((len = ct->c_end - ct->c_begin) < 0)
2041                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2042
2043         if (!ct->c_fp) {
2044                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2045                         content_error(ct->c_file, ct,
2046                                         "unable to open for reading");
2047                         return NOTOK;
2048                 }
2049                 own_ct_fp = 1;
2050         }
2051
2052         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2053         while (len > 0)
2054                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2055                 case NOTOK:
2056                         content_error(ct->c_file, ct, "error reading from");
2057                         goto clean_up;
2058
2059                 case OK:
2060                         content_error(NULL, ct, "premature eof");
2061                         goto clean_up;
2062
2063                 default:
2064                         if (cc > len)
2065                                 cc = len;
2066                         len -= cc;
2067
2068                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2069                         if (ferror(ce->ce_fp)) {
2070                                 content_error(ce->ce_file, ct,
2071                                                 "error writing to");
2072                                 goto clean_up;
2073                         }
2074                 }
2075
2076         fseek(ct->c_fp, 0L, SEEK_SET);
2077
2078         if (fflush(ce->ce_fp)) {
2079                 content_error(ce->ce_file, ct, "error writing to");
2080                 goto clean_up;
2081         }
2082
2083         fseek(ce->ce_fp, 0L, SEEK_SET);
2084
2085 ready_to_go:
2086         *file = ce->ce_file;
2087         if (own_ct_fp) {
2088                 fclose(ct->c_fp);
2089                 ct->c_fp = NULL;
2090         }
2091         return fileno(ce->ce_fp);
2092
2093 clean_up:
2094         free_encoding(ct, 0);
2095         if (own_ct_fp) {
2096                 fclose(ct->c_fp);
2097                 ct->c_fp = NULL;
2098         }
2099         return NOTOK;
2100 }