Replace mh_xmalloc() with mh_xcalloc()
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/* non-zero enables the parsers' diagnostic output on stderr */
extern int debugsw;

extern int endian;  /* defined in mhmisc.c */

extern pid_t xpid;  /* defined in mhshowsbr.c */

/*
** Directory to place temp files.  This must
** be set before these routines are called.
*/
char *tmp;
33
/*
** Structures for TEXT messages
**
** SubText pairs each known text subtype name with its TEXT_*
** constant.  The entry with the NULL name ends the table and
** carries TEXT_UNKNOWN.
*/
struct k2v SubText[] = {
        { "plain", TEXT_PLAIN },
        { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
        { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
        { NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
43
/*
** Charset pairs each known character set name with its CHARSET_*
** constant.  The entry with the NULL name ends the table and
** carries CHARSET_UNKNOWN.
*/
struct k2v Charset[] = {
        { "us-ascii",   CHARSET_USASCII },
        { "iso-8859-1", CHARSET_LATIN },
        { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
49
/*
** Structures for MULTIPART messages
**
** SubMultiPart pairs each known multipart subtype name with its
** MULTI_* constant.  The entry with the NULL name ends the table
** and carries MULTI_UNKNOWN.
*/
struct k2v SubMultiPart[] = {
        { "mixed",       MULTI_MIXED },
        { "alternative", MULTI_ALTERNATE },
        { "digest",      MULTI_DIGEST },
        { "parallel",    MULTI_PARALLEL },
        { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
60
/*
** Structures for MESSAGE messages
**
** SubMessage pairs each known message subtype name with its
** MESSAGE_* constant.  The entry with the NULL name ends the
** table and carries MESSAGE_UNKNOWN.
*/
struct k2v SubMessage[] = {
        { "rfc822",        MESSAGE_RFC822 },
        { "partial",       MESSAGE_PARTIAL },
        { "external-body", MESSAGE_EXTERNAL },
        { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
70
/*
** Structure for APPLICATION messages
**
** SubApplication pairs each known application subtype name with
** its APPLICATION_* constant.  The entry with the NULL name ends
** the table and carries APPLICATION_UNKNOWN.
*/
struct k2v SubApplication[] = {
        { "octet-stream", APPLICATION_OCTETS },
        { "postscript",   APPLICATION_POSTSCRIPT },
        { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
79
80
/* prototypes for routines defined in mhmisc.c */
int part_ok(CT, int);
int type_ok(CT, int);
int make_intermediates(char *);
void content_error(char *, CT, char *, ...);

/* prototypes for routines defined in mhfree.c */
void free_content(CT);
void free_encoding(CT, int);
90
/*
** static prototypes
*/
/* header parsing helpers */
static CT get_content(FILE *, char *, int);
static int get_comment(CT, unsigned char **, int);

/* per-type and per-encoding Init functions (see str2cts / str2ces) */
static int InitGeneric(CT);
static int InitText(CT);
static int InitMultiPart(CT);
static void reverse_parts(CT);
static int InitMessage(CT);
static int InitApplication(CT);
static int init_encoding(CT, OpenCEFunc);
static unsigned long size_encoding(CT);
static int InitBase64(CT);
static int openBase64(CT, char **);
static int InitQuoted(CT);
static int openQuoted(CT, char **);
static int Init7Bit(CT);
110
/*
** Content types: name, CT_* constant, and Init function.
**
** get_content() scans this table until it hits a NULL name.  When
** no name matches, it stays on the CT_EXTENSION entry, or steps on
** to the CT_UNKNOWN entry if uprf(type, "X-") is false (presumably
** a prefix test -- confirm against uprf()).  Hence the order of
** the last two entries matters.
*/
struct str2init str2cts[] = {
        { "application", CT_APPLICATION, InitApplication },
        { "audio",       CT_AUDIO,       InitGeneric },
        { "image",       CT_IMAGE,       InitGeneric },
        { "message",     CT_MESSAGE,     InitMessage },
        { "multipart",   CT_MULTIPART,   InitMultiPart },
        { "text",        CT_TEXT,        InitText },
        { "video",       CT_VIDEO,       InitGeneric },
        { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
        { NULL,          CT_UNKNOWN,     NULL },
};
122
/*
** Content transfer encodings: name, CE_* constant, and Init function.
**
** get_content() scans this table the same way as str2cts: the first
** NULL-named entry (CE_EXTENSION) is kept unless uprf(name, "X-")
** is false, in which case the following CE_UNKNOWN entry is used.
*/
struct str2init str2ces[] = {
        { "base64",           CE_BASE64,    InitBase64 },
        { "quoted-printable", CE_QUOTED,    InitQuoted },
        { "8bit",             CE_8BIT,      Init7Bit },
        { "7bit",             CE_7BIT,      Init7Bit },
        { "binary",           CE_BINARY,    Init7Bit },
        { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
        { NULL,               CE_UNKNOWN,    NULL },
};
132
133
134 int
135 pidcheck(int status)
136 {
137         if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
138                 return status;
139
140         fflush(stdout);
141         fflush(stderr);
142         exit(EX_SOFTWARE);
143         return 1;
144 }
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         int compnum, state;
235         char buf[BUFSIZ], name[NAMESZ];
236         char *np, *vp;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = (CT) mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = getcpy(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD;;) {
252                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
253                 case FLD:
254                 case FLDPLUS:
255                         compnum++;
256
257                         /* get copies of the buffers */
258                         np = getcpy(name);
259                         vp = getcpy(buf);
260
261                         /* if necessary, get rest of field */
262                         while (state == FLDPLUS) {
263                                 state = m_getfld(state, name, buf,
264                                                 sizeof(buf), in);
265                                 vp = add(buf, vp);  /* add to previous value */
266                         }
267
268                         /* Now add the header data to the list */
269                         add_header(ct, np, vp);
270
271                         ct->c_begin = ftell(in) + 1;
272                         continue;
273
274                 case BODY:
275                         ct->c_begin = ftell(in) - strlen(buf);
276                         break;
277
278                 case FILEEOF:
279                         ct->c_begin = ftell(in);
280                         break;
281
282                 case LENERR:
283                 case FMTERR:
284                         adios(EX_DATAERR, NULL, "message format error in component #%d",
285                                         compnum);
286
287                 default:
288                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
289                 }
290
291                 /* break out of the loop */
292                 break;
293         }
294
295         /*
296         ** Read the content headers.  We will parse the
297         ** MIME related header fields into their various
298         ** structures and set internal flags related to
299         ** content type/subtype, etc.
300         */
301
302         hp = ct->c_first_hf;  /* start at first header field */
303         while (hp) {
304                 /* Get MIME-Version field */
305                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
306                         int ucmp;
307                         char c;
308                         unsigned char *cp, *dp;
309
310                         if (ct->c_vrsn) {
311                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
312                                 goto next_header;
313                         }
314                         ct->c_vrsn = getcpy(hp->value);
315
316                         /* Now, cleanup this field */
317                         cp = ct->c_vrsn;
318
319                         while (isspace(*cp))
320                                 cp++;
321                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
322                                 *dp++ = ' ';
323                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
324                                 if (!isspace(*dp))
325                                         break;
326                         *++dp = '\0';
327                         if (debugsw)
328                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
329
330                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
331                                 goto out;
332
333                         for (dp = cp; istoken(*dp); dp++)
334                                 continue;
335                         c = *dp;
336                         *dp = '\0';
337                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
338                         *dp = c;
339                         if (!ucmp) {
340                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
341                         }
342
343                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
344                         /* Get Content-Type field */
345                         struct str2init *s2i;
346                         CI ci = &ct->c_ctinfo;
347
348                         /* Check if we've already seen a Content-Type header */
349                         if (ct->c_ctline) {
350                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
351                                 goto next_header;
352                         }
353
354                         /* Parse the Content-Type field */
355                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
356                                 goto out;
357
358                         /*
359                         ** Set the Init function and the internal
360                         ** flag for this content type.
361                         */
362                         for (s2i = str2cts; s2i->si_key; s2i++)
363                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
364                                         break;
365                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
366                                 s2i++;
367                         ct->c_type = s2i->si_val;
368                         ct->c_ctinitfnx = s2i->si_init;
369
370                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
371                         /* Get Content-Transfer-Encoding field */
372                         char c;
373                         unsigned char *cp, *dp;
374                         struct str2init *s2i;
375
376                         /*
377                         ** Check if we've already seen the
378                         ** Content-Transfer-Encoding field
379                         */
380                         if (ct->c_celine) {
381                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
382                                 goto next_header;
383                         }
384
385                         /* get copy of this field */
386                         ct->c_celine = cp = getcpy(hp->value);
387
388                         while (isspace(*cp))
389                                 cp++;
390                         for (dp = cp; istoken(*dp); dp++)
391                                 continue;
392                         c = *dp;
393                         *dp = '\0';
394
395                         /*
396                         ** Find the internal flag and Init function
397                         ** for this transfer encoding.
398                         */
399                         for (s2i = str2ces; s2i->si_key; s2i++)
400                                 if (!mh_strcasecmp(cp, s2i->si_key))
401                                         break;
402                         if (!s2i->si_key && !uprf(cp, "X-"))
403                                 s2i++;
404                         *dp = c;
405                         ct->c_encoding = s2i->si_val;
406
407                         /* Call the Init function for this encoding */
408                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
409                                 goto out;
410
411                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
412                         /* Get Content-ID field */
413                         ct->c_id = add(hp->value, ct->c_id);
414
415                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
416                         /* Get Content-Description field */
417                         ct->c_descr = add(hp->value, ct->c_descr);
418
419                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
420                         /* Get Content-Disposition field */
421                         ct->c_dispo = add(hp->value, ct->c_dispo);
422                 }
423
424 next_header:
425                 hp = hp->next;  /* next header field */
426         }
427
428         /*
429         ** Check if we saw a Content-Type field.
430         ** If not, then assign a default value for
431         ** it, and the Init function.
432         */
433         if (!ct->c_ctline) {
434                 /*
435                 ** If we are inside a multipart/digest message,
436                 ** so default type is message/rfc822
437                 */
438                 if (toplevel < 0) {
439                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
440                                 goto out;
441                         ct->c_type = CT_MESSAGE;
442                         ct->c_ctinitfnx = InitMessage;
443                 } else {
444                         /*
445                         ** Else default type is text/plain
446                         */
447                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
448                                 goto out;
449                         ct->c_type = CT_TEXT;
450                         ct->c_ctinitfnx = InitText;
451                 }
452         }
453
454         /* Use default Transfer-Encoding, if necessary */
455         if (!ct->c_celine) {
456                 ct->c_encoding = CE_7BIT;
457                 Init7Bit(ct);
458         }
459
460         return ct;
461
462 out:
463         free_content(ct);
464         return NULL;
465 }
466
467
468 /*
469 ** small routine to add header field to list
470 */
471
472 int
473 add_header(CT ct, char *name, char *value)
474 {
475         HF hp;
476
477         /* allocate header field structure */
478         hp = mh_xcalloc(1, sizeof(*hp));
479
480         /* link data into header structure */
481         hp->name = name;
482         hp->value = value;
483         hp->next = NULL;
484
485         /* link header structure into the list */
486         if (ct->c_first_hf == NULL) {
487                 ct->c_first_hf = hp;  /* this is the first */
488                 ct->c_last_hf = hp;
489         } else {
490                 ct->c_last_hf->next = hp;  /* add it to the end */
491                 ct->c_last_hf = hp;
492         }
493
494         return 0;
495 }
496
497
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, build a new string in which
** `; name="value"' is placed in front of the first semicolon of
** buf, or at the end if buf has no semicolon.  Note that name
** should not contain a trailing =.  Quotes are added around the
** value.  Typical usage:  make sure that a Content-Disposition
** header contains filename="foo".
**
** Returns buf itself if nothing had to be added (or if buf or
** value is NULL).  Otherwise buf is freed and the newly allocated
** string, which ends in a newline, is returned.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *newbuf = buf;

        /* Assume that name is non-null. */
        if (buf && value) {
                char *name_plus_equal = concat(name, "=", NULL);

                if (!strstr(buf, name_plus_equal)) {
                        char *insertion;
                        unsigned char *cp;
                        char *prefix, *suffix;

                        /*
                        ** Trim trailing space, esp. newline.  Start at
                        ** the terminator and look at cp[-1], so that an
                        ** empty buf never gets indexed with -1.
                        */
                        for (cp = buf + strlen(buf);
                                         cp > buf && isspace(cp[-1]); --cp) {
                                cp[-1] = '\0';
                        }

                        insertion = concat("; ", name, "=", "\"", value, "\"",
                                        NULL);

                        /*
                        ** Insert at first semicolon, if any.
                        ** If none, append to end.
                        */
                        prefix = getcpy(buf);
                        if ((cp = strchr(prefix, ';'))) {
                                /* suffix: from the semicolon to the end */
                                suffix = concat(cp, NULL);
                                *cp = '\0';
                                newbuf = concat(prefix, insertion, suffix,
                                                "\n", NULL);
                                free(suffix);
                        } else {
                                /* Append to end. */
                                newbuf = concat(buf, insertion, "\n", NULL);
                        }

                        free(prefix);
                        free(insertion);
                        free(buf);
                }

                free(name_plus_equal);
        }

        return newbuf;
}
555
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, or if its closing quote is missing, return the entire value.
** Note that, for example, a name_suffix of name will match
** filename="foo", and return foo.
**
** The result is either value itself or a newly allocated string;
** the caller has to compare the pointers to tell them apart.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *extracted_name_value = value;
        char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
        char *name_suffix_equals = strstr(value, name_suffix_plus_quote);

        free(name_suffix_plus_quote);
        if (name_suffix_equals) {
                char *name_suffix_begin;
                char *name_suffix_end;

                /*
                ** Find first \".  The match is known to contain
                ** one, so strchr() cannot fail here.
                */
                name_suffix_begin = strchr(name_suffix_equals, '"') + 1;

                /*
                ** Find second \".  It may be missing in malformed
                ** input; in that case don't run past the end of the
                ** string but hand back the whole value.
                */
                name_suffix_end = strchr(name_suffix_begin, '"');

                if (name_suffix_end) {
                        size_t len = name_suffix_end - name_suffix_begin;

                        extracted_name_value = mh_xcalloc(len + 1, sizeof(char));
                        memcpy(extracted_name_value, name_suffix_begin, len);
                        extracted_name_value[len] = '\0';
                }
        }

        return extracted_name_value;
}
588
589 /*
590 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
591 ** directives.  Fills in the information of the CTinfo structure.
592 */
593 int
594 get_ctinfo(unsigned char *cp, CT ct, int magic)
595 {
596         int i;
597         unsigned char *dp;
598         char **ap, **ep;
599         char c;
600         CI ci;
601
602         ci = &ct->c_ctinfo;
603         i = strlen(invo_name) + 2;
604
605         /* store copy of Content-Type line */
606         cp = ct->c_ctline = getcpy(cp);
607
608         while (isspace(*cp))  /* trim leading spaces */
609                 cp++;
610
611         /* change newlines to spaces */
612         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
613                 *dp++ = ' ';
614
615         /* trim trailing spaces */
616         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
617                 if (!isspace(*dp))
618                         break;
619         *++dp = '\0';
620
621         if (debugsw)
622                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
623
624         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
625                 return NOTOK;
626
627         for (dp = cp; istoken(*dp); dp++)
628                 continue;
629         c = *dp, *dp = '\0';
630         ci->ci_type = getcpy(cp);  /* store content type */
631         *dp = c, cp = dp;
632
633         if (!*ci->ci_type) {
634                 advise(NULL, "invalid %s: field in message %s (empty type)",
635                                 TYPE_FIELD, ct->c_file);
636                 return NOTOK;
637         }
638
639         /* down case the content type string */
640         for (dp = ci->ci_type; *dp; dp++)
641                 if (isalpha(*dp) && isupper(*dp))
642                         *dp = tolower(*dp);
643
644         while (isspace(*cp))
645                 cp++;
646
647         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
648                 return NOTOK;
649
650         if (*cp != '/') {
651                 if (!magic)
652                         ci->ci_subtype = getcpy("");
653                 goto magic_skip;
654         }
655
656         cp++;
657         while (isspace(*cp))
658                 cp++;
659
660         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
661                 return NOTOK;
662
663         for (dp = cp; istoken(*dp); dp++)
664                 continue;
665         c = *dp, *dp = '\0';
666         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
667         *dp = c, cp = dp;
668
669         if (!*ci->ci_subtype) {
670                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
671                 return NOTOK;
672         }
673
674         /* down case the content subtype string */
675         for (dp = ci->ci_subtype; *dp; dp++)
676                 if (isalpha(*dp) && isupper(*dp))
677                         *dp = tolower(*dp);
678
679 magic_skip:
680         while (isspace(*cp))
681                 cp++;
682
683         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
684                 return NOTOK;
685
686         /*
687         ** Parse attribute/value pairs given with Content-Type
688         */
689         ep = (ap = ci->ci_attrs) + NPARMS;
690         while (*cp == ';') {
691                 char *vp;
692                 unsigned char *up;
693
694                 if (ap >= ep) {
695                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
696                         return NOTOK;
697                 }
698
699                 cp++;
700                 while (isspace(*cp))
701                         cp++;
702
703                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
704                         return NOTOK;
705
706                 if (*cp == 0) {
707                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
708                         return OK;
709                 }
710
711                 /* down case the attribute name */
712                 for (dp = cp; istoken(*dp); dp++)
713                         if (isalpha(*dp) && isupper(*dp))
714                                 *dp = tolower(*dp);
715
716                 for (up = dp; isspace(*dp);)
717                         dp++;
718                 if (dp == cp || *dp != '=') {
719                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
720                         return NOTOK;
721                 }
722
723                 vp = (*ap = getcpy(cp)) + (up - cp);
724                 *vp = '\0';
725                 for (dp++; isspace(*dp);)
726                         dp++;
727
728                 /* now add the attribute value */
729                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
730
731                 if (*dp == '"') {
732                         for (cp = ++dp, dp = vp;;) {
733                                 switch (c = *cp++) {
734                                 case '\0':
735 bad_quote:
736                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
737                                         return NOTOK;
738
739                                 case '\\':
740                                         *dp++ = c;
741                                         if ((c = *cp++) == '\0')
742                                                 goto bad_quote;
743                                         /* else fall... */
744
745                                 default:
746                                         *dp++ = c;
747                                         continue;
748
749                                 case '"':
750                                         *dp = '\0';
751                                         break;
752                                 }
753                                 break;
754                         }
755                 } else {
756                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
757                                 continue;
758                         *dp = '\0';
759                 }
760                 if (!*vp) {
761                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
762                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
763                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
764                         continue;
765                 }
766                 ap++;
767
768                 while (isspace(*cp))
769                         cp++;
770
771                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
772                         return NOTOK;
773         }
774
775         /*
776         ** Get any <Content-Id> given in buffer
777         */
778         if (magic && *cp == '<') {
779                 if (ct->c_id) {
780                         free(ct->c_id);
781                         ct->c_id = NULL;
782                 }
783                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784                         advise(NULL, "invalid ID in message %s", ct->c_file);
785                         return NOTOK;
786                 }
787                 c = *dp;
788                 *dp = '\0';
789                 if (*ct->c_id)
790                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
791                 else
792                         ct->c_id = NULL;
793                 *dp++ = c;
794                 cp = dp;
795
796                 while (isspace(*cp))
797                         cp++;
798         }
799
800         /*
801         ** Get any [Content-Description] given in buffer.
802         */
803         if (magic && *cp == '[') {
804                 ct->c_descr = ++cp;
805                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
806                         if (*dp == ']')
807                                 break;
808                 if (dp < cp) {
809                         advise(NULL, "invalid description in message %s",
810                                         ct->c_file);
811                         ct->c_descr = NULL;
812                         return NOTOK;
813                 }
814
815                 c = *dp;
816                 *dp = '\0';
817                 if (*ct->c_descr)
818                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
819                 else
820                         ct->c_descr = NULL;
821                 *dp++ = c;
822                 cp = dp;
823
824                 while (isspace(*cp))
825                         cp++;
826         }
827
828         /*
829         ** Get any {Content-Disposition} given in buffer.
830         */
831         if (magic && *cp == '{') {
832                 ct->c_dispo = ++cp;
833                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
834                         if (*dp == '}')
835                                 break;
836                 if (dp < cp) {
837                         advise(NULL, "invalid disposition in message %s",
838                                         ct->c_file);
839                         ct->c_dispo = NULL;
840                         return NOTOK;
841                 }
842
843                 c = *dp;
844                 *dp = '\0';
845                 if (*ct->c_dispo)
846                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
847                 else
848                         ct->c_dispo = NULL;
849                 *dp++ = c;
850                 cp = dp;
851
852                 while (isspace(*cp))
853                         cp++;
854         }
855
856         /*
857         ** Check if anything is left over
858         */
859         if (*cp) {
860                 if (magic) {
861                         ci->ci_magic = getcpy(cp);
862
863                         /*
864                         ** If there is a Content-Disposition header and
865                         ** it doesn't have a *filename=, extract it from
866                         ** the magic contents.  The mhbasename call skips
867                         ** any leading directory components.
868                         */
869                         if (ct->c_dispo)
870                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
871                         } else
872                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
873         }
874
875         return OK;
876 }
877
878
879 static int
880 get_comment(CT ct, unsigned char **ap, int istype)
881 {
882         int i;
883         char *bp;
884         unsigned char *cp;
885         char c, buffer[BUFSIZ], *dp;
886         CI ci;
887
888         ci = &ct->c_ctinfo;
889         cp = *ap;
890         bp = buffer;
891         cp++;
892
893         for (i = 0;;) {
894                 switch (c = *cp++) {
895                 case '\0':
896 invalid:
897                 advise(NULL, "invalid comment in message %s's %s: field",
898                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
899                 return NOTOK;
900
901                 case '\\':
902                         *bp++ = c;
903                         if ((c = *cp++) == '\0')
904                                 goto invalid;
905                         *bp++ = c;
906                         continue;
907
908                 case '(':
909                         i++;
910                         /* and fall... */
911                 default:
912                         *bp++ = c;
913                         continue;
914
915                 case ')':
916                         if (--i < 0)
917                                 break;
918                         *bp++ = c;
919                         continue;
920                 }
921                 break;
922         }
923         *bp = '\0';
924
925         if (istype) {
926                 if ((dp = ci->ci_comment)) {
927                         ci->ci_comment = concat(dp, " ", buffer, NULL);
928                         free(dp);
929                 } else {
930                         ci->ci_comment = getcpy(buffer);
931                 }
932         }
933
934         while (isspace(*cp))
935                 cp++;
936
937         *ap = cp;
938         return OK;
939 }
940
941
942 /*
943 ** CONTENTS
944 **
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
947 */
948
949 static int
950 InitGeneric(CT ct)
951 {
952         return OK;  /* not much to do here */
953 }
954
955
956 /*
957 ** TEXT
958 */
959
960 static int
961 InitText(CT ct)
962 {
963         char **ap, **ep;
964         struct k2v *kv;
965         struct text *t;
966         CI ci = &ct->c_ctinfo;
967
968         /* check for missing subtype */
969         if (!*ci->ci_subtype)
970                 ci->ci_subtype = add("plain", ci->ci_subtype);
971
972         /* match subtype */
973         for (kv = SubText; kv->kv_key; kv++)
974                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
975                         break;
976         ct->c_subtype = kv->kv_value;
977
978         /* allocate text character set structure */
979         t = (struct text *) mh_xcalloc(1, sizeof(*t));
980         ct->c_ctparams = (void *) t;
981
982         /* scan for charset parameter */
983         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
984                 if (!mh_strcasecmp(*ap, "charset"))
985                         break;
986
987         /* check if content specified a character set */
988         if (*ap) {
989                 /* store its name */
990                 ct->c_charset = getcpy(norm_charmap(*ep));
991                 /* match character set or set to CHARSET_UNKNOWN */
992                 for (kv = Charset; kv->kv_key; kv++) {
993                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
994                                 break;
995                         }
996                 }
997                 t->tx_charset = kv->kv_value;
998         } else {
999                 t->tx_charset = CHARSET_UNSPECIFIED;
1000         }
1001
1002         return OK;
1003 }
1004
1005
1006 /*
1007 ** MULTIPART
1008 */
1009
1010 static int
1011 InitMultiPart(CT ct)
1012 {
1013         int inout;
1014         long last, pos;
1015         unsigned char *cp, *dp;
1016         char **ap, **ep;
1017         char *bp, buffer[BUFSIZ];
1018         struct multipart *m;
1019         struct k2v *kv;
1020         struct part *part, **next;
1021         CI ci = &ct->c_ctinfo;
1022         CT p;
1023         FILE *fp;
1024
1025         /*
1026         ** The encoding for multipart messages must be either
1027         ** 7bit, 8bit, or binary (per RFC2045).
1028         */
1029         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1030                 && ct->c_encoding != CE_BINARY) {
1031                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1032                 ct->c_encoding = CE_7BIT;
1033         }
1034
1035         /* match subtype */
1036         for (kv = SubMultiPart; kv->kv_key; kv++)
1037                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1038                         break;
1039         ct->c_subtype = kv->kv_value;
1040
1041         /*
1042         ** Check for "boundary" parameter, which is
1043         ** required for multipart messages.
1044         */
1045         bp = 0;
1046         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1047                 if (!mh_strcasecmp(*ap, "boundary")) {
1048                         bp = *ep;
1049                         break;
1050                 }
1051         }
1052
1053         /* complain if boundary parameter is missing */
1054         if (!*ap) {
1055                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1056                 return NOTOK;
1057         }
1058
1059         /* allocate primary structure for multipart info */
1060         m = (struct multipart *) mh_xcalloc(1, sizeof(*m));
1061         ct->c_ctparams = (void *) m;
1062
1063         /* check if boundary parameter contains only whitespace characters */
1064         for (cp = bp; isspace(*cp); cp++)
1065                 continue;
1066         if (!*cp) {
1067                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1068                 return NOTOK;
1069         }
1070
1071         /* remove trailing whitespace from boundary parameter */
1072         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1073                 if (!isspace(*dp))
1074                         break;
1075         *++dp = '\0';
1076
1077         /* record boundary separators */
1078         m->mp_start = concat(bp, "\n", NULL);
1079         m->mp_stop = concat(bp, "--\n", NULL);
1080
1081         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1082                 advise(ct->c_file, "unable to open for reading");
1083                 return NOTOK;
1084         }
1085
1086         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1087         last = ct->c_end;
1088         next = &m->mp_parts;
1089         part = NULL;
1090         inout = 1;
1091
1092         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1093                 if (pos > last)
1094                         break;
1095
1096                 pos += strlen(buffer);
1097                 if (buffer[0] != '-' || buffer[1] != '-')
1098                         continue;
1099                 if (inout) {
1100                         if (strcmp(buffer + 2, m->mp_start)!=0)
1101                                 continue;
1102 next_part:
1103                         part = (struct part *) mh_xcalloc(1, sizeof(*part));
1104                         *next = part;
1105                         next = &part->mp_next;
1106
1107                         if (!(p = get_content(fp, ct->c_file,
1108                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1109                                 ct->c_fp = NULL;
1110                                 return NOTOK;
1111                         }
1112                         p->c_fp = NULL;
1113                         part->mp_part = p;
1114                         pos = p->c_begin;
1115                         fseek(fp, pos, SEEK_SET);
1116                         inout = 0;
1117                 } else {
1118                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1119                                 inout = 1;
1120 end_part:
1121                                 p = part->mp_part;
1122                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1123                                 if (p->c_end < p->c_begin)
1124                                         p->c_begin = p->c_end;
1125                                 if (inout)
1126                                         goto next_part;
1127                                 goto last_part;
1128                         } else {
1129                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1130                                         goto end_part;
1131                         }
1132                 }
1133         }
1134
1135         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1136         if (!inout && part) {
1137                 p = part->mp_part;
1138                 p->c_end = ct->c_end;
1139
1140                 if (p->c_begin >= p->c_end) {
1141                         for (next = &m->mp_parts; *next != part;
1142                                 next = &((*next)->mp_next))
1143                                 continue;
1144                         *next = NULL;
1145                         free_content(p);
1146                         free((char *) part);
1147                 }
1148         }
1149
1150 last_part:
1151         /* reverse the order of the parts for multipart/alternative */
1152         if (ct->c_subtype == MULTI_ALTERNATE)
1153                 reverse_parts(ct);
1154
1155         /*
1156         ** label all subparts with part number, and
1157         ** then initialize the content of the subpart.
1158         */
1159         {
1160                 int partnum;
1161                 char *pp;
1162                 char partnam[BUFSIZ];
1163
1164                 if (ct->c_partno) {
1165                         snprintf(partnam, sizeof(partnam), "%s.",
1166                                         ct->c_partno);
1167                         pp = partnam + strlen(partnam);
1168                 } else {
1169                         pp = partnam;
1170                 }
1171
1172                 for (part = m->mp_parts, partnum = 1; part;
1173                         part = part->mp_next, partnum++) {
1174                         p = part->mp_part;
1175
1176                         sprintf(pp, "%d", partnum);
1177                         p->c_partno = getcpy(partnam);
1178
1179                         /* initialize the content of the subparts */
1180                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1181                                 fclose(ct->c_fp);
1182                                 ct->c_fp = NULL;
1183                                 return NOTOK;
1184                         }
1185                 }
1186         }
1187
1188         fclose(ct->c_fp);
1189         ct->c_fp = NULL;
1190         return OK;
1191 }
1192
1193
1194 /*
1195 ** reverse the order of the parts of a multipart
1196 */
1197
1198 static void
1199 reverse_parts(CT ct)
1200 {
1201         int i;
1202         struct multipart *m;
1203         struct part **base, **bmp, **next, *part;
1204
1205         m = (struct multipart *) ct->c_ctparams;
1206
1207         /* if only one part, just return */
1208         if (!m->mp_parts || !m->mp_parts->mp_next)
1209                 return;
1210
1211         /* count number of parts */
1212         i = 0;
1213         for (part = m->mp_parts; part; part = part->mp_next)
1214                 i++;
1215
1216         /* allocate array of pointers to the parts */
1217         base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base));
1218         bmp = base;
1219
1220         /* point at all the parts */
1221         for (part = m->mp_parts; part; part = part->mp_next)
1222                 *bmp++ = part;
1223         *bmp = NULL;
1224
1225         /* reverse the order of the parts */
1226         next = &m->mp_parts;
1227         for (bmp--; bmp >= base; bmp--) {
1228                 part = *bmp;
1229                 *next = part;
1230                 next = &part->mp_next;
1231         }
1232         *next = NULL;
1233
1234         /* free array of pointers */
1235         free((char *) base);
1236 }
1237
1238
1239 /*
1240 ** MESSAGE
1241 */
1242
1243 static int
1244 InitMessage(CT ct)
1245 {
1246         struct k2v *kv;
1247         CI ci = &ct->c_ctinfo;
1248
1249         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1250                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1251                 return NOTOK;
1252         }
1253
1254         /* check for missing subtype */
1255         if (!*ci->ci_subtype)
1256                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1257
1258         /* match subtype */
1259         for (kv = SubMessage; kv->kv_key; kv++)
1260                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1261                         break;
1262         ct->c_subtype = kv->kv_value;
1263
1264         switch (ct->c_subtype) {
1265         case MESSAGE_RFC822:
1266                 break;
1267
1268         case MESSAGE_PARTIAL:
1269                 {
1270                 char **ap, **ep;
1271                 struct partial *p;
1272
1273                 p = (struct partial *) mh_xcalloc(1, sizeof(*p));
1274                 ct->c_ctparams = (void *) p;
1275
1276                 /*
1277                 ** scan for parameters "id", "number",
1278                 ** and "total"
1279                 */
1280                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1281                         if (!mh_strcasecmp(*ap, "id")) {
1282                                 p->pm_partid = getcpy(*ep);
1283                                 continue;
1284                         }
1285                         if (!mh_strcasecmp(*ap, "number")) {
1286                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1287 invalid_param:
1288                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1289                                         return NOTOK;
1290                                 }
1291                                 continue;
1292                         }
1293                         if (!mh_strcasecmp(*ap, "total")) {
1294                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1295                                                 p->pm_maxno < 1)
1296                                         goto invalid_param;
1297                                 continue;
1298                         }
1299                 }
1300
1301                 if (!p->pm_partid || !p->pm_partno
1302                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1303                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1304                         return NOTOK;
1305                 }
1306                 }
1307                 break;
1308
1309         case MESSAGE_EXTERNAL:
1310                 {
1311                 CT p;
1312                 FILE *fp;
1313
1314                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1315                         advise(ct->c_file, "unable to open for reading");
1316                         return NOTOK;
1317                 }
1318
1319                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1320
1321                 if (!(p = get_content(fp, ct->c_file, 0))) {
1322                         ct->c_fp = NULL;
1323                         return NOTOK;
1324                 }
1325
1326                 p->c_fp = NULL;
1327                 p->c_end = p->c_begin;
1328
1329                 fclose(ct->c_fp);
1330                 ct->c_fp = NULL;
1331
1332                 switch (p->c_type) {
1333                 case CT_MULTIPART:
1334                         break;
1335
1336                 case CT_MESSAGE:
1337                         if (p->c_subtype != MESSAGE_RFC822)
1338                                 break;
1339                         /* else fall... */
1340                 default:
1341                         if (p->c_ctinitfnx)
1342                                 (*p->c_ctinitfnx) (p);
1343                         break;
1344                 }
1345                 }
1346                 break;
1347
1348         default:
1349                 break;
1350         }
1351
1352         return OK;
1353 }
1354
1355
1356 /*
1357 ** APPLICATION
1358 */
1359
1360 static int
1361 InitApplication(CT ct)
1362 {
1363         struct k2v *kv;
1364         CI ci = &ct->c_ctinfo;
1365
1366         /* match subtype */
1367         for (kv = SubApplication; kv->kv_key; kv++)
1368                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1369                         break;
1370         ct->c_subtype = kv->kv_value;
1371
1372         return OK;
1373 }
1374
1375
1376 /*
1377 ** TRANSFER ENCODINGS
1378 */
1379
1380 static int
1381 init_encoding(CT ct, OpenCEFunc openfnx)
1382 {
1383         CE ce;
1384
1385         ce = (CE) mh_xcalloc(1, sizeof(*ce));
1386
1387         ct->c_cefile     = ce;
1388         ct->c_ceopenfnx  = openfnx;
1389         ct->c_ceclosefnx = close_encoding;
1390         ct->c_cesizefnx  = size_encoding;
1391
1392         return OK;
1393 }
1394
1395
1396 void
1397 close_encoding(CT ct)
1398 {
1399         CE ce;
1400
1401         if (!(ce = ct->c_cefile))
1402                 return;
1403
1404         if (ce->ce_fp) {
1405                 fclose(ce->ce_fp);
1406                 ce->ce_fp = NULL;
1407         }
1408 }
1409
1410
1411 static unsigned long
1412 size_encoding(CT ct)
1413 {
1414         int fd;
1415         unsigned long size;
1416         char *file;
1417         CE ce;
1418         struct stat st;
1419
1420         if (!(ce = ct->c_cefile))
1421                 return (ct->c_end - ct->c_begin);
1422
1423         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1424                 return (long) st.st_size;
1425
1426         if (ce->ce_file) {
1427                 if (stat(ce->ce_file, &st) != NOTOK)
1428                         return (long) st.st_size;
1429                 else
1430                         return 0L;
1431         }
1432
1433         if (ct->c_encoding == CE_EXTERNAL)
1434                 return (ct->c_end - ct->c_begin);
1435
1436         file = NULL;
1437         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1438                 return (ct->c_end - ct->c_begin);
1439
1440         if (fstat(fd, &st) != NOTOK)
1441                 size = (long) st.st_size;
1442         else
1443                 size = 0L;
1444
1445         (*ct->c_ceclosefnx) (ct);
1446         return size;
1447 }
1448
1449
1450 /*
1451 ** BASE64
1452 */
1453
/*
** Lookup table for base64 decoding, indexed by 7-bit character code.
** Characters of the base64 alphabet map to their 6-bit value
** (0x00 - 0x3f); every other character maps to 0xff.
*/
static unsigned char b642nib[0x80] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x00 - 0x07 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x08 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x10 - 0x17 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x18 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x20 - 0x27 */
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,  /* '+' and '/' */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,  /* '0' - '7' */
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* '8', '9' */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,  /* 'A' - 'G' */
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,  /* 'H' - 'O' */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,  /* 'P' - 'W' */
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 'X' - 'Z' */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,  /* 'a' - 'g' */
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,  /* 'h' - 'o' */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,  /* 'p' - 'w' */
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff   /* 'x' - 'z' */
};
1472
1473
1474 static int
1475 InitBase64(CT ct)
1476 {
1477         return init_encoding(ct, openBase64);
1478 }
1479
1480
1481 static int
1482 openBase64(CT ct, char **file)
1483 {
1484         int bitno, cc;
1485         int fd, len, skip, own_ct_fp = 0;
1486         unsigned long bits;
1487         unsigned char value, *b, *b1, *b2, *b3;
1488         unsigned char *cp, *ep;
1489         char buffer[BUFSIZ];
1490         /* sbeck -- handle suffixes */
1491         CI ci;
1492         CE ce;
1493
1494         b  = (unsigned char *) &bits;
1495         b1 = &b[endian > 0 ? 1 : 2];
1496         b2 = &b[endian > 0 ? 2 : 1];
1497         b3 = &b[endian > 0 ? 3 : 0];
1498
1499         ce = ct->c_cefile;
1500         if (ce->ce_fp) {
1501                 fseek(ce->ce_fp, 0L, SEEK_SET);
1502                 goto ready_to_go;
1503         }
1504
1505         if (ce->ce_file) {
1506                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1507                         content_error(ce->ce_file, ct,
1508                                         "unable to fopen for reading");
1509                         return NOTOK;
1510                 }
1511                 goto ready_to_go;
1512         }
1513
1514         if (*file == NULL) {
1515                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1516                 ce->ce_unlink = 1;
1517         } else {
1518                 ce->ce_file = getcpy(*file);
1519                 ce->ce_unlink = 0;
1520         }
1521
1522         /* sbeck@cise.ufl.edu -- handle suffixes */
1523         ci = &ct->c_ctinfo;
1524         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1525                         invo_name, ci->ci_type, ci->ci_subtype);
1526         cp = context_find(buffer);
1527         if (cp == NULL || *cp == '\0') {
1528                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1529                                 ci->ci_type);
1530                 cp = context_find(buffer);
1531         }
1532         if (cp != NULL && *cp != '\0') {
1533                 if (ce->ce_unlink) {
1534                         /*
1535                         ** Temporary file already exists, so we rename to
1536                         ** version with extension.
1537                         */
1538                         char *file_org = strdup(ce->ce_file);
1539                         ce->ce_file = add(cp, ce->ce_file);
1540                         if (rename(file_org, ce->ce_file)) {
1541                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1542                                                 file_org);
1543                         }
1544                         free(file_org);
1545
1546                 } else {
1547                         ce->ce_file = add(cp, ce->ce_file);
1548                 }
1549         }
1550
1551         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1552                 content_error(ce->ce_file, ct,
1553                                 "unable to fopen for reading/writing");
1554                 return NOTOK;
1555         }
1556
1557         if ((len = ct->c_end - ct->c_begin) < 0)
1558                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1559
1560         if (!ct->c_fp) {
1561                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1562                         content_error(ct->c_file, ct,
1563                                         "unable to open for reading");
1564                         return NOTOK;
1565                 }
1566                 own_ct_fp = 1;
1567         }
1568
1569         bitno = 18;
1570         bits = 0L;
1571         skip = 0;
1572
1573         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1574         while (len > 0) {
1575                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1576                 case NOTOK:
1577                         content_error(ct->c_file, ct, "error reading from");
1578                         goto clean_up;
1579
1580                 case OK:
1581                         content_error(NULL, ct, "premature eof");
1582                         goto clean_up;
1583
1584                 default:
1585                         if (cc > len)
1586                                 cc = len;
1587                         len -= cc;
1588
1589                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1590                                 switch (*cp) {
1591                                 default:
1592                                         if (isspace(*cp))
1593                                                 break;
1594                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1595                                                 if (debugsw) {
1596                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1597                                                 }
1598                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1599                                                 continue;
1600                                         }
1601
1602                                         bits |= value << bitno;
1603 test_end:
1604                                         if ((bitno -= 6) < 0) {
1605                                                 putc((char) *b1, ce->ce_fp);
1606                                                 if (skip < 2) {
1607                                                         putc((char) *b2, ce->ce_fp);
1608                                                         if (skip < 1) {
1609                                                                 putc((char) *b3, ce->ce_fp);
1610                                                         }
1611                                                 }
1612
1613                                                 if (ferror(ce->ce_fp)) {
1614                                                         content_error(ce->ce_file, ct,
1615                                                                                    "error writing to");
1616                                                         goto clean_up;
1617                                                 }
1618                                                 bitno = 18, bits = 0L, skip = 0;
1619                                         }
1620                                         break;
1621
1622                                 case '=':
1623                                         if (++skip > 3)
1624                                                 goto self_delimiting;
1625                                         goto test_end;
1626                                 }
1627                         }
1628                 }
1629         }
1630
1631         if (bitno != 18) {
1632                 if (debugsw)
1633                         fprintf(stderr, "premature ending (bitno %d)\n",
1634                                         bitno);
1635
1636                 content_error(NULL, ct, "invalid BASE64 encoding");
1637                 goto clean_up;
1638         }
1639
1640 self_delimiting:
1641         fseek(ct->c_fp, 0L, SEEK_SET);
1642
1643         if (fflush(ce->ce_fp)) {
1644                 content_error(ce->ce_file, ct, "error writing to");
1645                 goto clean_up;
1646         }
1647
1648         fseek(ce->ce_fp, 0L, SEEK_SET);
1649
1650 ready_to_go:
1651         *file = ce->ce_file;
1652         if (own_ct_fp) {
1653                 fclose(ct->c_fp);
1654                 ct->c_fp = NULL;
1655         }
1656         return fileno(ce->ce_fp);
1657
1658 clean_up:
1659         free_encoding(ct, 0);
1660         if (own_ct_fp) {
1661                 fclose(ct->c_fp);
1662                 ct->c_fp = NULL;
1663         }
1664         return NOTOK;
1665 }
1666
1667
1668 /*
1669 ** QUOTED PRINTABLE
1670 */
1671
/*
** Lookup table for hexadecimal digits, indexed by 7-bit character
** code.  The digits '0' - '9', 'A' - 'F' and 'a' - 'f' map to their
** value; every other character maps to 0.
*/
static char hex2nib[0x80] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x00 - 0x07 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x08 - 0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x10 - 0x17 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x18 - 0x1f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x20 - 0x27 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x28 - 0x2f */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  /* '0' - '7' */
	0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* '8', '9' */
	0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,  /* 'A' - 'F' */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x48 - 0x4f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x50 - 0x57 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x58 - 0x5f */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,  /* 'a' - 'f' */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x68 - 0x6f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x70 - 0x77 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00   /* 0x78 - 0x7f */
};
1690
1691
1692 static int
1693 InitQuoted(CT ct)
1694 {
1695         return init_encoding(ct, openQuoted);
1696 }
1697
1698
1699 static int
1700 openQuoted(CT ct, char **file)
1701 {
1702         int cc, len, quoted, own_ct_fp = 0;
1703         unsigned char *cp, *ep;
1704         char buffer[BUFSIZ];
1705         unsigned char mask = 0;
1706         CE ce;
1707         /* sbeck -- handle suffixes */
1708         CI ci;
1709
1710         ce = ct->c_cefile;
1711         if (ce->ce_fp) {
1712                 fseek(ce->ce_fp, 0L, SEEK_SET);
1713                 goto ready_to_go;
1714         }
1715
1716         if (ce->ce_file) {
1717                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1718                         content_error(ce->ce_file, ct,
1719                                         "unable to fopen for reading");
1720                         return NOTOK;
1721                 }
1722                 goto ready_to_go;
1723         }
1724
1725         if (*file == NULL) {
1726                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1727                 ce->ce_unlink = 1;
1728         } else {
1729                 ce->ce_file = getcpy(*file);
1730                 ce->ce_unlink = 0;
1731         }
1732
1733         /* sbeck@cise.ufl.edu -- handle suffixes */
1734         ci = &ct->c_ctinfo;
1735         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1736                         invo_name, ci->ci_type, ci->ci_subtype);
1737         cp = context_find(buffer);
1738         if (cp == NULL || *cp == '\0') {
1739                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1740                                 ci->ci_type);
1741                 cp = context_find(buffer);
1742         }
1743         if (cp != NULL && *cp != '\0') {
1744                 if (ce->ce_unlink) {
1745                         /*
1746                         ** Temporary file already exists, so we rename to
1747                         ** version with extension.
1748                         */
1749                         char *file_org = strdup(ce->ce_file);
1750                         ce->ce_file = add(cp, ce->ce_file);
1751                         if (rename(file_org, ce->ce_file)) {
1752                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1753                                                 file_org);
1754                         }
1755                         free(file_org);
1756
1757                 } else {
1758                         ce->ce_file = add(cp, ce->ce_file);
1759                 }
1760         }
1761
1762         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1763                 content_error(ce->ce_file, ct,
1764                                 "unable to fopen for reading/writing");
1765                 return NOTOK;
1766         }
1767
1768         if ((len = ct->c_end - ct->c_begin) < 0)
1769                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1770
1771         if (!ct->c_fp) {
1772                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1773                         content_error(ct->c_file, ct,
1774                                         "unable to open for reading");
1775                         return NOTOK;
1776                 }
1777                 own_ct_fp = 1;
1778         }
1779
1780         quoted = 0;
1781
1782         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1783         while (len > 0) {
1784                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1785                         content_error(NULL, ct, "premature eof");
1786                         goto clean_up;
1787                 }
1788
1789                 if ((cc = strlen(buffer)) > len)
1790                         cc = len;
1791                 len -= cc;
1792
1793                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1794                         if (!isspace(*ep))
1795                                 break;
1796                 *++ep = '\n', ep++;
1797
1798                 for (; cp < ep; cp++) {
1799                         if (quoted > 0) {
1800                                 /* in an escape sequence */
1801                                 if (quoted == 1) {
1802                                         /* at byte 1 of an escape sequence */
1803                                         mask = hex2nib[*cp & 0x7f];
1804                                         /* next is byte 2 */
1805                                         quoted = 2;
1806                                 } else {
1807                                         /* at byte 2 of an escape sequence */
1808                                         mask <<= 4;
1809                                         mask |= hex2nib[*cp & 0x7f];
1810                                         putc(mask, ce->ce_fp);
1811                                         if (ferror(ce->ce_fp)) {
1812                                                 content_error(ce->ce_file, ct, "error writing to");
1813                                                 goto clean_up;
1814                                         }
1815                                         /*
1816                                         ** finished escape sequence; next may
1817                                         ** be literal or a new escape sequence
1818                                         */
1819                                         quoted = 0;
1820                                 }
1821                                 /* on to next byte */
1822                                 continue;
1823                         }
1824
1825                         /* not in an escape sequence */
1826                         if (*cp == '=') {
1827                                 /*
1828                                 ** starting an escape sequence,
1829                                 ** or invalid '='?
1830                                 */
1831                                 if (cp + 1 < ep && cp[1] == '\n') {
1832                                         /* "=\n" soft line break, eat the \n */
1833                                         cp++;
1834                                         continue;
1835                                 }
1836                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1837                                         /*
1838                                         ** We don't have 2 bytes left,
1839                                         ** so this is an invalid escape
1840                                         ** sequence; just show the raw bytes
1841                                         ** (below).
1842                                         */
1843                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1844                                         /*
1845                                         ** Next 2 bytes are hex digits,
1846                                         ** making this a valid escape
1847                                         ** sequence; let's decode it (above).
1848                                         */
1849                                         quoted = 1;
1850                                         continue;
1851                                 } else {
1852                                         /*
1853                                         ** One or both of the next 2 is
1854                                         ** out of range, making this an
1855                                         ** invalid escape sequence; just
1856                                         ** show the raw bytes (below).
1857                                         */
1858                                 }
1859                         }
1860
1861                         /* Just show the raw byte. */
1862                         putc(*cp, ce->ce_fp);
1863                         if (ferror(ce->ce_fp)) {
1864                                 content_error(ce->ce_file, ct,
1865                                                 "error writing to");
1866                                 goto clean_up;
1867                         }
1868                 }
1869         }
1870         if (quoted) {
1871                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1872                 goto clean_up;
1873         }
1874
1875         fseek(ct->c_fp, 0L, SEEK_SET);
1876
1877         if (fflush(ce->ce_fp)) {
1878                 content_error(ce->ce_file, ct, "error writing to");
1879                 goto clean_up;
1880         }
1881
1882         fseek(ce->ce_fp, 0L, SEEK_SET);
1883
1884 ready_to_go:
1885         *file = ce->ce_file;
1886         if (own_ct_fp) {
1887                 fclose(ct->c_fp);
1888                 ct->c_fp = NULL;
1889         }
1890         return fileno(ce->ce_fp);
1891
1892 clean_up:
1893         free_encoding(ct, 0);
1894         if (own_ct_fp) {
1895                 fclose(ct->c_fp);
1896                 ct->c_fp = NULL;
1897         }
1898         return NOTOK;
1899 }
1900
1901
1902 /*
1903 ** 7BIT
1904 */
1905
1906 static int
1907 Init7Bit(CT ct)
1908 {
1909         if (init_encoding(ct, open7Bit) == NOTOK)
1910                 return NOTOK;
1911
1912         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1913         return OK;
1914 }
1915
1916
1917 int
1918 open7Bit(CT ct, char **file)
1919 {
1920         int cc, fd, len, own_ct_fp = 0;
1921         char buffer[BUFSIZ];
1922         /* sbeck -- handle suffixes */
1923         char *cp;
1924         CI ci;
1925         CE ce;
1926
1927         ce = ct->c_cefile;
1928         if (ce->ce_fp) {
1929                 fseek(ce->ce_fp, 0L, SEEK_SET);
1930                 goto ready_to_go;
1931         }
1932
1933         if (ce->ce_file) {
1934                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1935                         content_error(ce->ce_file, ct,
1936                                         "unable to fopen for reading");
1937                         return NOTOK;
1938                 }
1939                 goto ready_to_go;
1940         }
1941
1942         if (*file == NULL) {
1943                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1944                 ce->ce_unlink = 1;
1945         } else {
1946                 ce->ce_file = getcpy(*file);
1947                 ce->ce_unlink = 0;
1948         }
1949
1950         /* sbeck@cise.ufl.edu -- handle suffixes */
1951         ci = &ct->c_ctinfo;
1952         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1953                         invo_name, ci->ci_type, ci->ci_subtype);
1954         cp = context_find(buffer);
1955         if (cp == NULL || *cp == '\0') {
1956                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1957                                 ci->ci_type);
1958                 cp = context_find(buffer);
1959         }
1960         if (cp != NULL && *cp != '\0') {
1961                 if (ce->ce_unlink) {
1962                         /*
1963                         ** Temporary file already exists, so we rename to
1964                         ** version with extension.
1965                         */
1966                         char *file_org = strdup(ce->ce_file);
1967                         ce->ce_file = add(cp, ce->ce_file);
1968                         if (rename(file_org, ce->ce_file)) {
1969                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1970                                                 file_org);
1971                         }
1972                         free(file_org);
1973
1974                 } else {
1975                         ce->ce_file = add(cp, ce->ce_file);
1976                 }
1977         }
1978
1979         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1980                 content_error(ce->ce_file, ct,
1981                                 "unable to fopen for reading/writing");
1982                 return NOTOK;
1983         }
1984
1985         if (ct->c_type == CT_MULTIPART) {
1986                 char **ap, **ep;
1987                 CI ci = &ct->c_ctinfo;
1988
1989                 len = 0;
1990                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1991                                 ci->ci_subtype);
1992                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1993                                 strlen(ci->ci_subtype);
1994                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1995                         putc(';', ce->ce_fp);
1996                         len++;
1997
1998                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1999                                         *ap, *ep);
2000
2001                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2002                                 fputs("\n\t", ce->ce_fp);
2003                                 len = 8;
2004                         } else {
2005                                 putc(' ', ce->ce_fp);
2006                                 len++;
2007                         }
2008                         fprintf(ce->ce_fp, "%s", buffer);
2009                         len += cc;
2010                 }
2011
2012                 if (ci->ci_comment) {
2013                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2014                                                 >= CPERLIN) {
2015                                 fputs("\n\t", ce->ce_fp);
2016                                 len = 8;
2017                         } else {
2018                                 putc(' ', ce->ce_fp);
2019                                 len++;
2020                         }
2021                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2022                         len += cc;
2023                 }
2024                 fprintf(ce->ce_fp, "\n");
2025                 if (ct->c_id)
2026                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2027                 if (ct->c_descr)
2028                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2029                 if (ct->c_dispo)
2030                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2031                 fprintf(ce->ce_fp, "\n");
2032         }
2033
2034         if ((len = ct->c_end - ct->c_begin) < 0)
2035                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2036
2037         if (!ct->c_fp) {
2038                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2039                         content_error(ct->c_file, ct,
2040                                         "unable to open for reading");
2041                         return NOTOK;
2042                 }
2043                 own_ct_fp = 1;
2044         }
2045
2046         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2047         while (len > 0)
2048                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2049                 case NOTOK:
2050                         content_error(ct->c_file, ct, "error reading from");
2051                         goto clean_up;
2052
2053                 case OK:
2054                         content_error(NULL, ct, "premature eof");
2055                         goto clean_up;
2056
2057                 default:
2058                         if (cc > len)
2059                                 cc = len;
2060                         len -= cc;
2061
2062                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2063                         if (ferror(ce->ce_fp)) {
2064                                 content_error(ce->ce_file, ct,
2065                                                 "error writing to");
2066                                 goto clean_up;
2067                         }
2068                 }
2069
2070         fseek(ct->c_fp, 0L, SEEK_SET);
2071
2072         if (fflush(ce->ce_fp)) {
2073                 content_error(ce->ce_file, ct, "error writing to");
2074                 goto clean_up;
2075         }
2076
2077         fseek(ce->ce_fp, 0L, SEEK_SET);
2078
2079 ready_to_go:
2080         *file = ce->ce_file;
2081         if (own_ct_fp) {
2082                 fclose(ct->c_fp);
2083                 ct->c_fp = NULL;
2084         }
2085         return fileno(ce->ce_fp);
2086
2087 clean_up:
2088         free_encoding(ct, 0);
2089         if (own_ct_fp) {
2090                 fclose(ct->c_fp);
2091                 ct->c_fp = NULL;
2092         }
2093         return NOTOK;
2094 }