fad75d7f628c87c7e767a3d10232692709572211
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
22 extern int debugsw;
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
/*
** Name of the directory in which temporary files get created.
** Callers have to assign it before using the routines below.
*/
char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Examine a status word (presumably as delivered by wait(2) -- the
** callers are not visible here).
**
** The status is handed back unchanged if its second byte is 0xff or
** if its low seven bits differ from SIGQUIT.  In the remaining case
** stdout and stderr are flushed and the process exits with
** EX_SOFTWARE; the function does not return then.
*/
int
pidcheck(int status)
{
	int high_byte_is_ff = ((status & 0xff00) == 0xff00);
	int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

	if (!high_byte_is_ff && low_bits_are_quit) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         int compnum, state;
235         char buf[BUFSIZ], name[NAMESZ];
236         char *np, *vp;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         if (!(ct = (CT) calloc(1, sizeof(*ct))))
242                 adios(EX_OSERR, NULL, "out of memory");
243
244         ct->c_fp = in;
245         ct->c_file = getcpy(file);
246         ct->c_begin = ftell(ct->c_fp) + 1;
247
248         /*
249         ** Parse the header fields for this
250         ** content into a linked list.
251         */
252         for (compnum = 1, state = FLD;;) {
253                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
254                 case FLD:
255                 case FLDPLUS:
256                 case FLDEOF:
257                         compnum++;
258
259                         /* get copies of the buffers */
260                         np = getcpy(name);
261                         vp = getcpy(buf);
262
263                         /* if necessary, get rest of field */
264                         while (state == FLDPLUS) {
265                                 state = m_getfld(state, name, buf,
266                                                 sizeof(buf), in);
267                                 vp = add(buf, vp);  /* add to previous value */
268                         }
269
270                         /* Now add the header data to the list */
271                         add_header(ct, np, vp);
272
273                         /* continue, if this isn't the last header field */
274                         if (state != FLDEOF) {
275                                 ct->c_begin = ftell(in) + 1;
276                                 continue;
277                         }
278                         /* else fall... */
279
280                 case BODY:
281                 case BODYEOF:
282                         ct->c_begin = ftell(in) - strlen(buf);
283                         break;
284
285                 case FILEEOF:
286                         ct->c_begin = ftell(in);
287                         break;
288
289                 case LENERR:
290                 case FMTERR:
291                         adios(EX_DATAERR, NULL, "message format error in component #%d",
292                                         compnum);
293
294                 default:
295                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
296                 }
297
298                 /* break out of the loop */
299                 break;
300         }
301
302         /*
303         ** Read the content headers.  We will parse the
304         ** MIME related header fields into their various
305         ** structures and set internal flags related to
306         ** content type/subtype, etc.
307         */
308
309         hp = ct->c_first_hf;  /* start at first header field */
310         while (hp) {
311                 /* Get MIME-Version field */
312                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
313                         int ucmp;
314                         char c;
315                         unsigned char *cp, *dp;
316
317                         if (ct->c_vrsn) {
318                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
319                                 goto next_header;
320                         }
321                         ct->c_vrsn = getcpy(hp->value);
322
323                         /* Now, cleanup this field */
324                         cp = ct->c_vrsn;
325
326                         while (isspace(*cp))
327                                 cp++;
328                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
329                                 *dp++ = ' ';
330                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
331                                 if (!isspace(*dp))
332                                         break;
333                         *++dp = '\0';
334                         if (debugsw)
335                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
336
337                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
338                                 goto out;
339
340                         for (dp = cp; istoken(*dp); dp++)
341                                 continue;
342                         c = *dp;
343                         *dp = '\0';
344                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
345                         *dp = c;
346                         if (!ucmp) {
347                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
348                         }
349
350                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
351                         /* Get Content-Type field */
352                         struct str2init *s2i;
353                         CI ci = &ct->c_ctinfo;
354
355                         /* Check if we've already seen a Content-Type header */
356                         if (ct->c_ctline) {
357                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
358                                 goto next_header;
359                         }
360
361                         /* Parse the Content-Type field */
362                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
363                                 goto out;
364
365                         /*
366                         ** Set the Init function and the internal
367                         ** flag for this content type.
368                         */
369                         for (s2i = str2cts; s2i->si_key; s2i++)
370                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
371                                         break;
372                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
373                                 s2i++;
374                         ct->c_type = s2i->si_val;
375                         ct->c_ctinitfnx = s2i->si_init;
376
377                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
378                         /* Get Content-Transfer-Encoding field */
379                         char c;
380                         unsigned char *cp, *dp;
381                         struct str2init *s2i;
382
383                         /*
384                         ** Check if we've already seen the
385                         ** Content-Transfer-Encoding field
386                         */
387                         if (ct->c_celine) {
388                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
389                                 goto next_header;
390                         }
391
392                         /* get copy of this field */
393                         ct->c_celine = cp = getcpy(hp->value);
394
395                         while (isspace(*cp))
396                                 cp++;
397                         for (dp = cp; istoken(*dp); dp++)
398                                 continue;
399                         c = *dp;
400                         *dp = '\0';
401
402                         /*
403                         ** Find the internal flag and Init function
404                         ** for this transfer encoding.
405                         */
406                         for (s2i = str2ces; s2i->si_key; s2i++)
407                                 if (!mh_strcasecmp(cp, s2i->si_key))
408                                         break;
409                         if (!s2i->si_key && !uprf(cp, "X-"))
410                                 s2i++;
411                         *dp = c;
412                         ct->c_encoding = s2i->si_val;
413
414                         /* Call the Init function for this encoding */
415                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
416                                 goto out;
417
418                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
419                         /* Get Content-ID field */
420                         ct->c_id = add(hp->value, ct->c_id);
421
422                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
423                         /* Get Content-Description field */
424                         ct->c_descr = add(hp->value, ct->c_descr);
425
426                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
427                         /* Get Content-Disposition field */
428                         ct->c_dispo = add(hp->value, ct->c_dispo);
429                 }
430
431 next_header:
432                 hp = hp->next;  /* next header field */
433         }
434
435         /*
436         ** Check if we saw a Content-Type field.
437         ** If not, then assign a default value for
438         ** it, and the Init function.
439         */
440         if (!ct->c_ctline) {
441                 /*
442                 ** If we are inside a multipart/digest message,
443                 ** so default type is message/rfc822
444                 */
445                 if (toplevel < 0) {
446                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
447                                 goto out;
448                         ct->c_type = CT_MESSAGE;
449                         ct->c_ctinitfnx = InitMessage;
450                 } else {
451                         /*
452                         ** Else default type is text/plain
453                         */
454                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
455                                 goto out;
456                         ct->c_type = CT_TEXT;
457                         ct->c_ctinitfnx = InitText;
458                 }
459         }
460
461         /* Use default Transfer-Encoding, if necessary */
462         if (!ct->c_celine) {
463                 ct->c_encoding = CE_7BIT;
464                 Init7Bit(ct);
465         }
466
467         return ct;
468
469 out:
470         free_content(ct);
471         return NULL;
472 }
473
474
475 /*
476 ** small routine to add header field to list
477 */
478
479 int
480 add_header(CT ct, char *name, char *value)
481 {
482         HF hp;
483
484         /* allocate header field structure */
485         hp = mh_xmalloc(sizeof(*hp));
486
487         /* link data into header structure */
488         hp->name = name;
489         hp->value = value;
490         hp->next = NULL;
491
492         /* link header structure into the list */
493         if (ct->c_first_hf == NULL) {
494                 ct->c_first_hf = hp;  /* this is the first */
495                 ct->c_last_hf = hp;
496         } else {
497                 ct->c_last_hf->next = hp;  /* add it to the end */
498                 ct->c_last_hf = hp;
499         }
500
501         return 0;
502 }
503
504
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert `; name="value"' right in front of
** the first semicolon of buf, or at the end of buf if it has none.
** Note that name should not contain a trailing =.  The quotes around
** the value are added here.  Typical usage: make sure that a
** Content-Disposition header contains filename="foo".
**
** buf must be heap memory (or NULL).  If something gets inserted,
** buf is freed and a newly allocated, newline terminated string is
** returned.  In all other cases (buf or value is NULL, or name= is
** present already) buf itself is returned untouched.
** name is assumed to be non-null.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *text = (char *) buf;
	char *newbuf = text;
	char *name_plus_equal, *insertion, *semicolon;
	size_t len;

	if (!buf || !value) {
		return newbuf;
	}

	name_plus_equal = concat(name, "=", NULL);
	if (strstr(text, name_plus_equal)) {
		/* already there, nothing to do */
		free(name_plus_equal);
		return newbuf;
	}
	free(name_plus_equal);

	/*
	** Trim trailing space, esp. newline.  Going by the length
	** keeps us inside the buffer even if buf is the empty string.
	*/
	len = strlen(text);
	while (len > 0 && isspace(buf[len - 1])) {
		buf[--len] = '\0';
	}

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	if ((semicolon = strchr(text, ';'))) {
		/* split at the first semicolon and put us in between */
		char *prefix = getcpy(text);

		prefix[semicolon - text] = '\0';
		newbuf = concat(prefix, insertion, semicolon, "\n", NULL);
		free(prefix);
	} else {
		/* Append to end. */
		newbuf = concat(text, insertion, "\n", NULL);
	}

	free(insertion);
	free(buf);

	return newbuf;
}
562
/*
** Extract just the foo of name_suffix="foo", if any, from value.
** Note that, for example, a name_suffix of name will match
** filename="foo", and return foo.
**
** If value contains such a parameter, a newly allocated copy of the
** quoted text is returned.  Should the closing quote be missing,
** everything up to the end of value is taken.  If there is no such
** parameter, value itself is returned (no copy!).  Callers can tell
** the two cases apart by comparing the result to value.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern = concat(name_suffix, "=\"", NULL);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	free(pattern);
	if (!match) {
		return value;
	}

	/* The pattern ends in a quote, so strchr() does find one. */
	begin = strchr(match, '"') + 1;

	/* Find the closing quote, but never run past the string end. */
	for (end = begin; *end && *end != '"'; end++)
		;

	len = end - begin;
	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
595
596 /*
597 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
598 ** directives.  Fills in the information of the CTinfo structure.
599 */
600 int
601 get_ctinfo(unsigned char *cp, CT ct, int magic)
602 {
603         int i;
604         unsigned char *dp;
605         char **ap, **ep;
606         char c;
607         CI ci;
608
609         ci = &ct->c_ctinfo;
610         i = strlen(invo_name) + 2;
611
612         /* store copy of Content-Type line */
613         cp = ct->c_ctline = getcpy(cp);
614
615         while (isspace(*cp))  /* trim leading spaces */
616                 cp++;
617
618         /* change newlines to spaces */
619         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
620                 *dp++ = ' ';
621
622         /* trim trailing spaces */
623         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
624                 if (!isspace(*dp))
625                         break;
626         *++dp = '\0';
627
628         if (debugsw)
629                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
630
631         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
632                 return NOTOK;
633
634         for (dp = cp; istoken(*dp); dp++)
635                 continue;
636         c = *dp, *dp = '\0';
637         ci->ci_type = getcpy(cp);  /* store content type */
638         *dp = c, cp = dp;
639
640         if (!*ci->ci_type) {
641                 advise(NULL, "invalid %s: field in message %s (empty type)",
642                                 TYPE_FIELD, ct->c_file);
643                 return NOTOK;
644         }
645
646         /* down case the content type string */
647         for (dp = ci->ci_type; *dp; dp++)
648                 if (isalpha(*dp) && isupper(*dp))
649                         *dp = tolower(*dp);
650
651         while (isspace(*cp))
652                 cp++;
653
654         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
655                 return NOTOK;
656
657         if (*cp != '/') {
658                 if (!magic)
659                         ci->ci_subtype = getcpy("");
660                 goto magic_skip;
661         }
662
663         cp++;
664         while (isspace(*cp))
665                 cp++;
666
667         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
668                 return NOTOK;
669
670         for (dp = cp; istoken(*dp); dp++)
671                 continue;
672         c = *dp, *dp = '\0';
673         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
674         *dp = c, cp = dp;
675
676         if (!*ci->ci_subtype) {
677                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
678                 return NOTOK;
679         }
680
681         /* down case the content subtype string */
682         for (dp = ci->ci_subtype; *dp; dp++)
683                 if (isalpha(*dp) && isupper(*dp))
684                         *dp = tolower(*dp);
685
686 magic_skip:
687         while (isspace(*cp))
688                 cp++;
689
690         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
691                 return NOTOK;
692
693         /*
694         ** Parse attribute/value pairs given with Content-Type
695         */
696         ep = (ap = ci->ci_attrs) + NPARMS;
697         while (*cp == ';') {
698                 char *vp;
699                 unsigned char *up;
700
701                 if (ap >= ep) {
702                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
703                         return NOTOK;
704                 }
705
706                 cp++;
707                 while (isspace(*cp))
708                         cp++;
709
710                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
711                         return NOTOK;
712
713                 if (*cp == 0) {
714                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
715                         return OK;
716                 }
717
718                 /* down case the attribute name */
719                 for (dp = cp; istoken(*dp); dp++)
720                         if (isalpha(*dp) && isupper(*dp))
721                                 *dp = tolower(*dp);
722
723                 for (up = dp; isspace(*dp);)
724                         dp++;
725                 if (dp == cp || *dp != '=') {
726                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
727                         return NOTOK;
728                 }
729
730                 vp = (*ap = getcpy(cp)) + (up - cp);
731                 *vp = '\0';
732                 for (dp++; isspace(*dp);)
733                         dp++;
734
735                 /* now add the attribute value */
736                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
737
738                 if (*dp == '"') {
739                         for (cp = ++dp, dp = vp;;) {
740                                 switch (c = *cp++) {
741                                 case '\0':
742 bad_quote:
743                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
744                                         return NOTOK;
745
746                                 case '\\':
747                                         *dp++ = c;
748                                         if ((c = *cp++) == '\0')
749                                                 goto bad_quote;
750                                         /* else fall... */
751
752                                 default:
753                                         *dp++ = c;
754                                         continue;
755
756                                 case '"':
757                                         *dp = '\0';
758                                         break;
759                                 }
760                                 break;
761                         }
762                 } else {
763                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
764                                 continue;
765                         *dp = '\0';
766                 }
767                 if (!*vp) {
768                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
769                         return NOTOK;
770                 }
771                 ap++;
772
773                 while (isspace(*cp))
774                         cp++;
775
776                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
777                         return NOTOK;
778         }
779
780         /*
781         ** Get any <Content-Id> given in buffer
782         */
783         if (magic && *cp == '<') {
784                 if (ct->c_id) {
785                         free(ct->c_id);
786                         ct->c_id = NULL;
787                 }
788                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
789                         advise(NULL, "invalid ID in message %s", ct->c_file);
790                         return NOTOK;
791                 }
792                 c = *dp;
793                 *dp = '\0';
794                 if (*ct->c_id)
795                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
796                 else
797                         ct->c_id = NULL;
798                 *dp++ = c;
799                 cp = dp;
800
801                 while (isspace(*cp))
802                         cp++;
803         }
804
805         /*
806         ** Get any [Content-Description] given in buffer.
807         */
808         if (magic && *cp == '[') {
809                 ct->c_descr = ++cp;
810                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
811                         if (*dp == ']')
812                                 break;
813                 if (dp < cp) {
814                         advise(NULL, "invalid description in message %s",
815                                         ct->c_file);
816                         ct->c_descr = NULL;
817                         return NOTOK;
818                 }
819
820                 c = *dp;
821                 *dp = '\0';
822                 if (*ct->c_descr)
823                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
824                 else
825                         ct->c_descr = NULL;
826                 *dp++ = c;
827                 cp = dp;
828
829                 while (isspace(*cp))
830                         cp++;
831         }
832
833         /*
834         ** Get any {Content-Disposition} given in buffer.
835         */
836         if (magic && *cp == '{') {
837                 ct->c_dispo = ++cp;
838                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
839                         if (*dp == '}')
840                                 break;
841                 if (dp < cp) {
842                         advise(NULL, "invalid disposition in message %s",
843                                         ct->c_file);
844                         ct->c_dispo = NULL;
845                         return NOTOK;
846                 }
847
848                 c = *dp;
849                 *dp = '\0';
850                 if (*ct->c_dispo)
851                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
852                 else
853                         ct->c_dispo = NULL;
854                 *dp++ = c;
855                 cp = dp;
856
857                 while (isspace(*cp))
858                         cp++;
859         }
860
861         /*
862         ** Check if anything is left over
863         */
864         if (*cp) {
865                 if (magic) {
866                         ci->ci_magic = getcpy(cp);
867
868                         /*
869                         ** If there is a Content-Disposition header and
870                         ** it doesn't have a *filename=, extract it from
871                         ** the magic contents.  The mhbasename call skips
872                         ** any leading directory components.
873                         */
874                         if (ct->c_dispo)
875                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
876                         } else
877                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
878         }
879
880         return OK;
881 }
882
883
884 static int
885 get_comment(CT ct, unsigned char **ap, int istype)
886 {
887         int i;
888         char *bp;
889         unsigned char *cp;
890         char c, buffer[BUFSIZ], *dp;
891         CI ci;
892
893         ci = &ct->c_ctinfo;
894         cp = *ap;
895         bp = buffer;
896         cp++;
897
898         for (i = 0;;) {
899                 switch (c = *cp++) {
900                 case '\0':
901 invalid:
902                 advise(NULL, "invalid comment in message %s's %s: field",
903                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
904                 return NOTOK;
905
906                 case '\\':
907                         *bp++ = c;
908                         if ((c = *cp++) == '\0')
909                                 goto invalid;
910                         *bp++ = c;
911                         continue;
912
913                 case '(':
914                         i++;
915                         /* and fall... */
916                 default:
917                         *bp++ = c;
918                         continue;
919
920                 case ')':
921                         if (--i < 0)
922                                 break;
923                         *bp++ = c;
924                         continue;
925                 }
926                 break;
927         }
928         *bp = '\0';
929
930         if (istype) {
931                 if ((dp = ci->ci_comment)) {
932                         ci->ci_comment = concat(dp, " ", buffer, NULL);
933                         free(dp);
934                 } else {
935                         ci->ci_comment = getcpy(buffer);
936                 }
937         }
938
939         while (isspace(*cp))
940                 cp++;
941
942         *ap = cp;
943         return OK;
944 }
945
946
947 /*
948 ** CONTENTS
949 **
950 ** Handles content types audio, image, and video.
951 ** There's not much to do right here.
952 */
953
954 static int
955 InitGeneric(CT ct)
956 {
957         return OK;  /* not much to do here */
958 }
959
960
961 /*
962 ** TEXT
963 */
964
965 static int
966 InitText(CT ct)
967 {
968         char **ap, **ep;
969         struct k2v *kv;
970         struct text *t;
971         CI ci = &ct->c_ctinfo;
972
973         /* check for missing subtype */
974         if (!*ci->ci_subtype)
975                 ci->ci_subtype = add("plain", ci->ci_subtype);
976
977         /* match subtype */
978         for (kv = SubText; kv->kv_key; kv++)
979                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
980                         break;
981         ct->c_subtype = kv->kv_value;
982
983         /* allocate text character set structure */
984         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
985                 adios(EX_OSERR, NULL, "out of memory");
986         ct->c_ctparams = (void *) t;
987
988         /* scan for charset parameter */
989         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
990                 if (!mh_strcasecmp(*ap, "charset"))
991                         break;
992
993         /* check if content specified a character set */
994         if (*ap) {
995                 /* store its name */
996                 ct->c_charset = getcpy(norm_charmap(*ep));
997                 /* match character set or set to CHARSET_UNKNOWN */
998                 for (kv = Charset; kv->kv_key; kv++) {
999                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
1000                                 break;
1001                         }
1002                 }
1003                 t->tx_charset = kv->kv_value;
1004         } else {
1005                 t->tx_charset = CHARSET_UNSPECIFIED;
1006         }
1007
1008         return OK;
1009 }
1010
1011
1012 /*
1013 ** MULTIPART
1014 */
1015
1016 static int
1017 InitMultiPart(CT ct)
1018 {
1019         int inout;
1020         long last, pos;
1021         unsigned char *cp, *dp;
1022         char **ap, **ep;
1023         char *bp, buffer[BUFSIZ];
1024         struct multipart *m;
1025         struct k2v *kv;
1026         struct part *part, **next;
1027         CI ci = &ct->c_ctinfo;
1028         CT p;
1029         FILE *fp;
1030
1031         /*
1032         ** The encoding for multipart messages must be either
1033         ** 7bit, 8bit, or binary (per RFC2045).
1034         */
1035         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1036                 && ct->c_encoding != CE_BINARY) {
1037                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1038                 return NOTOK;
1039         }
1040
1041         /* match subtype */
1042         for (kv = SubMultiPart; kv->kv_key; kv++)
1043                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1044                         break;
1045         ct->c_subtype = kv->kv_value;
1046
1047         /*
1048         ** Check for "boundary" parameter, which is
1049         ** required for multipart messages.
1050         */
1051         bp = 0;
1052         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1053                 if (!mh_strcasecmp(*ap, "boundary")) {
1054                         bp = *ep;
1055                         break;
1056                 }
1057         }
1058
1059         /* complain if boundary parameter is missing */
1060         if (!*ap) {
1061                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1062                 return NOTOK;
1063         }
1064
1065         /* allocate primary structure for multipart info */
1066         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1067                 adios(EX_OSERR, NULL, "out of memory");
1068         ct->c_ctparams = (void *) m;
1069
1070         /* check if boundary parameter contains only whitespace characters */
1071         for (cp = bp; isspace(*cp); cp++)
1072                 continue;
1073         if (!*cp) {
1074                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1075                 return NOTOK;
1076         }
1077
1078         /* remove trailing whitespace from boundary parameter */
1079         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1080                 if (!isspace(*dp))
1081                         break;
1082         *++dp = '\0';
1083
1084         /* record boundary separators */
1085         m->mp_start = concat(bp, "\n", NULL);
1086         m->mp_stop = concat(bp, "--\n", NULL);
1087
1088         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1089                 advise(ct->c_file, "unable to open for reading");
1090                 return NOTOK;
1091         }
1092
1093         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1094         last = ct->c_end;
1095         next = &m->mp_parts;
1096         part = NULL;
1097         inout = 1;
1098
1099         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1100                 if (pos > last)
1101                         break;
1102
1103                 pos += strlen(buffer);
1104                 if (buffer[0] != '-' || buffer[1] != '-')
1105                         continue;
1106                 if (inout) {
1107                         if (strcmp(buffer + 2, m->mp_start)!=0)
1108                                 continue;
1109 next_part:
1110                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1111                                         == NULL)
1112                                 adios(EX_OSERR, NULL, "out of memory");
1113                         *next = part;
1114                         next = &part->mp_next;
1115
1116                         if (!(p = get_content(fp, ct->c_file,
1117                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1118                                 ct->c_fp = NULL;
1119                                 return NOTOK;
1120                         }
1121                         p->c_fp = NULL;
1122                         part->mp_part = p;
1123                         pos = p->c_begin;
1124                         fseek(fp, pos, SEEK_SET);
1125                         inout = 0;
1126                 } else {
1127                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1128                                 inout = 1;
1129 end_part:
1130                                 p = part->mp_part;
1131                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1132                                 if (p->c_end < p->c_begin)
1133                                         p->c_begin = p->c_end;
1134                                 if (inout)
1135                                         goto next_part;
1136                                 goto last_part;
1137                         } else {
1138                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1139                                         goto end_part;
1140                         }
1141                 }
1142         }
1143
1144         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1145         if (!inout && part) {
1146                 p = part->mp_part;
1147                 p->c_end = ct->c_end;
1148
1149                 if (p->c_begin >= p->c_end) {
1150                         for (next = &m->mp_parts; *next != part;
1151                                 next = &((*next)->mp_next))
1152                                 continue;
1153                         *next = NULL;
1154                         free_content(p);
1155                         free((char *) part);
1156                 }
1157         }
1158
1159 last_part:
1160         /* reverse the order of the parts for multipart/alternative */
1161         if (ct->c_subtype == MULTI_ALTERNATE)
1162                 reverse_parts(ct);
1163
1164         /*
1165         ** label all subparts with part number, and
1166         ** then initialize the content of the subpart.
1167         */
1168         {
1169                 int partnum;
1170                 char *pp;
1171                 char partnam[BUFSIZ];
1172
1173                 if (ct->c_partno) {
1174                         snprintf(partnam, sizeof(partnam), "%s.",
1175                                         ct->c_partno);
1176                         pp = partnam + strlen(partnam);
1177                 } else {
1178                         pp = partnam;
1179                 }
1180
1181                 for (part = m->mp_parts, partnum = 1; part;
1182                         part = part->mp_next, partnum++) {
1183                         p = part->mp_part;
1184
1185                         sprintf(pp, "%d", partnum);
1186                         p->c_partno = getcpy(partnam);
1187
1188                         /* initialize the content of the subparts */
1189                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1190                                 fclose(ct->c_fp);
1191                                 ct->c_fp = NULL;
1192                                 return NOTOK;
1193                         }
1194                 }
1195         }
1196
1197         fclose(ct->c_fp);
1198         ct->c_fp = NULL;
1199         return OK;
1200 }
1201
1202
1203 /*
1204 ** reverse the order of the parts of a multipart
1205 */
1206
1207 static void
1208 reverse_parts(CT ct)
1209 {
1210         int i;
1211         struct multipart *m;
1212         struct part **base, **bmp, **next, *part;
1213
1214         m = (struct multipart *) ct->c_ctparams;
1215
1216         /* if only one part, just return */
1217         if (!m->mp_parts || !m->mp_parts->mp_next)
1218                 return;
1219
1220         /* count number of parts */
1221         i = 0;
1222         for (part = m->mp_parts; part; part = part->mp_next)
1223                 i++;
1224
1225         /* allocate array of pointers to the parts */
1226         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1227                 adios(EX_OSERR, NULL, "out of memory");
1228         bmp = base;
1229
1230         /* point at all the parts */
1231         for (part = m->mp_parts; part; part = part->mp_next)
1232                 *bmp++ = part;
1233         *bmp = NULL;
1234
1235         /* reverse the order of the parts */
1236         next = &m->mp_parts;
1237         for (bmp--; bmp >= base; bmp--) {
1238                 part = *bmp;
1239                 *next = part;
1240                 next = &part->mp_next;
1241         }
1242         *next = NULL;
1243
1244         /* free array of pointers */
1245         free((char *) base);
1246 }
1247
1248
1249 /*
1250 ** MESSAGE
1251 */
1252
1253 static int
1254 InitMessage(CT ct)
1255 {
1256         struct k2v *kv;
1257         CI ci = &ct->c_ctinfo;
1258
1259         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1260                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1261                 return NOTOK;
1262         }
1263
1264         /* check for missing subtype */
1265         if (!*ci->ci_subtype)
1266                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1267
1268         /* match subtype */
1269         for (kv = SubMessage; kv->kv_key; kv++)
1270                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1271                         break;
1272         ct->c_subtype = kv->kv_value;
1273
1274         switch (ct->c_subtype) {
1275         case MESSAGE_RFC822:
1276                 break;
1277
1278         case MESSAGE_PARTIAL:
1279                 {
1280                 char **ap, **ep;
1281                 struct partial *p;
1282
1283                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1284                 adios(EX_OSERR, NULL, "out of memory");
1285                 ct->c_ctparams = (void *) p;
1286
1287                 /*
1288                 ** scan for parameters "id", "number",
1289                 ** and "total"
1290                 */
1291                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1292                         if (!mh_strcasecmp(*ap, "id")) {
1293                                 p->pm_partid = getcpy(*ep);
1294                                 continue;
1295                         }
1296                         if (!mh_strcasecmp(*ap, "number")) {
1297                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1298 invalid_param:
1299                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1300                                         return NOTOK;
1301                                 }
1302                                 continue;
1303                         }
1304                         if (!mh_strcasecmp(*ap, "total")) {
1305                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1306                                                 p->pm_maxno < 1)
1307                                         goto invalid_param;
1308                                 continue;
1309                         }
1310                 }
1311
1312                 if (!p->pm_partid || !p->pm_partno
1313                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1314                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1315                         return NOTOK;
1316                 }
1317                 }
1318                 break;
1319
1320         case MESSAGE_EXTERNAL:
1321                 {
1322                 CT p;
1323                 FILE *fp;
1324
1325                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1326                         advise(ct->c_file, "unable to open for reading");
1327                         return NOTOK;
1328                 }
1329
1330                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1331
1332                 if (!(p = get_content(fp, ct->c_file, 0))) {
1333                         ct->c_fp = NULL;
1334                         return NOTOK;
1335                 }
1336
1337                 p->c_fp = NULL;
1338                 p->c_end = p->c_begin;
1339
1340                 fclose(ct->c_fp);
1341                 ct->c_fp = NULL;
1342
1343                 switch (p->c_type) {
1344                 case CT_MULTIPART:
1345                         break;
1346
1347                 case CT_MESSAGE:
1348                         if (p->c_subtype != MESSAGE_RFC822)
1349                                 break;
1350                         /* else fall... */
1351                 default:
1352                         if (p->c_ctinitfnx)
1353                                 (*p->c_ctinitfnx) (p);
1354                         break;
1355                 }
1356                 }
1357                 break;
1358
1359         default:
1360                 break;
1361         }
1362
1363         return OK;
1364 }
1365
1366
1367 /*
1368 ** APPLICATION
1369 */
1370
1371 static int
1372 InitApplication(CT ct)
1373 {
1374         struct k2v *kv;
1375         CI ci = &ct->c_ctinfo;
1376
1377         /* match subtype */
1378         for (kv = SubApplication; kv->kv_key; kv++)
1379                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1380                         break;
1381         ct->c_subtype = kv->kv_value;
1382
1383         return OK;
1384 }
1385
1386
1387 /*
1388 ** TRANSFER ENCODINGS
1389 */
1390
1391 static int
1392 init_encoding(CT ct, OpenCEFunc openfnx)
1393 {
1394         CE ce;
1395
1396         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1397                 adios(EX_OSERR, NULL, "out of memory");
1398
1399         ct->c_cefile     = ce;
1400         ct->c_ceopenfnx  = openfnx;
1401         ct->c_ceclosefnx = close_encoding;
1402         ct->c_cesizefnx  = size_encoding;
1403
1404         return OK;
1405 }
1406
1407
1408 void
1409 close_encoding(CT ct)
1410 {
1411         CE ce;
1412
1413         if (!(ce = ct->c_cefile))
1414                 return;
1415
1416         if (ce->ce_fp) {
1417                 fclose(ce->ce_fp);
1418                 ce->ce_fp = NULL;
1419         }
1420 }
1421
1422
1423 static unsigned long
1424 size_encoding(CT ct)
1425 {
1426         int fd;
1427         unsigned long size;
1428         char *file;
1429         CE ce;
1430         struct stat st;
1431
1432         if (!(ce = ct->c_cefile))
1433                 return (ct->c_end - ct->c_begin);
1434
1435         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1436                 return (long) st.st_size;
1437
1438         if (ce->ce_file) {
1439                 if (stat(ce->ce_file, &st) != NOTOK)
1440                         return (long) st.st_size;
1441                 else
1442                         return 0L;
1443         }
1444
1445         if (ct->c_encoding == CE_EXTERNAL)
1446                 return (ct->c_end - ct->c_begin);
1447
1448         file = NULL;
1449         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1450                 return (ct->c_end - ct->c_begin);
1451
1452         if (fstat(fd, &st) != NOTOK)
1453                 size = (long) st.st_size;
1454         else
1455                 size = 0L;
1456
1457         (*ct->c_ceclosefnx) (ct);
1458         return size;
1459 }
1460
1461
1462 /*
1463 ** BASE64
1464 */
1465
/*
** Maps a 7-bit character code to its 6-bit BASE64 value.
** 0xff marks characters that are not part of the BASE64 alphabet
** (this includes the padding character '=').
** One row per 16 character codes.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' is 0x3e, '/' is 0x3f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0' .. '9' */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x4f: 'A' .. 'O' */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	/* 0x50 - 0x5f: 'P' .. 'Z' */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x6f: 'a' .. 'o' */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	/* 0x70 - 0x7f: 'p' .. 'z' */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1484
1485
1486 static int
1487 InitBase64(CT ct)
1488 {
1489         return init_encoding(ct, openBase64);
1490 }
1491
1492
1493 static int
1494 openBase64(CT ct, char **file)
1495 {
1496         int bitno, cc;
1497         int fd, len, skip, own_ct_fp = 0;
1498         unsigned long bits;
1499         unsigned char value, *b, *b1, *b2, *b3;
1500         unsigned char *cp, *ep;
1501         char buffer[BUFSIZ];
1502         /* sbeck -- handle suffixes */
1503         CI ci;
1504         CE ce;
1505
1506         b  = (unsigned char *) &bits;
1507         b1 = &b[endian > 0 ? 1 : 2];
1508         b2 = &b[endian > 0 ? 2 : 1];
1509         b3 = &b[endian > 0 ? 3 : 0];
1510
1511         ce = ct->c_cefile;
1512         if (ce->ce_fp) {
1513                 fseek(ce->ce_fp, 0L, SEEK_SET);
1514                 goto ready_to_go;
1515         }
1516
1517         if (ce->ce_file) {
1518                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1519                         content_error(ce->ce_file, ct,
1520                                         "unable to fopen for reading");
1521                         return NOTOK;
1522                 }
1523                 goto ready_to_go;
1524         }
1525
1526         if (*file == NULL) {
1527                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1528                 ce->ce_unlink = 1;
1529         } else {
1530                 ce->ce_file = getcpy(*file);
1531                 ce->ce_unlink = 0;
1532         }
1533
1534         /* sbeck@cise.ufl.edu -- handle suffixes */
1535         ci = &ct->c_ctinfo;
1536         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1537                         invo_name, ci->ci_type, ci->ci_subtype);
1538         cp = context_find(buffer);
1539         if (cp == NULL || *cp == '\0') {
1540                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1541                                 ci->ci_type);
1542                 cp = context_find(buffer);
1543         }
1544         if (cp != NULL && *cp != '\0') {
1545                 if (ce->ce_unlink) {
1546                         /*
1547                         ** Temporary file already exists, so we rename to
1548                         ** version with extension.
1549                         */
1550                         char *file_org = strdup(ce->ce_file);
1551                         ce->ce_file = add(cp, ce->ce_file);
1552                         if (rename(file_org, ce->ce_file)) {
1553                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1554                                                 file_org);
1555                         }
1556                         free(file_org);
1557
1558                 } else {
1559                         ce->ce_file = add(cp, ce->ce_file);
1560                 }
1561         }
1562
1563         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1564                 content_error(ce->ce_file, ct,
1565                                 "unable to fopen for reading/writing");
1566                 return NOTOK;
1567         }
1568
1569         if ((len = ct->c_end - ct->c_begin) < 0)
1570                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1571
1572         if (!ct->c_fp) {
1573                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1574                         content_error(ct->c_file, ct,
1575                                         "unable to open for reading");
1576                         return NOTOK;
1577                 }
1578                 own_ct_fp = 1;
1579         }
1580
1581         bitno = 18;
1582         bits = 0L;
1583         skip = 0;
1584
1585         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1586         while (len > 0) {
1587                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1588                 case NOTOK:
1589                         content_error(ct->c_file, ct, "error reading from");
1590                         goto clean_up;
1591
1592                 case OK:
1593                         content_error(NULL, ct, "premature eof");
1594                         goto clean_up;
1595
1596                 default:
1597                         if (cc > len)
1598                                 cc = len;
1599                         len -= cc;
1600
1601                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1602                                 switch (*cp) {
1603                                 default:
1604                                         if (isspace(*cp))
1605                                                 break;
1606                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1607                                                 if (debugsw) {
1608                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1609                                                 }
1610                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1611                                                 continue;
1612                                         }
1613
1614                                         bits |= value << bitno;
1615 test_end:
1616                                         if ((bitno -= 6) < 0) {
1617                                                 putc((char) *b1, ce->ce_fp);
1618                                                 if (skip < 2) {
1619                                                         putc((char) *b2, ce->ce_fp);
1620                                                         if (skip < 1) {
1621                                                                 putc((char) *b3, ce->ce_fp);
1622                                                         }
1623                                                 }
1624
1625                                                 if (ferror(ce->ce_fp)) {
1626                                                         content_error(ce->ce_file, ct,
1627                                                                                    "error writing to");
1628                                                         goto clean_up;
1629                                                 }
1630                                                 bitno = 18, bits = 0L, skip = 0;
1631                                         }
1632                                         break;
1633
1634                                 case '=':
1635                                         if (++skip > 3)
1636                                                 goto self_delimiting;
1637                                         goto test_end;
1638                                 }
1639                         }
1640                 }
1641         }
1642
1643         if (bitno != 18) {
1644                 if (debugsw)
1645                         fprintf(stderr, "premature ending (bitno %d)\n",
1646                                         bitno);
1647
1648                 content_error(NULL, ct, "invalid BASE64 encoding");
1649                 goto clean_up;
1650         }
1651
1652 self_delimiting:
1653         fseek(ct->c_fp, 0L, SEEK_SET);
1654
1655         if (fflush(ce->ce_fp)) {
1656                 content_error(ce->ce_file, ct, "error writing to");
1657                 goto clean_up;
1658         }
1659
1660         fseek(ce->ce_fp, 0L, SEEK_SET);
1661
1662 ready_to_go:
1663         *file = ce->ce_file;
1664         if (own_ct_fp) {
1665                 fclose(ct->c_fp);
1666                 ct->c_fp = NULL;
1667         }
1668         return fileno(ce->ce_fp);
1669
1670 clean_up:
1671         free_encoding(ct, 0);
1672         if (own_ct_fp) {
1673                 fclose(ct->c_fp);
1674                 ct->c_fp = NULL;
1675         }
1676         return NOTOK;
1677 }
1678
1679
1680 /*
1681 ** QUOTED PRINTABLE
1682 */
1683
/*
** Maps a 7-bit character code to the value of the hexadecimal digit
** it represents ('0'-'9', 'A'-'F', 'a'-'f').  All other characters
** map to 0, as does '0' itself.
** One row per 16 character codes.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x10 - 0x1f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x20 - 0x2f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0' .. '9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x4f: 'A' .. 'F' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x50 - 0x5f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x6f: 'a' .. 'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x70 - 0x7f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1702
1703
1704 static int
1705 InitQuoted(CT ct)
1706 {
1707         return init_encoding(ct, openQuoted);
1708 }
1709
1710
1711 static int
1712 openQuoted(CT ct, char **file)
1713 {
1714         int cc, len, quoted, own_ct_fp = 0;
1715         unsigned char *cp, *ep;
1716         char buffer[BUFSIZ];
1717         unsigned char mask = 0;
1718         CE ce;
1719         /* sbeck -- handle suffixes */
1720         CI ci;
1721
1722         ce = ct->c_cefile;
1723         if (ce->ce_fp) {
1724                 fseek(ce->ce_fp, 0L, SEEK_SET);
1725                 goto ready_to_go;
1726         }
1727
1728         if (ce->ce_file) {
1729                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1730                         content_error(ce->ce_file, ct,
1731                                         "unable to fopen for reading");
1732                         return NOTOK;
1733                 }
1734                 goto ready_to_go;
1735         }
1736
1737         if (*file == NULL) {
1738                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1739                 ce->ce_unlink = 1;
1740         } else {
1741                 ce->ce_file = getcpy(*file);
1742                 ce->ce_unlink = 0;
1743         }
1744
1745         /* sbeck@cise.ufl.edu -- handle suffixes */
1746         ci = &ct->c_ctinfo;
1747         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1748                         invo_name, ci->ci_type, ci->ci_subtype);
1749         cp = context_find(buffer);
1750         if (cp == NULL || *cp == '\0') {
1751                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1752                                 ci->ci_type);
1753                 cp = context_find(buffer);
1754         }
1755         if (cp != NULL && *cp != '\0') {
1756                 if (ce->ce_unlink) {
1757                         /*
1758                         ** Temporary file already exists, so we rename to
1759                         ** version with extension.
1760                         */
1761                         char *file_org = strdup(ce->ce_file);
1762                         ce->ce_file = add(cp, ce->ce_file);
1763                         if (rename(file_org, ce->ce_file)) {
1764                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1765                                                 file_org);
1766                         }
1767                         free(file_org);
1768
1769                 } else {
1770                         ce->ce_file = add(cp, ce->ce_file);
1771                 }
1772         }
1773
1774         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1775                 content_error(ce->ce_file, ct,
1776                                 "unable to fopen for reading/writing");
1777                 return NOTOK;
1778         }
1779
1780         if ((len = ct->c_end - ct->c_begin) < 0)
1781                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1782
1783         if (!ct->c_fp) {
1784                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1785                         content_error(ct->c_file, ct,
1786                                         "unable to open for reading");
1787                         return NOTOK;
1788                 }
1789                 own_ct_fp = 1;
1790         }
1791
1792         quoted = 0;
1793
1794         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1795         while (len > 0) {
1796                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1797                         content_error(NULL, ct, "premature eof");
1798                         goto clean_up;
1799                 }
1800
1801                 if ((cc = strlen(buffer)) > len)
1802                         cc = len;
1803                 len -= cc;
1804
1805                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1806                         if (!isspace(*ep))
1807                                 break;
1808                 *++ep = '\n', ep++;
1809
1810                 for (; cp < ep; cp++) {
1811                         if (quoted > 0) {
1812                                 /* in an escape sequence */
1813                                 if (quoted == 1) {
1814                                         /* at byte 1 of an escape sequence */
1815                                         mask = hex2nib[*cp & 0x7f];
1816                                         /* next is byte 2 */
1817                                         quoted = 2;
1818                                 } else {
1819                                         /* at byte 2 of an escape sequence */
1820                                         mask <<= 4;
1821                                         mask |= hex2nib[*cp & 0x7f];
1822                                         putc(mask, ce->ce_fp);
1823                                         if (ferror(ce->ce_fp)) {
1824                                                 content_error(ce->ce_file, ct, "error writing to");
1825                                                 goto clean_up;
1826                                         }
1827                                         /*
1828                                         ** finished escape sequence; next may
1829                                         ** be literal or a new escape sequence
1830                                         */
1831                                         quoted = 0;
1832                                 }
1833                                 /* on to next byte */
1834                                 continue;
1835                         }
1836
1837                         /* not in an escape sequence */
1838                         if (*cp == '=') {
1839                                 /*
1840                                 ** starting an escape sequence,
1841                                 ** or invalid '='?
1842                                 */
1843                                 if (cp + 1 < ep && cp[1] == '\n') {
1844                                         /* "=\n" soft line break, eat the \n */
1845                                         cp++;
1846                                         continue;
1847                                 }
1848                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1849                                         /*
1850                                         ** We don't have 2 bytes left,
1851                                         ** so this is an invalid escape
1852                                         ** sequence; just show the raw bytes
1853                                         ** (below).
1854                                         */
1855                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1856                                         /*
1857                                         ** Next 2 bytes are hex digits,
1858                                         ** making this a valid escape
1859                                         ** sequence; let's decode it (above).
1860                                         */
1861                                         quoted = 1;
1862                                         continue;
1863                                 } else {
1864                                         /*
1865                                         ** One or both of the next 2 is
1866                                         ** out of range, making this an
1867                                         ** invalid escape sequence; just
1868                                         ** show the raw bytes (below).
1869                                         */
1870                                 }
1871                         }
1872
1873                         /* Just show the raw byte. */
1874                         putc(*cp, ce->ce_fp);
1875                         if (ferror(ce->ce_fp)) {
1876                                 content_error(ce->ce_file, ct,
1877                                                 "error writing to");
1878                                 goto clean_up;
1879                         }
1880                 }
1881         }
1882         if (quoted) {
1883                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1884                 goto clean_up;
1885         }
1886
1887         fseek(ct->c_fp, 0L, SEEK_SET);
1888
1889         if (fflush(ce->ce_fp)) {
1890                 content_error(ce->ce_file, ct, "error writing to");
1891                 goto clean_up;
1892         }
1893
1894         fseek(ce->ce_fp, 0L, SEEK_SET);
1895
1896 ready_to_go:
1897         *file = ce->ce_file;
1898         if (own_ct_fp) {
1899                 fclose(ct->c_fp);
1900                 ct->c_fp = NULL;
1901         }
1902         return fileno(ce->ce_fp);
1903
1904 clean_up:
1905         free_encoding(ct, 0);
1906         if (own_ct_fp) {
1907                 fclose(ct->c_fp);
1908                 ct->c_fp = NULL;
1909         }
1910         return NOTOK;
1911 }
1912
1913
1914 /*
1915 ** 7BIT
1916 */
1917
1918 static int
1919 Init7Bit(CT ct)
1920 {
1921         if (init_encoding(ct, open7Bit) == NOTOK)
1922                 return NOTOK;
1923
1924         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1925         return OK;
1926 }
1927
1928
1929 int
1930 open7Bit(CT ct, char **file)
1931 {
1932         int cc, fd, len, own_ct_fp = 0;
1933         char buffer[BUFSIZ];
1934         /* sbeck -- handle suffixes */
1935         char *cp;
1936         CI ci;
1937         CE ce;
1938
1939         ce = ct->c_cefile;
1940         if (ce->ce_fp) {
1941                 fseek(ce->ce_fp, 0L, SEEK_SET);
1942                 goto ready_to_go;
1943         }
1944
1945         if (ce->ce_file) {
1946                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1947                         content_error(ce->ce_file, ct,
1948                                         "unable to fopen for reading");
1949                         return NOTOK;
1950                 }
1951                 goto ready_to_go;
1952         }
1953
1954         if (*file == NULL) {
1955                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1956                 ce->ce_unlink = 1;
1957         } else {
1958                 ce->ce_file = getcpy(*file);
1959                 ce->ce_unlink = 0;
1960         }
1961
1962         /* sbeck@cise.ufl.edu -- handle suffixes */
1963         ci = &ct->c_ctinfo;
1964         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1965                         invo_name, ci->ci_type, ci->ci_subtype);
1966         cp = context_find(buffer);
1967         if (cp == NULL || *cp == '\0') {
1968                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1969                                 ci->ci_type);
1970                 cp = context_find(buffer);
1971         }
1972         if (cp != NULL && *cp != '\0') {
1973                 if (ce->ce_unlink) {
1974                         /*
1975                         ** Temporary file already exists, so we rename to
1976                         ** version with extension.
1977                         */
1978                         char *file_org = strdup(ce->ce_file);
1979                         ce->ce_file = add(cp, ce->ce_file);
1980                         if (rename(file_org, ce->ce_file)) {
1981                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1982                                                 file_org);
1983                         }
1984                         free(file_org);
1985
1986                 } else {
1987                         ce->ce_file = add(cp, ce->ce_file);
1988                 }
1989         }
1990
1991         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1992                 content_error(ce->ce_file, ct,
1993                                 "unable to fopen for reading/writing");
1994                 return NOTOK;
1995         }
1996
1997         if (ct->c_type == CT_MULTIPART) {
1998                 char **ap, **ep;
1999                 CI ci = &ct->c_ctinfo;
2000
2001                 len = 0;
2002                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2003                                 ci->ci_subtype);
2004                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2005                                 strlen(ci->ci_subtype);
2006                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2007                         putc(';', ce->ce_fp);
2008                         len++;
2009
2010                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2011                                         *ap, *ep);
2012
2013                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2014                                 fputs("\n\t", ce->ce_fp);
2015                                 len = 8;
2016                         } else {
2017                                 putc(' ', ce->ce_fp);
2018                                 len++;
2019                         }
2020                         fprintf(ce->ce_fp, "%s", buffer);
2021                         len += cc;
2022                 }
2023
2024                 if (ci->ci_comment) {
2025                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2026                                                 >= CPERLIN) {
2027                                 fputs("\n\t", ce->ce_fp);
2028                                 len = 8;
2029                         } else {
2030                                 putc(' ', ce->ce_fp);
2031                                 len++;
2032                         }
2033                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2034                         len += cc;
2035                 }
2036                 fprintf(ce->ce_fp, "\n");
2037                 if (ct->c_id)
2038                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2039                 if (ct->c_descr)
2040                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2041                 if (ct->c_dispo)
2042                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2043                 fprintf(ce->ce_fp, "\n");
2044         }
2045
2046         if ((len = ct->c_end - ct->c_begin) < 0)
2047                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2048
2049         if (!ct->c_fp) {
2050                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2051                         content_error(ct->c_file, ct,
2052                                         "unable to open for reading");
2053                         return NOTOK;
2054                 }
2055                 own_ct_fp = 1;
2056         }
2057
2058         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2059         while (len > 0)
2060                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2061                 case NOTOK:
2062                         content_error(ct->c_file, ct, "error reading from");
2063                         goto clean_up;
2064
2065                 case OK:
2066                         content_error(NULL, ct, "premature eof");
2067                         goto clean_up;
2068
2069                 default:
2070                         if (cc > len)
2071                                 cc = len;
2072                         len -= cc;
2073
2074                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2075                         if (ferror(ce->ce_fp)) {
2076                                 content_error(ce->ce_file, ct,
2077                                                 "error writing to");
2078                                 goto clean_up;
2079                         }
2080                 }
2081
2082         fseek(ct->c_fp, 0L, SEEK_SET);
2083
2084         if (fflush(ce->ce_fp)) {
2085                 content_error(ce->ce_file, ct, "error writing to");
2086                 goto clean_up;
2087         }
2088
2089         fseek(ce->ce_fp, 0L, SEEK_SET);
2090
2091 ready_to_go:
2092         *file = ce->ce_file;
2093         if (own_ct_fp) {
2094                 fclose(ct->c_fp);
2095                 ct->c_fp = NULL;
2096         }
2097         return fileno(ce->ce_fp);
2098
2099 clean_up:
2100         free_encoding(ct, 0);
2101         if (own_ct_fp) {
2102                 fclose(ct->c_fp);
2103                 ct->c_fp = NULL;
2104         }
2105         return NOTOK;
2106 }