m_getfld2: ERR2 means a read error, thus name it IOERR2
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
22 extern int debugsw;
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
28 /*
29 ** Directory to place temp files.  This must
30 ** be set before these routines are called.
31 */
32 char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
/*
** Subtype names of text/ and the TEXT_* code assigned to each.
** The entry with the NULL name closes the list and holds TEXT_UNKNOWN
** (presumably the result for any name not listed; the lookup code is
** not part of this table's definition).
*/
struct k2v SubText[] = {
        { "plain", TEXT_PLAIN },
        { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
        { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
        { NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
43
/*
** Character set names and the CHARSET_* code assigned to each.
** The entry with the NULL name closes the list and holds
** CHARSET_UNKNOWN.
*/
struct k2v Charset[] = {
        { "us-ascii",   CHARSET_USASCII },
        { "iso-8859-1", CHARSET_LATIN },
        { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
49
50 /*
51 ** Structures for MULTIPART messages
52 */
/*
** Subtype names of multipart/ and the MULTI_* code assigned to each.
** The entry with the NULL name closes the list and holds MULTI_UNKNOWN.
*/
struct k2v SubMultiPart[] = {
        { "mixed",       MULTI_MIXED },
        { "alternative", MULTI_ALTERNATE },
        { "digest",      MULTI_DIGEST },
        { "parallel",    MULTI_PARALLEL },
        { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
60
61 /*
62 ** Structures for MESSAGE messages
63 */
/*
** Subtype names of message/ and the MESSAGE_* code assigned to each.
** The entry with the NULL name closes the list and holds
** MESSAGE_UNKNOWN.
*/
struct k2v SubMessage[] = {
        { "rfc822",        MESSAGE_RFC822 },
        { "partial",       MESSAGE_PARTIAL },
        { "external-body", MESSAGE_EXTERNAL },
        { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
70
71 /*
72 ** Structure for APPLICATION messages
73 */
/*
** Subtype names of application/ and the APPLICATION_* code assigned
** to each.  The entry with the NULL name closes the list and holds
** APPLICATION_UNKNOWN.
*/
struct k2v SubApplication[] = {
        { "octet-stream", APPLICATION_OCTETS },
        { "postscript",   APPLICATION_POSTSCRIPT },
        { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
/*
** Content types: name (si_key), CT_* code (si_val) and the Init
** function (si_init) to run for contents of that type.
**
** get_content() compares the type name against si_key, ignoring case,
** until it hits an entry whose si_key is NULL.  When nothing matched,
** it stays on that first NULL entry (CT_EXTENSION) if uprf() accepts
** "X-" for the type name, and otherwise steps on to the next entry
** (CT_UNKNOWN).  That is why the order of the two NULL entries matters.
*/
struct str2init str2cts[] = {
        { "application", CT_APPLICATION, InitApplication },
        { "audio",       CT_AUDIO,       InitGeneric },
        { "image",       CT_IMAGE,       InitGeneric },
        { "message",     CT_MESSAGE,     InitMessage },
        { "multipart",   CT_MULTIPART,   InitMultiPart },
        { "text",        CT_TEXT,        InitText },
        { "video",       CT_VIDEO,       InitGeneric },
        { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
        { NULL,          CT_UNKNOWN,     NULL },
};
122
/*
** Content transfer encodings: name (si_key), CE_* code (si_val) and
** the Init function (si_init) that get_content() calls for a content
** with that encoding.  8bit, 7bit and binary share Init7Bit.
**
** The lookup in get_content() works like the one for str2cts: on no
** match the first NULL entry (CE_EXTENSION) is kept if uprf() accepts
** "X-" for the encoding name, else the following entry (CE_UNKNOWN)
** is used.  Both have no Init function.
*/
struct str2init str2ces[] = {
        { "base64",           CE_BASE64,    InitBase64 },
        { "quoted-printable", CE_QUOTED,    InitQuoted },
        { "8bit",             CE_8BIT,      Init7Bit },
        { "7bit",             CE_7BIT,      Init7Bit },
        { "binary",           CE_BINARY,    Init7Bit },
        { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
        { NULL,               CE_UNKNOWN,    NULL },
};
132
133
/*
** Look at a wait status.  If its high byte is not 0xff and its low
** seven bits equal SIGQUIT, flush stdout and stderr and terminate with
** EX_SOFTWARE.  In every other case hand the status back unchanged.
*/
int
pidcheck(int status)
{
        int high_byte_set = (status & 0xff00) == 0xff00;
        int low_is_quit = (status & 0x007f) == SIGQUIT;

        if (!high_byte_set && low_is_quit) {
                fflush(stdout);
                fflush(stderr);
                exit(EX_SOFTWARE);
        }

        return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = (CT) mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = getcpy(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD2;;) {
252                 switch (state = m_getfld2(state, &f, in)) {
253                 case FLD2:
254                         compnum++;
255
256                         /* add the header data to the list */
257                         add_header(ct, getcpy(f.name), getcpy(f.value));
258
259                         ct->c_begin = ftell(in) + 1;
260                         continue;
261
262                 case BODY2:
263                         ct->c_begin = ftell(in) - strlen(f.value);
264                         break;
265
266                 case FILEEOF2:
267                         ct->c_begin = ftell(in);
268                         break;
269
270                 case LENERR2:
271                 case FMTERR2:
272                 case IOERR2:
273                         adios(EX_DATAERR, NULL, "message format error in component #%d",
274                                         compnum);
275
276                 default:
277                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
278                 }
279                 break;
280         }
281
282         /*
283         ** Read the content headers.  We will parse the
284         ** MIME related header fields into their various
285         ** structures and set internal flags related to
286         ** content type/subtype, etc.
287         */
288
289         hp = ct->c_first_hf;  /* start at first header field */
290         while (hp) {
291                 /* Get MIME-Version field */
292                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
293                         int ucmp;
294                         char c;
295                         unsigned char *cp, *dp;
296
297                         if (ct->c_vrsn) {
298                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
299                                 goto next_header;
300                         }
301                         ct->c_vrsn = getcpy(hp->value);
302
303                         /* Now, cleanup this field */
304                         cp = ct->c_vrsn;
305
306                         while (isspace(*cp))
307                                 cp++;
308                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
309                                 *dp++ = ' ';
310                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
311                                 if (!isspace(*dp))
312                                         break;
313                         *++dp = '\0';
314                         if (debugsw)
315                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
316
317                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
318                                 goto out;
319
320                         for (dp = cp; istoken(*dp); dp++)
321                                 continue;
322                         c = *dp;
323                         *dp = '\0';
324                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
325                         *dp = c;
326                         if (!ucmp) {
327                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
328                         }
329
330                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
331                         /* Get Content-Type field */
332                         struct str2init *s2i;
333                         CI ci = &ct->c_ctinfo;
334
335                         /* Check if we've already seen a Content-Type header */
336                         if (ct->c_ctline) {
337                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
338                                 goto next_header;
339                         }
340
341                         /* Parse the Content-Type field */
342                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
343                                 goto out;
344
345                         /*
346                         ** Set the Init function and the internal
347                         ** flag for this content type.
348                         */
349                         for (s2i = str2cts; s2i->si_key; s2i++)
350                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
351                                         break;
352                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
353                                 s2i++;
354                         ct->c_type = s2i->si_val;
355                         ct->c_ctinitfnx = s2i->si_init;
356
357                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
358                         /* Get Content-Transfer-Encoding field */
359                         char c;
360                         unsigned char *cp, *dp;
361                         struct str2init *s2i;
362
363                         /*
364                         ** Check if we've already seen the
365                         ** Content-Transfer-Encoding field
366                         */
367                         if (ct->c_celine) {
368                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
369                                 goto next_header;
370                         }
371
372                         /* get copy of this field */
373                         ct->c_celine = cp = getcpy(hp->value);
374
375                         while (isspace(*cp))
376                                 cp++;
377                         for (dp = cp; istoken(*dp); dp++)
378                                 continue;
379                         c = *dp;
380                         *dp = '\0';
381
382                         /*
383                         ** Find the internal flag and Init function
384                         ** for this transfer encoding.
385                         */
386                         for (s2i = str2ces; s2i->si_key; s2i++)
387                                 if (!mh_strcasecmp(cp, s2i->si_key))
388                                         break;
389                         if (!s2i->si_key && !uprf(cp, "X-"))
390                                 s2i++;
391                         *dp = c;
392                         ct->c_encoding = s2i->si_val;
393
394                         /* Call the Init function for this encoding */
395                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
396                                 goto out;
397
398                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
399                         /* Get Content-ID field */
400                         ct->c_id = add(hp->value, ct->c_id);
401
402                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
403                         /* Get Content-Description field */
404                         ct->c_descr = add(hp->value, ct->c_descr);
405
406                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
407                         /* Get Content-Disposition field */
408                         ct->c_dispo = add(hp->value, ct->c_dispo);
409                 }
410
411 next_header:
412                 hp = hp->next;  /* next header field */
413         }
414
415         /*
416         ** Check if we saw a Content-Type field.
417         ** If not, then assign a default value for
418         ** it, and the Init function.
419         */
420         if (!ct->c_ctline) {
421                 /*
422                 ** If we are inside a multipart/digest message,
423                 ** so default type is message/rfc822
424                 */
425                 if (toplevel < 0) {
426                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
427                                 goto out;
428                         ct->c_type = CT_MESSAGE;
429                         ct->c_ctinitfnx = InitMessage;
430                 } else {
431                         /*
432                         ** Else default type is text/plain
433                         */
434                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
435                                 goto out;
436                         ct->c_type = CT_TEXT;
437                         ct->c_ctinitfnx = InitText;
438                 }
439         }
440
441         /* Use default Transfer-Encoding, if necessary */
442         if (!ct->c_celine) {
443                 ct->c_encoding = CE_7BIT;
444                 Init7Bit(ct);
445         }
446
447         return ct;
448
449 out:
450         free_content(ct);
451         return NULL;
452 }
453
454
455 /*
456 ** small routine to add header field to list
457 */
458
459 int
460 add_header(CT ct, char *name, char *value)
461 {
462         HF hp;
463
464         /* allocate header field structure */
465         hp = mh_xmalloc(sizeof(*hp));
466
467         /* link data into header structure */
468         hp->name = name;
469         hp->value = value;
470         hp->next = NULL;
471
472         /* link header structure into the list */
473         if (ct->c_first_hf == NULL) {
474                 ct->c_first_hf = hp;  /* this is the first */
475                 ct->c_last_hf = hp;
476         } else {
477                 ct->c_last_hf->next = hp;  /* add it to the end */
478                 ct->c_last_hf = hp;
479         }
480
481         return 0;
482 }
483
484
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** Returns buf itself if nothing had to be inserted (also if buf or
** value is NULL).  Otherwise buf is freed and a newly allocated
** string, ending in a newline, is returned.  Trailing whitespace of
** buf is cut off before the insertion.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
        char *text = (char *)buf;
        char *newbuf = text;
        char *name_plus_equal;
        char *insertion, *prefix, *semi;
        size_t len;

        /* Assume that name is non-null. */
        if (!buf || !value)
                return newbuf;

        name_plus_equal = concat(name, "=", NULL);

        if (!strstr(text, name_plus_equal)) {
                /*
                ** Trim trailing space, esp. newline.  Counting down
                ** a length keeps this safe for an empty buf.
                */
                len = strlen(text);
                while (len > 0 && isspace(buf[len - 1]))
                        buf[--len] = '\0';

                insertion = concat("; ", name, "=", "\"", value, "\"",
                                NULL);

                /*
                ** Insert at first semicolon, if any.
                ** If none, append to end.
                */
                prefix = getcpy(text);
                if ((semi = strchr(prefix, ';'))) {
                        /* keep the tail, then cut prefix at the semicolon */
                        char *suffix = concat(semi, NULL);

                        *semi = '\0';
                        newbuf = concat(prefix, insertion, suffix,
                                        "\n", NULL);
                        free(suffix);
                } else {
                        /* Append to end. */
                        newbuf = concat(text, insertion, "\n", NULL);
                }

                free(prefix);
                free(insertion);
                free(buf);
        }

        free(name_plus_equal);

        return newbuf;
}
542
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** A match whose closing quote is missing counts as no match, so the
** search never runs past the end of value.
**
** The result is either value itself or a newly allocated string; the
** caller can tell the two apart by comparing the result with value.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
        char *pattern = concat(name_suffix, "=\"", NULL);
        char *match = strstr(value, pattern);
        char *begin, *end, *extracted;
        size_t len;

        free(pattern);
        if (!match)
                return value;

        /* the pattern ends in a quote, so this strchr() cannot fail */
        begin = strchr(match, '"') + 1;
        end = strchr(begin, '"');
        if (!end)
                return value;  /* unterminated quoted string */

        len = end - begin;
        extracted = mh_xmalloc(len + 1);
        memcpy(extracted, begin, len);
        extracted[len] = '\0';

        return extracted;
}
575
576 /*
577 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
578 ** directives.  Fills in the information of the CTinfo structure.
579 */
580 int
581 get_ctinfo(unsigned char *cp, CT ct, int magic)
582 {
583         int i;
584         unsigned char *dp;
585         char **ap, **ep;
586         char c;
587         CI ci;
588
589         ci = &ct->c_ctinfo;
590         i = strlen(invo_name) + 2;
591
592         /* store copy of Content-Type line */
593         cp = ct->c_ctline = getcpy(cp);
594
595         while (isspace(*cp))  /* trim leading spaces */
596                 cp++;
597
598         /* change newlines to spaces */
599         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
600                 *dp++ = ' ';
601
602         /* trim trailing spaces */
603         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
604                 if (!isspace(*dp))
605                         break;
606         *++dp = '\0';
607
608         if (debugsw)
609                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
610
611         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
612                 return NOTOK;
613
614         for (dp = cp; istoken(*dp); dp++)
615                 continue;
616         c = *dp, *dp = '\0';
617         ci->ci_type = getcpy(cp);  /* store content type */
618         *dp = c, cp = dp;
619
620         if (!*ci->ci_type) {
621                 advise(NULL, "invalid %s: field in message %s (empty type)",
622                                 TYPE_FIELD, ct->c_file);
623                 return NOTOK;
624         }
625
626         /* down case the content type string */
627         for (dp = ci->ci_type; *dp; dp++)
628                 if (isalpha(*dp) && isupper(*dp))
629                         *dp = tolower(*dp);
630
631         while (isspace(*cp))
632                 cp++;
633
634         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
635                 return NOTOK;
636
637         if (*cp != '/') {
638                 if (!magic)
639                         ci->ci_subtype = getcpy("");
640                 goto magic_skip;
641         }
642
643         cp++;
644         while (isspace(*cp))
645                 cp++;
646
647         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
648                 return NOTOK;
649
650         for (dp = cp; istoken(*dp); dp++)
651                 continue;
652         c = *dp, *dp = '\0';
653         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
654         *dp = c, cp = dp;
655
656         if (!*ci->ci_subtype) {
657                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
658                 return NOTOK;
659         }
660
661         /* down case the content subtype string */
662         for (dp = ci->ci_subtype; *dp; dp++)
663                 if (isalpha(*dp) && isupper(*dp))
664                         *dp = tolower(*dp);
665
666 magic_skip:
667         while (isspace(*cp))
668                 cp++;
669
670         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
671                 return NOTOK;
672
673         /*
674         ** Parse attribute/value pairs given with Content-Type
675         */
676         ep = (ap = ci->ci_attrs) + NPARMS;
677         while (*cp == ';') {
678                 char *vp;
679                 unsigned char *up;
680
681                 if (ap >= ep) {
682                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
683                         return NOTOK;
684                 }
685
686                 cp++;
687                 while (isspace(*cp))
688                         cp++;
689
690                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
691                         return NOTOK;
692
693                 if (*cp == 0) {
694                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
695                         return OK;
696                 }
697
698                 /* down case the attribute name */
699                 for (dp = cp; istoken(*dp); dp++)
700                         if (isalpha(*dp) && isupper(*dp))
701                                 *dp = tolower(*dp);
702
703                 for (up = dp; isspace(*dp);)
704                         dp++;
705                 if (dp == cp || *dp != '=') {
706                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
707                         return NOTOK;
708                 }
709
710                 vp = (*ap = getcpy(cp)) + (up - cp);
711                 *vp = '\0';
712                 for (dp++; isspace(*dp);)
713                         dp++;
714
715                 /* now add the attribute value */
716                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
717
718                 if (*dp == '"') {
719                         for (cp = ++dp, dp = vp;;) {
720                                 switch (c = *cp++) {
721                                 case '\0':
722 bad_quote:
723                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
724                                         return NOTOK;
725
726                                 case '\\':
727                                         *dp++ = c;
728                                         if ((c = *cp++) == '\0')
729                                                 goto bad_quote;
730                                         /* else fall... */
731
732                                 default:
733                                         *dp++ = c;
734                                         continue;
735
736                                 case '"':
737                                         *dp = '\0';
738                                         break;
739                                 }
740                                 break;
741                         }
742                 } else {
743                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
744                                 continue;
745                         *dp = '\0';
746                 }
747                 if (!*vp) {
748                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
749                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
750                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
751                         continue;
752                 }
753                 ap++;
754
755                 while (isspace(*cp))
756                         cp++;
757
758                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
759                         return NOTOK;
760         }
761
762         /*
763         ** Get any <Content-Id> given in buffer
764         */
765         if (magic && *cp == '<') {
766                 if (ct->c_id) {
767                         free(ct->c_id);
768                         ct->c_id = NULL;
769                 }
770                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
771                         advise(NULL, "invalid ID in message %s", ct->c_file);
772                         return NOTOK;
773                 }
774                 c = *dp;
775                 *dp = '\0';
776                 if (*ct->c_id)
777                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
778                 else
779                         ct->c_id = NULL;
780                 *dp++ = c;
781                 cp = dp;
782
783                 while (isspace(*cp))
784                         cp++;
785         }
786
787         /*
788         ** Get any [Content-Description] given in buffer.
789         */
790         if (magic && *cp == '[') {
791                 ct->c_descr = ++cp;
792                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
793                         if (*dp == ']')
794                                 break;
795                 if (dp < cp) {
796                         advise(NULL, "invalid description in message %s",
797                                         ct->c_file);
798                         ct->c_descr = NULL;
799                         return NOTOK;
800                 }
801
802                 c = *dp;
803                 *dp = '\0';
804                 if (*ct->c_descr)
805                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
806                 else
807                         ct->c_descr = NULL;
808                 *dp++ = c;
809                 cp = dp;
810
811                 while (isspace(*cp))
812                         cp++;
813         }
814
815         /*
816         ** Get any {Content-Disposition} given in buffer.
817         */
818         if (magic && *cp == '{') {
819                 ct->c_dispo = ++cp;
820                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
821                         if (*dp == '}')
822                                 break;
823                 if (dp < cp) {
824                         advise(NULL, "invalid disposition in message %s",
825                                         ct->c_file);
826                         ct->c_dispo = NULL;
827                         return NOTOK;
828                 }
829
830                 c = *dp;
831                 *dp = '\0';
832                 if (*ct->c_dispo)
833                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
834                 else
835                         ct->c_dispo = NULL;
836                 *dp++ = c;
837                 cp = dp;
838
839                 while (isspace(*cp))
840                         cp++;
841         }
842
843         /*
844         ** Check if anything is left over
845         */
846         if (*cp) {
847                 if (magic) {
848                         ci->ci_magic = getcpy(cp);
849
850                         /*
851                         ** If there is a Content-Disposition header and
852                         ** it doesn't have a *filename=, extract it from
853                         ** the magic contents.  The mhbasename call skips
854                         ** any leading directory components.
855                         */
856                         if (ct->c_dispo)
857                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
858                         } else
859                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
860         }
861
862         return OK;
863 }
864
865
866 static int
867 get_comment(CT ct, unsigned char **ap, int istype)
868 {
869         int i;
870         char *bp;
871         unsigned char *cp;
872         char c, buffer[BUFSIZ], *dp;
873         CI ci;
874
875         ci = &ct->c_ctinfo;
876         cp = *ap;
877         bp = buffer;
878         cp++;
879
880         for (i = 0;;) {
881                 switch (c = *cp++) {
882                 case '\0':
883 invalid:
884                 advise(NULL, "invalid comment in message %s's %s: field",
885                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
886                 return NOTOK;
887
888                 case '\\':
889                         *bp++ = c;
890                         if ((c = *cp++) == '\0')
891                                 goto invalid;
892                         *bp++ = c;
893                         continue;
894
895                 case '(':
896                         i++;
897                         /* and fall... */
898                 default:
899                         *bp++ = c;
900                         continue;
901
902                 case ')':
903                         if (--i < 0)
904                                 break;
905                         *bp++ = c;
906                         continue;
907                 }
908                 break;
909         }
910         *bp = '\0';
911
912         if (istype) {
913                 if ((dp = ci->ci_comment)) {
914                         ci->ci_comment = concat(dp, " ", buffer, NULL);
915                         free(dp);
916                 } else {
917                         ci->ci_comment = getcpy(buffer);
918                 }
919         }
920
921         while (isspace(*cp))
922                 cp++;
923
924         *ap = cp;
925         return OK;
926 }
927
928
929 /*
930 ** CONTENTS
931 **
932 ** Handles content types audio, image, and video.
933 ** There's not much to do right here.
934 */
935
936 static int
937 InitGeneric(CT ct)
938 {
939         return OK;  /* not much to do here */
940 }
941
942
943 /*
944 ** TEXT
945 */
946
947 static int
948 InitText(CT ct)
949 {
950         char **ap, **ep;
951         struct k2v *kv;
952         struct text *t;
953         CI ci = &ct->c_ctinfo;
954
955         /* check for missing subtype */
956         if (!*ci->ci_subtype)
957                 ci->ci_subtype = add("plain", ci->ci_subtype);
958
959         /* match subtype */
960         for (kv = SubText; kv->kv_key; kv++)
961                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
962                         break;
963         ct->c_subtype = kv->kv_value;
964
965         /* allocate text character set structure */
966         t = (struct text *) mh_xcalloc(1, sizeof(*t));
967         ct->c_ctparams = (void *) t;
968
969         /* scan for charset parameter */
970         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
971                 if (!mh_strcasecmp(*ap, "charset"))
972                         break;
973
974         /* check if content specified a character set */
975         if (*ap) {
976                 /* store its name */
977                 ct->c_charset = getcpy(norm_charmap(*ep));
978                 /* match character set or set to CHARSET_UNKNOWN */
979                 for (kv = Charset; kv->kv_key; kv++) {
980                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
981                                 break;
982                         }
983                 }
984                 t->tx_charset = kv->kv_value;
985         } else {
986                 t->tx_charset = CHARSET_UNSPECIFIED;
987         }
988
989         return OK;
990 }
991
992
993 /*
994 ** MULTIPART
995 */
996
997 static int
998 InitMultiPart(CT ct)
999 {
1000         int inout;
1001         long last, pos;
1002         unsigned char *cp, *dp;
1003         char **ap, **ep;
1004         char *bp, buffer[BUFSIZ];
1005         struct multipart *m;
1006         struct k2v *kv;
1007         struct part *part, **next;
1008         CI ci = &ct->c_ctinfo;
1009         CT p;
1010         FILE *fp;
1011
1012         /*
1013         ** The encoding for multipart messages must be either
1014         ** 7bit, 8bit, or binary (per RFC2045).
1015         */
1016         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1017                 && ct->c_encoding != CE_BINARY) {
1018                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1019                 ct->c_encoding = CE_7BIT;
1020         }
1021
1022         /* match subtype */
1023         for (kv = SubMultiPart; kv->kv_key; kv++)
1024                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1025                         break;
1026         ct->c_subtype = kv->kv_value;
1027
1028         /*
1029         ** Check for "boundary" parameter, which is
1030         ** required for multipart messages.
1031         */
1032         bp = 0;
1033         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1034                 if (!mh_strcasecmp(*ap, "boundary")) {
1035                         bp = *ep;
1036                         break;
1037                 }
1038         }
1039
1040         /* complain if boundary parameter is missing */
1041         if (!*ap) {
1042                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1043                 return NOTOK;
1044         }
1045
1046         /* allocate primary structure for multipart info */
1047         m = (struct multipart *) mh_xcalloc(1, sizeof(*m));
1048         ct->c_ctparams = (void *) m;
1049
1050         /* check if boundary parameter contains only whitespace characters */
1051         for (cp = bp; isspace(*cp); cp++)
1052                 continue;
1053         if (!*cp) {
1054                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1055                 return NOTOK;
1056         }
1057
1058         /* remove trailing whitespace from boundary parameter */
1059         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1060                 if (!isspace(*dp))
1061                         break;
1062         *++dp = '\0';
1063
1064         /* record boundary separators */
1065         m->mp_start = concat(bp, "\n", NULL);
1066         m->mp_stop = concat(bp, "--\n", NULL);
1067
1068         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1069                 advise(ct->c_file, "unable to open for reading");
1070                 return NOTOK;
1071         }
1072
1073         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1074         last = ct->c_end;
1075         next = &m->mp_parts;
1076         part = NULL;
1077         inout = 1;
1078
1079         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1080                 if (pos > last)
1081                         break;
1082
1083                 pos += strlen(buffer);
1084                 if (buffer[0] != '-' || buffer[1] != '-')
1085                         continue;
1086                 if (inout) {
1087                         if (strcmp(buffer + 2, m->mp_start)!=0)
1088                                 continue;
1089 next_part:
1090                         part = (struct part *) mh_xcalloc(1, sizeof(*part));
1091                         *next = part;
1092                         next = &part->mp_next;
1093
1094                         if (!(p = get_content(fp, ct->c_file,
1095                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1096                                 ct->c_fp = NULL;
1097                                 return NOTOK;
1098                         }
1099                         p->c_fp = NULL;
1100                         part->mp_part = p;
1101                         pos = p->c_begin;
1102                         fseek(fp, pos, SEEK_SET);
1103                         inout = 0;
1104                 } else {
1105                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1106                                 inout = 1;
1107 end_part:
1108                                 p = part->mp_part;
1109                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1110                                 if (p->c_end < p->c_begin)
1111                                         p->c_begin = p->c_end;
1112                                 if (inout)
1113                                         goto next_part;
1114                                 goto last_part;
1115                         } else {
1116                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1117                                         goto end_part;
1118                         }
1119                 }
1120         }
1121
1122         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1123         if (!inout && part) {
1124                 p = part->mp_part;
1125                 p->c_end = ct->c_end;
1126
1127                 if (p->c_begin >= p->c_end) {
1128                         for (next = &m->mp_parts; *next != part;
1129                                 next = &((*next)->mp_next))
1130                                 continue;
1131                         *next = NULL;
1132                         free_content(p);
1133                         free((char *) part);
1134                 }
1135         }
1136
1137 last_part:
1138         /* reverse the order of the parts for multipart/alternative */
1139         if (ct->c_subtype == MULTI_ALTERNATE)
1140                 reverse_parts(ct);
1141
1142         /*
1143         ** label all subparts with part number, and
1144         ** then initialize the content of the subpart.
1145         */
1146         {
1147                 int partnum;
1148                 char *pp;
1149                 char partnam[BUFSIZ];
1150
1151                 if (ct->c_partno) {
1152                         snprintf(partnam, sizeof(partnam), "%s.",
1153                                         ct->c_partno);
1154                         pp = partnam + strlen(partnam);
1155                 } else {
1156                         pp = partnam;
1157                 }
1158
1159                 for (part = m->mp_parts, partnum = 1; part;
1160                         part = part->mp_next, partnum++) {
1161                         p = part->mp_part;
1162
1163                         sprintf(pp, "%d", partnum);
1164                         p->c_partno = getcpy(partnam);
1165
1166                         /* initialize the content of the subparts */
1167                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1168                                 fclose(ct->c_fp);
1169                                 ct->c_fp = NULL;
1170                                 return NOTOK;
1171                         }
1172                 }
1173         }
1174
1175         fclose(ct->c_fp);
1176         ct->c_fp = NULL;
1177         return OK;
1178 }
1179
1180
1181 /*
1182 ** reverse the order of the parts of a multipart
1183 */
1184
1185 static void
1186 reverse_parts(CT ct)
1187 {
1188         int i;
1189         struct multipart *m;
1190         struct part **base, **bmp, **next, *part;
1191
1192         m = (struct multipart *) ct->c_ctparams;
1193
1194         /* if only one part, just return */
1195         if (!m->mp_parts || !m->mp_parts->mp_next)
1196                 return;
1197
1198         /* count number of parts */
1199         i = 0;
1200         for (part = m->mp_parts; part; part = part->mp_next)
1201                 i++;
1202
1203         /* allocate array of pointers to the parts */
1204         base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base));
1205         bmp = base;
1206
1207         /* point at all the parts */
1208         for (part = m->mp_parts; part; part = part->mp_next)
1209                 *bmp++ = part;
1210         *bmp = NULL;
1211
1212         /* reverse the order of the parts */
1213         next = &m->mp_parts;
1214         for (bmp--; bmp >= base; bmp--) {
1215                 part = *bmp;
1216                 *next = part;
1217                 next = &part->mp_next;
1218         }
1219         *next = NULL;
1220
1221         /* free array of pointers */
1222         free((char *) base);
1223 }
1224
1225
1226 /*
1227 ** MESSAGE
1228 */
1229
1230 static int
1231 InitMessage(CT ct)
1232 {
1233         struct k2v *kv;
1234         CI ci = &ct->c_ctinfo;
1235
1236         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1237                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1238                 return NOTOK;
1239         }
1240
1241         /* check for missing subtype */
1242         if (!*ci->ci_subtype)
1243                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1244
1245         /* match subtype */
1246         for (kv = SubMessage; kv->kv_key; kv++)
1247                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1248                         break;
1249         ct->c_subtype = kv->kv_value;
1250
1251         switch (ct->c_subtype) {
1252         case MESSAGE_RFC822:
1253                 break;
1254
1255         case MESSAGE_PARTIAL:
1256                 {
1257                 char **ap, **ep;
1258                 struct partial *p;
1259
1260                 p = (struct partial *) mh_xcalloc(1, sizeof(*p));
1261                 ct->c_ctparams = (void *) p;
1262
1263                 /*
1264                 ** scan for parameters "id", "number",
1265                 ** and "total"
1266                 */
1267                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1268                         if (!mh_strcasecmp(*ap, "id")) {
1269                                 p->pm_partid = getcpy(*ep);
1270                                 continue;
1271                         }
1272                         if (!mh_strcasecmp(*ap, "number")) {
1273                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1274 invalid_param:
1275                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1276                                         return NOTOK;
1277                                 }
1278                                 continue;
1279                         }
1280                         if (!mh_strcasecmp(*ap, "total")) {
1281                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1282                                                 p->pm_maxno < 1)
1283                                         goto invalid_param;
1284                                 continue;
1285                         }
1286                 }
1287
1288                 if (!p->pm_partid || !p->pm_partno
1289                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1290                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1291                         return NOTOK;
1292                 }
1293                 }
1294                 break;
1295
1296         case MESSAGE_EXTERNAL:
1297                 {
1298                 CT p;
1299                 FILE *fp;
1300
1301                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1302                         advise(ct->c_file, "unable to open for reading");
1303                         return NOTOK;
1304                 }
1305
1306                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1307
1308                 if (!(p = get_content(fp, ct->c_file, 0))) {
1309                         ct->c_fp = NULL;
1310                         return NOTOK;
1311                 }
1312
1313                 p->c_fp = NULL;
1314                 p->c_end = p->c_begin;
1315
1316                 fclose(ct->c_fp);
1317                 ct->c_fp = NULL;
1318
1319                 switch (p->c_type) {
1320                 case CT_MULTIPART:
1321                         break;
1322
1323                 case CT_MESSAGE:
1324                         if (p->c_subtype != MESSAGE_RFC822)
1325                                 break;
1326                         /* else fall... */
1327                 default:
1328                         if (p->c_ctinitfnx)
1329                                 (*p->c_ctinitfnx) (p);
1330                         break;
1331                 }
1332                 }
1333                 break;
1334
1335         default:
1336                 break;
1337         }
1338
1339         return OK;
1340 }
1341
1342
1343 /*
1344 ** APPLICATION
1345 */
1346
1347 static int
1348 InitApplication(CT ct)
1349 {
1350         struct k2v *kv;
1351         CI ci = &ct->c_ctinfo;
1352
1353         /* match subtype */
1354         for (kv = SubApplication; kv->kv_key; kv++)
1355                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1356                         break;
1357         ct->c_subtype = kv->kv_value;
1358
1359         return OK;
1360 }
1361
1362
1363 /*
1364 ** TRANSFER ENCODINGS
1365 */
1366
1367 static int
1368 init_encoding(CT ct, OpenCEFunc openfnx)
1369 {
1370         CE ce;
1371
1372         ce = (CE) mh_xcalloc(1, sizeof(*ce));
1373
1374         ct->c_cefile     = ce;
1375         ct->c_ceopenfnx  = openfnx;
1376         ct->c_ceclosefnx = close_encoding;
1377         ct->c_cesizefnx  = size_encoding;
1378
1379         return OK;
1380 }
1381
1382
1383 void
1384 close_encoding(CT ct)
1385 {
1386         CE ce;
1387
1388         if (!(ce = ct->c_cefile))
1389                 return;
1390
1391         if (ce->ce_fp) {
1392                 fclose(ce->ce_fp);
1393                 ce->ce_fp = NULL;
1394         }
1395 }
1396
1397
1398 static unsigned long
1399 size_encoding(CT ct)
1400 {
1401         int fd;
1402         unsigned long size;
1403         char *file;
1404         CE ce;
1405         struct stat st;
1406
1407         if (!(ce = ct->c_cefile))
1408                 return (ct->c_end - ct->c_begin);
1409
1410         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1411                 return (long) st.st_size;
1412
1413         if (ce->ce_file) {
1414                 if (stat(ce->ce_file, &st) != NOTOK)
1415                         return (long) st.st_size;
1416                 else
1417                         return 0L;
1418         }
1419
1420         if (ct->c_encoding == CE_EXTERNAL)
1421                 return (ct->c_end - ct->c_begin);
1422
1423         file = NULL;
1424         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1425                 return (ct->c_end - ct->c_begin);
1426
1427         if (fstat(fd, &st) != NOTOK)
1428                 size = (long) st.st_size;
1429         else
1430                 size = 0L;
1431
1432         (*ct->c_ceclosefnx) (ct);
1433         return size;
1434 }
1435
1436
1437 /*
1438 ** BASE64
1439 */
1440
/*
** Maps a 7-bit character code to its 6-bit BASE64 value.
** Characters outside the BASE64 alphabet map to 0xff.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 - 0x07 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x08 - 0x0f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 - 0x17 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x18 - 0x1f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20 - 0x27 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x28 - 0x2f: '+' and '/' */
        0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        /* 0x30 - 0x37: '0' .. '7' */
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
        /* 0x38 - 0x3f: '8', '9' */
        0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 - 0x47: 'A' .. 'G' */
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
        /* 0x48 - 0x4f: 'H' .. 'O' */
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        /* 0x50 - 0x57: 'P' .. 'W' */
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
        /* 0x58 - 0x5f: 'X' .. 'Z' */
        0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 - 0x67: 'a' .. 'g' */
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
        /* 0x68 - 0x6f: 'h' .. 'o' */
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        /* 0x70 - 0x77: 'p' .. 'w' */
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
        /* 0x78 - 0x7f: 'x' .. 'z' */
        0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1459
1460
1461 static int
1462 InitBase64(CT ct)
1463 {
1464         return init_encoding(ct, openBase64);
1465 }
1466
1467
1468 static int
1469 openBase64(CT ct, char **file)
1470 {
1471         int bitno, cc;
1472         int fd, len, skip, own_ct_fp = 0;
1473         unsigned long bits;
1474         unsigned char value, *b, *b1, *b2, *b3;
1475         unsigned char *cp, *ep;
1476         char buffer[BUFSIZ];
1477         /* sbeck -- handle suffixes */
1478         CI ci;
1479         CE ce;
1480
1481         b  = (unsigned char *) &bits;
1482         b1 = &b[endian > 0 ? 1 : 2];
1483         b2 = &b[endian > 0 ? 2 : 1];
1484         b3 = &b[endian > 0 ? 3 : 0];
1485
1486         ce = ct->c_cefile;
1487         if (ce->ce_fp) {
1488                 fseek(ce->ce_fp, 0L, SEEK_SET);
1489                 goto ready_to_go;
1490         }
1491
1492         if (ce->ce_file) {
1493                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1494                         content_error(ce->ce_file, ct,
1495                                         "unable to fopen for reading");
1496                         return NOTOK;
1497                 }
1498                 goto ready_to_go;
1499         }
1500
1501         if (*file == NULL) {
1502                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1503                 ce->ce_unlink = 1;
1504         } else {
1505                 ce->ce_file = getcpy(*file);
1506                 ce->ce_unlink = 0;
1507         }
1508
1509         /* sbeck@cise.ufl.edu -- handle suffixes */
1510         ci = &ct->c_ctinfo;
1511         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1512                         invo_name, ci->ci_type, ci->ci_subtype);
1513         cp = context_find(buffer);
1514         if (cp == NULL || *cp == '\0') {
1515                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1516                                 ci->ci_type);
1517                 cp = context_find(buffer);
1518         }
1519         if (cp != NULL && *cp != '\0') {
1520                 if (ce->ce_unlink) {
1521                         /*
1522                         ** Temporary file already exists, so we rename to
1523                         ** version with extension.
1524                         */
1525                         char *file_org = strdup(ce->ce_file);
1526                         ce->ce_file = add(cp, ce->ce_file);
1527                         if (rename(file_org, ce->ce_file)) {
1528                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1529                                                 file_org);
1530                         }
1531                         free(file_org);
1532
1533                 } else {
1534                         ce->ce_file = add(cp, ce->ce_file);
1535                 }
1536         }
1537
1538         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1539                 content_error(ce->ce_file, ct,
1540                                 "unable to fopen for reading/writing");
1541                 return NOTOK;
1542         }
1543
1544         if ((len = ct->c_end - ct->c_begin) < 0)
1545                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1546
1547         if (!ct->c_fp) {
1548                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1549                         content_error(ct->c_file, ct,
1550                                         "unable to open for reading");
1551                         return NOTOK;
1552                 }
1553                 own_ct_fp = 1;
1554         }
1555
1556         bitno = 18;
1557         bits = 0L;
1558         skip = 0;
1559
1560         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1561         while (len > 0) {
1562                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1563                 case NOTOK:
1564                         content_error(ct->c_file, ct, "error reading from");
1565                         goto clean_up;
1566
1567                 case OK:
1568                         content_error(NULL, ct, "premature eof");
1569                         goto clean_up;
1570
1571                 default:
1572                         if (cc > len)
1573                                 cc = len;
1574                         len -= cc;
1575
1576                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1577                                 switch (*cp) {
1578                                 default:
1579                                         if (isspace(*cp))
1580                                                 break;
1581                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1582                                                 if (debugsw) {
1583                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1584                                                 }
1585                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1586                                                 continue;
1587                                         }
1588
1589                                         bits |= value << bitno;
1590 test_end:
1591                                         if ((bitno -= 6) < 0) {
1592                                                 putc((char) *b1, ce->ce_fp);
1593                                                 if (skip < 2) {
1594                                                         putc((char) *b2, ce->ce_fp);
1595                                                         if (skip < 1) {
1596                                                                 putc((char) *b3, ce->ce_fp);
1597                                                         }
1598                                                 }
1599
1600                                                 if (ferror(ce->ce_fp)) {
1601                                                         content_error(ce->ce_file, ct,
1602                                                                                    "error writing to");
1603                                                         goto clean_up;
1604                                                 }
1605                                                 bitno = 18, bits = 0L, skip = 0;
1606                                         }
1607                                         break;
1608
1609                                 case '=':
1610                                         if (++skip > 3)
1611                                                 goto self_delimiting;
1612                                         goto test_end;
1613                                 }
1614                         }
1615                 }
1616         }
1617
1618         if (bitno != 18) {
1619                 if (debugsw)
1620                         fprintf(stderr, "premature ending (bitno %d)\n",
1621                                         bitno);
1622
1623                 content_error(NULL, ct, "invalid BASE64 encoding");
1624                 goto clean_up;
1625         }
1626
1627 self_delimiting:
1628         fseek(ct->c_fp, 0L, SEEK_SET);
1629
1630         if (fflush(ce->ce_fp)) {
1631                 content_error(ce->ce_file, ct, "error writing to");
1632                 goto clean_up;
1633         }
1634
1635         fseek(ce->ce_fp, 0L, SEEK_SET);
1636
1637 ready_to_go:
1638         *file = ce->ce_file;
1639         if (own_ct_fp) {
1640                 fclose(ct->c_fp);
1641                 ct->c_fp = NULL;
1642         }
1643         return fileno(ce->ce_fp);
1644
1645 clean_up:
1646         free_encoding(ct, 0);
1647         if (own_ct_fp) {
1648                 fclose(ct->c_fp);
1649                 ct->c_fp = NULL;
1650         }
1651         return NOTOK;
1652 }
1653
1654
1655 /*
1656 ** QUOTED PRINTABLE
1657 */
1658
/*
** Maps a 7-bit character code to the value of that hexadecimal
** digit.  Every character that is not a hexadecimal digit maps to
** 0x00, the same value as '0'.
*/
static char hex2nib[0x80] = {
        /* 0x00 - 0x07 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x08 - 0x0f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x10 - 0x17 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x18 - 0x1f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x20 - 0x27 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x28 - 0x2f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x30 - 0x37: '0' .. '7' */
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        /* 0x38 - 0x3f: '8', '9' */
        0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x40 - 0x47: 'A' .. 'F' */
        0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
        /* 0x48 - 0x4f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x50 - 0x57 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x58 - 0x5f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x60 - 0x67: 'a' .. 'f' */
        0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
        /* 0x68 - 0x6f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x70 - 0x77 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x78 - 0x7f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1677
1678
1679 static int
1680 InitQuoted(CT ct)
1681 {
1682         return init_encoding(ct, openQuoted);
1683 }
1684
1685
1686 static int
1687 openQuoted(CT ct, char **file)
1688 {
1689         int cc, len, quoted, own_ct_fp = 0;
1690         unsigned char *cp, *ep;
1691         char buffer[BUFSIZ];
1692         unsigned char mask = 0;
1693         CE ce;
1694         /* sbeck -- handle suffixes */
1695         CI ci;
1696
1697         ce = ct->c_cefile;
1698         if (ce->ce_fp) {
1699                 fseek(ce->ce_fp, 0L, SEEK_SET);
1700                 goto ready_to_go;
1701         }
1702
1703         if (ce->ce_file) {
1704                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1705                         content_error(ce->ce_file, ct,
1706                                         "unable to fopen for reading");
1707                         return NOTOK;
1708                 }
1709                 goto ready_to_go;
1710         }
1711
1712         if (*file == NULL) {
1713                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1714                 ce->ce_unlink = 1;
1715         } else {
1716                 ce->ce_file = getcpy(*file);
1717                 ce->ce_unlink = 0;
1718         }
1719
1720         /* sbeck@cise.ufl.edu -- handle suffixes */
1721         ci = &ct->c_ctinfo;
1722         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1723                         invo_name, ci->ci_type, ci->ci_subtype);
1724         cp = context_find(buffer);
1725         if (cp == NULL || *cp == '\0') {
1726                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1727                                 ci->ci_type);
1728                 cp = context_find(buffer);
1729         }
1730         if (cp != NULL && *cp != '\0') {
1731                 if (ce->ce_unlink) {
1732                         /*
1733                         ** Temporary file already exists, so we rename to
1734                         ** version with extension.
1735                         */
1736                         char *file_org = strdup(ce->ce_file);
1737                         ce->ce_file = add(cp, ce->ce_file);
1738                         if (rename(file_org, ce->ce_file)) {
1739                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1740                                                 file_org);
1741                         }
1742                         free(file_org);
1743
1744                 } else {
1745                         ce->ce_file = add(cp, ce->ce_file);
1746                 }
1747         }
1748
1749         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1750                 content_error(ce->ce_file, ct,
1751                                 "unable to fopen for reading/writing");
1752                 return NOTOK;
1753         }
1754
1755         if ((len = ct->c_end - ct->c_begin) < 0)
1756                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1757
1758         if (!ct->c_fp) {
1759                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1760                         content_error(ct->c_file, ct,
1761                                         "unable to open for reading");
1762                         return NOTOK;
1763                 }
1764                 own_ct_fp = 1;
1765         }
1766
1767         quoted = 0;
1768
1769         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1770         while (len > 0) {
1771                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1772                         content_error(NULL, ct, "premature eof");
1773                         goto clean_up;
1774                 }
1775
1776                 if ((cc = strlen(buffer)) > len)
1777                         cc = len;
1778                 len -= cc;
1779
1780                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1781                         if (!isspace(*ep))
1782                                 break;
1783                 *++ep = '\n', ep++;
1784
1785                 for (; cp < ep; cp++) {
1786                         if (quoted > 0) {
1787                                 /* in an escape sequence */
1788                                 if (quoted == 1) {
1789                                         /* at byte 1 of an escape sequence */
1790                                         mask = hex2nib[*cp & 0x7f];
1791                                         /* next is byte 2 */
1792                                         quoted = 2;
1793                                 } else {
1794                                         /* at byte 2 of an escape sequence */
1795                                         mask <<= 4;
1796                                         mask |= hex2nib[*cp & 0x7f];
1797                                         putc(mask, ce->ce_fp);
1798                                         if (ferror(ce->ce_fp)) {
1799                                                 content_error(ce->ce_file, ct, "error writing to");
1800                                                 goto clean_up;
1801                                         }
1802                                         /*
1803                                         ** finished escape sequence; next may
1804                                         ** be literal or a new escape sequence
1805                                         */
1806                                         quoted = 0;
1807                                 }
1808                                 /* on to next byte */
1809                                 continue;
1810                         }
1811
1812                         /* not in an escape sequence */
1813                         if (*cp == '=') {
1814                                 /*
1815                                 ** starting an escape sequence,
1816                                 ** or invalid '='?
1817                                 */
1818                                 if (cp + 1 < ep && cp[1] == '\n') {
1819                                         /* "=\n" soft line break, eat the \n */
1820                                         cp++;
1821                                         continue;
1822                                 }
1823                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1824                                         /*
1825                                         ** We don't have 2 bytes left,
1826                                         ** so this is an invalid escape
1827                                         ** sequence; just show the raw bytes
1828                                         ** (below).
1829                                         */
1830                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1831                                         /*
1832                                         ** Next 2 bytes are hex digits,
1833                                         ** making this a valid escape
1834                                         ** sequence; let's decode it (above).
1835                                         */
1836                                         quoted = 1;
1837                                         continue;
1838                                 } else {
1839                                         /*
1840                                         ** One or both of the next 2 is
1841                                         ** out of range, making this an
1842                                         ** invalid escape sequence; just
1843                                         ** show the raw bytes (below).
1844                                         */
1845                                 }
1846                         }
1847
1848                         /* Just show the raw byte. */
1849                         putc(*cp, ce->ce_fp);
1850                         if (ferror(ce->ce_fp)) {
1851                                 content_error(ce->ce_file, ct,
1852                                                 "error writing to");
1853                                 goto clean_up;
1854                         }
1855                 }
1856         }
1857         if (quoted) {
1858                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1859                 goto clean_up;
1860         }
1861
1862         fseek(ct->c_fp, 0L, SEEK_SET);
1863
1864         if (fflush(ce->ce_fp)) {
1865                 content_error(ce->ce_file, ct, "error writing to");
1866                 goto clean_up;
1867         }
1868
1869         fseek(ce->ce_fp, 0L, SEEK_SET);
1870
1871 ready_to_go:
1872         *file = ce->ce_file;
1873         if (own_ct_fp) {
1874                 fclose(ct->c_fp);
1875                 ct->c_fp = NULL;
1876         }
1877         return fileno(ce->ce_fp);
1878
1879 clean_up:
1880         free_encoding(ct, 0);
1881         if (own_ct_fp) {
1882                 fclose(ct->c_fp);
1883                 ct->c_fp = NULL;
1884         }
1885         return NOTOK;
1886 }
1887
1888
1889 /*
1890 ** 7BIT
1891 */
1892
1893 static int
1894 Init7Bit(CT ct)
1895 {
1896         if (init_encoding(ct, open7Bit) == NOTOK)
1897                 return NOTOK;
1898
1899         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1900         return OK;
1901 }
1902
1903
1904 int
1905 open7Bit(CT ct, char **file)
1906 {
1907         int cc, fd, len, own_ct_fp = 0;
1908         char buffer[BUFSIZ];
1909         /* sbeck -- handle suffixes */
1910         char *cp;
1911         CI ci;
1912         CE ce;
1913
1914         ce = ct->c_cefile;
1915         if (ce->ce_fp) {
1916                 fseek(ce->ce_fp, 0L, SEEK_SET);
1917                 goto ready_to_go;
1918         }
1919
1920         if (ce->ce_file) {
1921                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1922                         content_error(ce->ce_file, ct,
1923                                         "unable to fopen for reading");
1924                         return NOTOK;
1925                 }
1926                 goto ready_to_go;
1927         }
1928
1929         if (*file == NULL) {
1930                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1931                 ce->ce_unlink = 1;
1932         } else {
1933                 ce->ce_file = getcpy(*file);
1934                 ce->ce_unlink = 0;
1935         }
1936
1937         /* sbeck@cise.ufl.edu -- handle suffixes */
1938         ci = &ct->c_ctinfo;
1939         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1940                         invo_name, ci->ci_type, ci->ci_subtype);
1941         cp = context_find(buffer);
1942         if (cp == NULL || *cp == '\0') {
1943                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1944                                 ci->ci_type);
1945                 cp = context_find(buffer);
1946         }
1947         if (cp != NULL && *cp != '\0') {
1948                 if (ce->ce_unlink) {
1949                         /*
1950                         ** Temporary file already exists, so we rename to
1951                         ** version with extension.
1952                         */
1953                         char *file_org = strdup(ce->ce_file);
1954                         ce->ce_file = add(cp, ce->ce_file);
1955                         if (rename(file_org, ce->ce_file)) {
1956                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1957                                                 file_org);
1958                         }
1959                         free(file_org);
1960
1961                 } else {
1962                         ce->ce_file = add(cp, ce->ce_file);
1963                 }
1964         }
1965
1966         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1967                 content_error(ce->ce_file, ct,
1968                                 "unable to fopen for reading/writing");
1969                 return NOTOK;
1970         }
1971
1972         if (ct->c_type == CT_MULTIPART) {
1973                 char **ap, **ep;
1974                 CI ci = &ct->c_ctinfo;
1975
1976                 len = 0;
1977                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1978                                 ci->ci_subtype);
1979                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1980                                 strlen(ci->ci_subtype);
1981                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1982                         putc(';', ce->ce_fp);
1983                         len++;
1984
1985                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1986                                         *ap, *ep);
1987
1988                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1989                                 fputs("\n\t", ce->ce_fp);
1990                                 len = 8;
1991                         } else {
1992                                 putc(' ', ce->ce_fp);
1993                                 len++;
1994                         }
1995                         fprintf(ce->ce_fp, "%s", buffer);
1996                         len += cc;
1997                 }
1998
1999                 if (ci->ci_comment) {
2000                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2001                                                 >= CPERLIN) {
2002                                 fputs("\n\t", ce->ce_fp);
2003                                 len = 8;
2004                         } else {
2005                                 putc(' ', ce->ce_fp);
2006                                 len++;
2007                         }
2008                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2009                         len += cc;
2010                 }
2011                 fprintf(ce->ce_fp, "\n");
2012                 if (ct->c_id)
2013                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2014                 if (ct->c_descr)
2015                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2016                 if (ct->c_dispo)
2017                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2018                 fprintf(ce->ce_fp, "\n");
2019         }
2020
2021         if ((len = ct->c_end - ct->c_begin) < 0)
2022                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2023
2024         if (!ct->c_fp) {
2025                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2026                         content_error(ct->c_file, ct,
2027                                         "unable to open for reading");
2028                         return NOTOK;
2029                 }
2030                 own_ct_fp = 1;
2031         }
2032
2033         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2034         while (len > 0)
2035                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2036                 case NOTOK:
2037                         content_error(ct->c_file, ct, "error reading from");
2038                         goto clean_up;
2039
2040                 case OK:
2041                         content_error(NULL, ct, "premature eof");
2042                         goto clean_up;
2043
2044                 default:
2045                         if (cc > len)
2046                                 cc = len;
2047                         len -= cc;
2048
2049                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2050                         if (ferror(ce->ce_fp)) {
2051                                 content_error(ce->ce_file, ct,
2052                                                 "error writing to");
2053                                 goto clean_up;
2054                         }
2055                 }
2056
2057         fseek(ct->c_fp, 0L, SEEK_SET);
2058
2059         if (fflush(ce->ce_fp)) {
2060                 content_error(ce->ce_file, ct, "error writing to");
2061                 goto clean_up;
2062         }
2063
2064         fseek(ce->ce_fp, 0L, SEEK_SET);
2065
2066 ready_to_go:
2067         *file = ce->ce_file;
2068         if (own_ct_fp) {
2069                 fclose(ct->c_fp);
2070                 ct->c_fp = NULL;
2071         }
2072         return fileno(ce->ce_fp);
2073
2074 clean_up:
2075         free_encoding(ct, 0);
2076         if (own_ct_fp) {
2077                 fclose(ct->c_fp);
2078                 ct->c_fp = NULL;
2079         }
2080         return NOTOK;
2081 }