b144c375a2cf43e4c9f65304d06ee893eb9048c6
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
/*
** Globals defined in other source files.
** When debugsw is non-zero, the parser echoes the MIME-Version and
** Content-Type values it is working on to stderr.
*/
extern int debugsw;

extern int endian;  /* mhmisc.c */

extern pid_t xpid;  /* mhshowsbr.c  */

/*
** Directory to place temp files.  This must
** be set before these routines are called.
*/
char *tmp;
33
34 /*
35 ** Structures for TEXT messages
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
/*
** Prototypes for functions implemented in other source files;
** each group is labelled with the file that provides it.
*/

/* mhmisc.c */
int part_ok(CT, int);
int type_ok(CT, int);
int make_intermediates(char *);
void content_error(char *, CT, char *, ...);

/* mhfree.c */
void free_content(CT);
void free_encoding(CT, int);
90
/*
** static prototypes
**
** Forward declarations of the file-local helpers.  The Init* routines
** are referenced from the str2cts / str2ces dispatch tables below, so
** they have to be declared before those tables.
*/
static CT get_content(FILE *, char *, int);
static int get_comment(CT, unsigned char **, int);

static int InitGeneric(CT);
static int InitText(CT);
static int InitMultiPart(CT);
static void reverse_parts(CT);
static int InitMessage(CT);
static int InitApplication(CT);
static int init_encoding(CT, OpenCEFunc);
static unsigned long size_encoding(CT);
static int InitBase64(CT);
static int openBase64(CT, char **);
static int InitQuoted(CT);
static int openQuoted(CT, char **);
static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Examine a process status word (presumably as returned by wait(2) --
** confirm against the callers).
**
** If the high byte is not 0xff and the low seven bits equal SIGQUIT,
** stdout and stderr are flushed and the program exits with
** EX_SOFTWARE.  In every other case the status is handed back
** unchanged.
*/
int
pidcheck(int status)
{
	int high_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

	if (!high_byte_all_ones && low_bits_are_quit) {
		fflush(stdout);
		fflush(stderr);
		exit(EX_SOFTWARE);
	}

	return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242                 adios(EX_OSERR, NULL, "out of memory");
243
244         ct->c_fp = in;
245         ct->c_file = getcpy(file);
246         ct->c_begin = ftell(ct->c_fp) + 1;
247
248         /*
249         ** Parse the header fields for this
250         ** content into a linked list.
251         */
252         for (compnum = 1, state = FLD2;;) {
253                 switch (state = m_getfld2(state, &f, in)) {
254                 case FLD2:
255                         compnum++;
256
257                         /* add the header data to the list */
258                         add_header(ct, getcpy(f.name), getcpy(f.value));
259
260                         ct->c_begin = ftell(in) + 1;
261                         continue;
262
263                 case BODY2:
264                         ct->c_begin = ftell(in) - strlen(f.value);
265                         break;
266
267                 case FILEEOF2:
268                         ct->c_begin = ftell(in);
269                         break;
270
271                 case LENERR2:
272                 case FMTERR2:
273                 case ERR2:
274                         adios(EX_DATAERR, NULL, "message format error in component #%d",
275                                         compnum);
276
277                 default:
278                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
279                 }
280                 break;
281         }
282
283         /*
284         ** Read the content headers.  We will parse the
285         ** MIME related header fields into their various
286         ** structures and set internal flags related to
287         ** content type/subtype, etc.
288         */
289
290         hp = ct->c_first_hf;  /* start at first header field */
291         while (hp) {
292                 /* Get MIME-Version field */
293                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
294                         int ucmp;
295                         char c;
296                         unsigned char *cp, *dp;
297
298                         if (ct->c_vrsn) {
299                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
300                                 goto next_header;
301                         }
302                         ct->c_vrsn = getcpy(hp->value);
303
304                         /* Now, cleanup this field */
305                         cp = ct->c_vrsn;
306
307                         while (isspace(*cp))
308                                 cp++;
309                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
310                                 *dp++ = ' ';
311                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
312                                 if (!isspace(*dp))
313                                         break;
314                         *++dp = '\0';
315                         if (debugsw)
316                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
317
318                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
319                                 goto out;
320
321                         for (dp = cp; istoken(*dp); dp++)
322                                 continue;
323                         c = *dp;
324                         *dp = '\0';
325                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
326                         *dp = c;
327                         if (!ucmp) {
328                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
329                         }
330
331                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
332                         /* Get Content-Type field */
333                         struct str2init *s2i;
334                         CI ci = &ct->c_ctinfo;
335
336                         /* Check if we've already seen a Content-Type header */
337                         if (ct->c_ctline) {
338                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
339                                 goto next_header;
340                         }
341
342                         /* Parse the Content-Type field */
343                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
344                                 goto out;
345
346                         /*
347                         ** Set the Init function and the internal
348                         ** flag for this content type.
349                         */
350                         for (s2i = str2cts; s2i->si_key; s2i++)
351                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
352                                         break;
353                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
354                                 s2i++;
355                         ct->c_type = s2i->si_val;
356                         ct->c_ctinitfnx = s2i->si_init;
357
358                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
359                         /* Get Content-Transfer-Encoding field */
360                         char c;
361                         unsigned char *cp, *dp;
362                         struct str2init *s2i;
363
364                         /*
365                         ** Check if we've already seen the
366                         ** Content-Transfer-Encoding field
367                         */
368                         if (ct->c_celine) {
369                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
370                                 goto next_header;
371                         }
372
373                         /* get copy of this field */
374                         ct->c_celine = cp = getcpy(hp->value);
375
376                         while (isspace(*cp))
377                                 cp++;
378                         for (dp = cp; istoken(*dp); dp++)
379                                 continue;
380                         c = *dp;
381                         *dp = '\0';
382
383                         /*
384                         ** Find the internal flag and Init function
385                         ** for this transfer encoding.
386                         */
387                         for (s2i = str2ces; s2i->si_key; s2i++)
388                                 if (!mh_strcasecmp(cp, s2i->si_key))
389                                         break;
390                         if (!s2i->si_key && !uprf(cp, "X-"))
391                                 s2i++;
392                         *dp = c;
393                         ct->c_encoding = s2i->si_val;
394
395                         /* Call the Init function for this encoding */
396                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
397                                 goto out;
398
399                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
400                         /* Get Content-ID field */
401                         ct->c_id = add(hp->value, ct->c_id);
402
403                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
404                         /* Get Content-Description field */
405                         ct->c_descr = add(hp->value, ct->c_descr);
406
407                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
408                         /* Get Content-Disposition field */
409                         ct->c_dispo = add(hp->value, ct->c_dispo);
410                 }
411
412 next_header:
413                 hp = hp->next;  /* next header field */
414         }
415
416         /*
417         ** Check if we saw a Content-Type field.
418         ** If not, then assign a default value for
419         ** it, and the Init function.
420         */
421         if (!ct->c_ctline) {
422                 /*
423                 ** If we are inside a multipart/digest message,
424                 ** so default type is message/rfc822
425                 */
426                 if (toplevel < 0) {
427                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
428                                 goto out;
429                         ct->c_type = CT_MESSAGE;
430                         ct->c_ctinitfnx = InitMessage;
431                 } else {
432                         /*
433                         ** Else default type is text/plain
434                         */
435                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
436                                 goto out;
437                         ct->c_type = CT_TEXT;
438                         ct->c_ctinitfnx = InitText;
439                 }
440         }
441
442         /* Use default Transfer-Encoding, if necessary */
443         if (!ct->c_celine) {
444                 ct->c_encoding = CE_7BIT;
445                 Init7Bit(ct);
446         }
447
448         return ct;
449
450 out:
451         free_content(ct);
452         return NULL;
453 }
454
455
456 /*
457 ** small routine to add header field to list
458 */
459
460 int
461 add_header(CT ct, char *name, char *value)
462 {
463         HF hp;
464
465         /* allocate header field structure */
466         hp = mh_xmalloc(sizeof(*hp));
467
468         /* link data into header structure */
469         hp->name = name;
470         hp->value = value;
471         hp->next = NULL;
472
473         /* link header structure into the list */
474         if (ct->c_first_hf == NULL) {
475                 ct->c_first_hf = hp;  /* this is the first */
476                 ct->c_last_hf = hp;
477         } else {
478                 ct->c_last_hf->next = hp;  /* add it to the end */
479                 ct->c_last_hf = hp;
480         }
481
482         return 0;
483 }
484
485
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** buf must be heap allocated (or NULL).  If a parameter is inserted,
** buf is freed and a newly allocated, newline terminated string is
** returned in its place.  Otherwise buf itself is returned.  Neither
** name nor value is referenced after the call returns.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
	char *text = (char *) buf;
	char *newbuf = text;

	/* Assume that name is non-null. */
	if (buf && value) {
		char *name_plus_equal = concat(name, "=", NULL);

		if (!strstr(text, name_plus_equal)) {
			char *insertion;
			char *prefix, *semi;
			size_t len;

			/*
			** Trim trailing space, esp. newline.  Working with
			** the length keeps this safe for an empty string.
			*/
			for (len = strlen(text);
					 len > 0 && isspace((unsigned char) text[len - 1]);
					 len--) {
				text[len - 1] = '\0';
			}

			insertion = concat("; ", name, "=", "\"", value, "\"",
					NULL);

			/*
			** Insert at first semicolon, if any.
			** If none, append to end.
			*/
			prefix = getcpy(text);
			if ((semi = strchr(prefix, ';'))) {
				/*
				** Cut the copy at the semicolon; the tail
				** (semicolon included) is taken from the
				** untouched original.
				*/
				*semi = '\0';
				newbuf = concat(prefix, insertion,
						text + (semi - prefix), "\n", NULL);
			} else {
				/* Append to end. */
				newbuf = concat(text, insertion, "\n", NULL);
			}

			free(prefix);
			free(insertion);
			free(buf);
		}

		free(name_plus_equal);
	}

	return newbuf;
}
543
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** A match whose closing quote is missing is treated as no match.
**
** The result is either `value' itself (nothing extracted) or a newly
** allocated string that the caller has to free; compare the returned
** pointer against `value' to tell the two cases apart.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
	char *extracted_name_value = value;
	char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
	char *name_suffix_equals = strstr(value, name_suffix_plus_quote);

	free(name_suffix_plus_quote);
	if (name_suffix_equals) {
		/* The text starts right behind name_suffix, '=' and '"'. */
		char *name_suffix_begin =
				name_suffix_equals + strlen(name_suffix) + 2;
		/* strchr stops at the NUL, so we never scan past the end. */
		char *name_suffix_end = strchr(name_suffix_begin, '"');

		if (name_suffix_end) {
			size_t len = name_suffix_end - name_suffix_begin;

			extracted_name_value = mh_xmalloc(len + 1);
			memcpy(extracted_name_value, name_suffix_begin, len);
			extracted_name_value[len] = '\0';
		}
	}

	return extracted_name_value;
}
576
577 /*
578 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
579 ** directives.  Fills in the information of the CTinfo structure.
580 */
581 int
582 get_ctinfo(unsigned char *cp, CT ct, int magic)
583 {
584         int i;
585         unsigned char *dp;
586         char **ap, **ep;
587         char c;
588         CI ci;
589
590         ci = &ct->c_ctinfo;
591         i = strlen(invo_name) + 2;
592
593         /* store copy of Content-Type line */
594         cp = ct->c_ctline = getcpy(cp);
595
596         while (isspace(*cp))  /* trim leading spaces */
597                 cp++;
598
599         /* change newlines to spaces */
600         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
601                 *dp++ = ' ';
602
603         /* trim trailing spaces */
604         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
605                 if (!isspace(*dp))
606                         break;
607         *++dp = '\0';
608
609         if (debugsw)
610                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
611
612         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
613                 return NOTOK;
614
615         for (dp = cp; istoken(*dp); dp++)
616                 continue;
617         c = *dp, *dp = '\0';
618         ci->ci_type = getcpy(cp);  /* store content type */
619         *dp = c, cp = dp;
620
621         if (!*ci->ci_type) {
622                 advise(NULL, "invalid %s: field in message %s (empty type)",
623                                 TYPE_FIELD, ct->c_file);
624                 return NOTOK;
625         }
626
627         /* down case the content type string */
628         for (dp = ci->ci_type; *dp; dp++)
629                 if (isalpha(*dp) && isupper(*dp))
630                         *dp = tolower(*dp);
631
632         while (isspace(*cp))
633                 cp++;
634
635         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
636                 return NOTOK;
637
638         if (*cp != '/') {
639                 if (!magic)
640                         ci->ci_subtype = getcpy("");
641                 goto magic_skip;
642         }
643
644         cp++;
645         while (isspace(*cp))
646                 cp++;
647
648         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
649                 return NOTOK;
650
651         for (dp = cp; istoken(*dp); dp++)
652                 continue;
653         c = *dp, *dp = '\0';
654         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
655         *dp = c, cp = dp;
656
657         if (!*ci->ci_subtype) {
658                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
659                 return NOTOK;
660         }
661
662         /* down case the content subtype string */
663         for (dp = ci->ci_subtype; *dp; dp++)
664                 if (isalpha(*dp) && isupper(*dp))
665                         *dp = tolower(*dp);
666
667 magic_skip:
668         while (isspace(*cp))
669                 cp++;
670
671         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
672                 return NOTOK;
673
674         /*
675         ** Parse attribute/value pairs given with Content-Type
676         */
677         ep = (ap = ci->ci_attrs) + NPARMS;
678         while (*cp == ';') {
679                 char *vp;
680                 unsigned char *up;
681
682                 if (ap >= ep) {
683                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
684                         return NOTOK;
685                 }
686
687                 cp++;
688                 while (isspace(*cp))
689                         cp++;
690
691                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
692                         return NOTOK;
693
694                 if (*cp == 0) {
695                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
696                         return OK;
697                 }
698
699                 /* down case the attribute name */
700                 for (dp = cp; istoken(*dp); dp++)
701                         if (isalpha(*dp) && isupper(*dp))
702                                 *dp = tolower(*dp);
703
704                 for (up = dp; isspace(*dp);)
705                         dp++;
706                 if (dp == cp || *dp != '=') {
707                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
708                         return NOTOK;
709                 }
710
711                 vp = (*ap = getcpy(cp)) + (up - cp);
712                 *vp = '\0';
713                 for (dp++; isspace(*dp);)
714                         dp++;
715
716                 /* now add the attribute value */
717                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
718
719                 if (*dp == '"') {
720                         for (cp = ++dp, dp = vp;;) {
721                                 switch (c = *cp++) {
722                                 case '\0':
723 bad_quote:
724                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
725                                         return NOTOK;
726
727                                 case '\\':
728                                         *dp++ = c;
729                                         if ((c = *cp++) == '\0')
730                                                 goto bad_quote;
731                                         /* else fall... */
732
733                                 default:
734                                         *dp++ = c;
735                                         continue;
736
737                                 case '"':
738                                         *dp = '\0';
739                                         break;
740                                 }
741                                 break;
742                         }
743                 } else {
744                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
745                                 continue;
746                         *dp = '\0';
747                 }
748                 if (!*vp) {
749                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
750                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
751                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
752                         continue;
753                 }
754                 ap++;
755
756                 while (isspace(*cp))
757                         cp++;
758
759                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
760                         return NOTOK;
761         }
762
763         /*
764         ** Get any <Content-Id> given in buffer
765         */
766         if (magic && *cp == '<') {
767                 if (ct->c_id) {
768                         free(ct->c_id);
769                         ct->c_id = NULL;
770                 }
771                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
772                         advise(NULL, "invalid ID in message %s", ct->c_file);
773                         return NOTOK;
774                 }
775                 c = *dp;
776                 *dp = '\0';
777                 if (*ct->c_id)
778                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
779                 else
780                         ct->c_id = NULL;
781                 *dp++ = c;
782                 cp = dp;
783
784                 while (isspace(*cp))
785                         cp++;
786         }
787
788         /*
789         ** Get any [Content-Description] given in buffer.
790         */
791         if (magic && *cp == '[') {
792                 ct->c_descr = ++cp;
793                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
794                         if (*dp == ']')
795                                 break;
796                 if (dp < cp) {
797                         advise(NULL, "invalid description in message %s",
798                                         ct->c_file);
799                         ct->c_descr = NULL;
800                         return NOTOK;
801                 }
802
803                 c = *dp;
804                 *dp = '\0';
805                 if (*ct->c_descr)
806                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
807                 else
808                         ct->c_descr = NULL;
809                 *dp++ = c;
810                 cp = dp;
811
812                 while (isspace(*cp))
813                         cp++;
814         }
815
816         /*
817         ** Get any {Content-Disposition} given in buffer.
818         */
819         if (magic && *cp == '{') {
820                 ct->c_dispo = ++cp;
821                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
822                         if (*dp == '}')
823                                 break;
824                 if (dp < cp) {
825                         advise(NULL, "invalid disposition in message %s",
826                                         ct->c_file);
827                         ct->c_dispo = NULL;
828                         return NOTOK;
829                 }
830
831                 c = *dp;
832                 *dp = '\0';
833                 if (*ct->c_dispo)
834                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
835                 else
836                         ct->c_dispo = NULL;
837                 *dp++ = c;
838                 cp = dp;
839
840                 while (isspace(*cp))
841                         cp++;
842         }
843
844         /*
845         ** Check if anything is left over
846         */
847         if (*cp) {
848                 if (magic) {
849                         ci->ci_magic = getcpy(cp);
850
851                         /*
852                         ** If there is a Content-Disposition header and
853                         ** it doesn't have a *filename=, extract it from
854                         ** the magic contents.  The mhbasename call skips
855                         ** any leading directory components.
856                         */
857                         if (ct->c_dispo)
858                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
859                         } else
860                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
861         }
862
863         return OK;
864 }
865
866
867 static int
868 get_comment(CT ct, unsigned char **ap, int istype)
869 {
870         int i;
871         char *bp;
872         unsigned char *cp;
873         char c, buffer[BUFSIZ], *dp;
874         CI ci;
875
876         ci = &ct->c_ctinfo;
877         cp = *ap;
878         bp = buffer;
879         cp++;
880
881         for (i = 0;;) {
882                 switch (c = *cp++) {
883                 case '\0':
884 invalid:
885                 advise(NULL, "invalid comment in message %s's %s: field",
886                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
887                 return NOTOK;
888
889                 case '\\':
890                         *bp++ = c;
891                         if ((c = *cp++) == '\0')
892                                 goto invalid;
893                         *bp++ = c;
894                         continue;
895
896                 case '(':
897                         i++;
898                         /* and fall... */
899                 default:
900                         *bp++ = c;
901                         continue;
902
903                 case ')':
904                         if (--i < 0)
905                                 break;
906                         *bp++ = c;
907                         continue;
908                 }
909                 break;
910         }
911         *bp = '\0';
912
913         if (istype) {
914                 if ((dp = ci->ci_comment)) {
915                         ci->ci_comment = concat(dp, " ", buffer, NULL);
916                         free(dp);
917                 } else {
918                         ci->ci_comment = getcpy(buffer);
919                 }
920         }
921
922         while (isspace(*cp))
923                 cp++;
924
925         *ap = cp;
926         return OK;
927 }
928
929
930 /*
931 ** CONTENTS
932 **
933 ** Handles content types audio, image, and video.
934 ** There's not much to do right here.
935 */
936
937 static int
938 InitGeneric(CT ct)
939 {
940         return OK;  /* not much to do here */
941 }
942
943
944 /*
945 ** TEXT
946 */
947
948 static int
949 InitText(CT ct)
950 {
951         char **ap, **ep;
952         struct k2v *kv;
953         struct text *t;
954         CI ci = &ct->c_ctinfo;
955
956         /* check for missing subtype */
957         if (!*ci->ci_subtype)
958                 ci->ci_subtype = add("plain", ci->ci_subtype);
959
960         /* match subtype */
961         for (kv = SubText; kv->kv_key; kv++)
962                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
963                         break;
964         ct->c_subtype = kv->kv_value;
965
966         /* allocate text character set structure */
967         if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
968                 adios(EX_OSERR, NULL, "out of memory");
969         ct->c_ctparams = (void *) t;
970
971         /* scan for charset parameter */
972         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
973                 if (!mh_strcasecmp(*ap, "charset"))
974                         break;
975
976         /* check if content specified a character set */
977         if (*ap) {
978                 /* store its name */
979                 ct->c_charset = getcpy(norm_charmap(*ep));
980                 /* match character set or set to CHARSET_UNKNOWN */
981                 for (kv = Charset; kv->kv_key; kv++) {
982                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
983                                 break;
984                         }
985                 }
986                 t->tx_charset = kv->kv_value;
987         } else {
988                 t->tx_charset = CHARSET_UNSPECIFIED;
989         }
990
991         return OK;
992 }
993
994
995 /*
996 ** MULTIPART
997 */
998
999 static int
1000 InitMultiPart(CT ct)
1001 {
1002         int inout;
1003         long last, pos;
1004         unsigned char *cp, *dp;
1005         char **ap, **ep;
1006         char *bp, buffer[BUFSIZ];
1007         struct multipart *m;
1008         struct k2v *kv;
1009         struct part *part, **next;
1010         CI ci = &ct->c_ctinfo;
1011         CT p;
1012         FILE *fp;
1013
1014         /*
1015         ** The encoding for multipart messages must be either
1016         ** 7bit, 8bit, or binary (per RFC2045).
1017         */
1018         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1019                 && ct->c_encoding != CE_BINARY) {
1020                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1021                 ct->c_encoding = CE_7BIT;
1022         }
1023
1024         /* match subtype */
1025         for (kv = SubMultiPart; kv->kv_key; kv++)
1026                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1027                         break;
1028         ct->c_subtype = kv->kv_value;
1029
1030         /*
1031         ** Check for "boundary" parameter, which is
1032         ** required for multipart messages.
1033         */
1034         bp = 0;
1035         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1036                 if (!mh_strcasecmp(*ap, "boundary")) {
1037                         bp = *ep;
1038                         break;
1039                 }
1040         }
1041
1042         /* complain if boundary parameter is missing */
1043         if (!*ap) {
1044                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1045                 return NOTOK;
1046         }
1047
1048         /* allocate primary structure for multipart info */
1049         if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1050                 adios(EX_OSERR, NULL, "out of memory");
1051         ct->c_ctparams = (void *) m;
1052
1053         /* check if boundary parameter contains only whitespace characters */
1054         for (cp = bp; isspace(*cp); cp++)
1055                 continue;
1056         if (!*cp) {
1057                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1058                 return NOTOK;
1059         }
1060
1061         /* remove trailing whitespace from boundary parameter */
1062         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1063                 if (!isspace(*dp))
1064                         break;
1065         *++dp = '\0';
1066
1067         /* record boundary separators */
1068         m->mp_start = concat(bp, "\n", NULL);
1069         m->mp_stop = concat(bp, "--\n", NULL);
1070
1071         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1072                 advise(ct->c_file, "unable to open for reading");
1073                 return NOTOK;
1074         }
1075
1076         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1077         last = ct->c_end;
1078         next = &m->mp_parts;
1079         part = NULL;
1080         inout = 1;
1081
1082         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1083                 if (pos > last)
1084                         break;
1085
1086                 pos += strlen(buffer);
1087                 if (buffer[0] != '-' || buffer[1] != '-')
1088                         continue;
1089                 if (inout) {
1090                         if (strcmp(buffer + 2, m->mp_start)!=0)
1091                                 continue;
1092 next_part:
1093                         if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1094                                         == NULL)
1095                                 adios(EX_OSERR, NULL, "out of memory");
1096                         *next = part;
1097                         next = &part->mp_next;
1098
1099                         if (!(p = get_content(fp, ct->c_file,
1100                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1101                                 ct->c_fp = NULL;
1102                                 return NOTOK;
1103                         }
1104                         p->c_fp = NULL;
1105                         part->mp_part = p;
1106                         pos = p->c_begin;
1107                         fseek(fp, pos, SEEK_SET);
1108                         inout = 0;
1109                 } else {
1110                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1111                                 inout = 1;
1112 end_part:
1113                                 p = part->mp_part;
1114                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1115                                 if (p->c_end < p->c_begin)
1116                                         p->c_begin = p->c_end;
1117                                 if (inout)
1118                                         goto next_part;
1119                                 goto last_part;
1120                         } else {
1121                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1122                                         goto end_part;
1123                         }
1124                 }
1125         }
1126
1127         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1128         if (!inout && part) {
1129                 p = part->mp_part;
1130                 p->c_end = ct->c_end;
1131
1132                 if (p->c_begin >= p->c_end) {
1133                         for (next = &m->mp_parts; *next != part;
1134                                 next = &((*next)->mp_next))
1135                                 continue;
1136                         *next = NULL;
1137                         free_content(p);
1138                         free((char *) part);
1139                 }
1140         }
1141
1142 last_part:
1143         /* reverse the order of the parts for multipart/alternative */
1144         if (ct->c_subtype == MULTI_ALTERNATE)
1145                 reverse_parts(ct);
1146
1147         /*
1148         ** label all subparts with part number, and
1149         ** then initialize the content of the subpart.
1150         */
1151         {
1152                 int partnum;
1153                 char *pp;
1154                 char partnam[BUFSIZ];
1155
1156                 if (ct->c_partno) {
1157                         snprintf(partnam, sizeof(partnam), "%s.",
1158                                         ct->c_partno);
1159                         pp = partnam + strlen(partnam);
1160                 } else {
1161                         pp = partnam;
1162                 }
1163
1164                 for (part = m->mp_parts, partnum = 1; part;
1165                         part = part->mp_next, partnum++) {
1166                         p = part->mp_part;
1167
1168                         sprintf(pp, "%d", partnum);
1169                         p->c_partno = getcpy(partnam);
1170
1171                         /* initialize the content of the subparts */
1172                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1173                                 fclose(ct->c_fp);
1174                                 ct->c_fp = NULL;
1175                                 return NOTOK;
1176                         }
1177                 }
1178         }
1179
1180         fclose(ct->c_fp);
1181         ct->c_fp = NULL;
1182         return OK;
1183 }
1184
1185
1186 /*
1187 ** reverse the order of the parts of a multipart
1188 */
1189
1190 static void
1191 reverse_parts(CT ct)
1192 {
1193         int i;
1194         struct multipart *m;
1195         struct part **base, **bmp, **next, *part;
1196
1197         m = (struct multipart *) ct->c_ctparams;
1198
1199         /* if only one part, just return */
1200         if (!m->mp_parts || !m->mp_parts->mp_next)
1201                 return;
1202
1203         /* count number of parts */
1204         i = 0;
1205         for (part = m->mp_parts; part; part = part->mp_next)
1206                 i++;
1207
1208         /* allocate array of pointers to the parts */
1209         if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1210                 adios(EX_OSERR, NULL, "out of memory");
1211         bmp = base;
1212
1213         /* point at all the parts */
1214         for (part = m->mp_parts; part; part = part->mp_next)
1215                 *bmp++ = part;
1216         *bmp = NULL;
1217
1218         /* reverse the order of the parts */
1219         next = &m->mp_parts;
1220         for (bmp--; bmp >= base; bmp--) {
1221                 part = *bmp;
1222                 *next = part;
1223                 next = &part->mp_next;
1224         }
1225         *next = NULL;
1226
1227         /* free array of pointers */
1228         free((char *) base);
1229 }
1230
1231
1232 /*
1233 ** MESSAGE
1234 */
1235
1236 static int
1237 InitMessage(CT ct)
1238 {
1239         struct k2v *kv;
1240         CI ci = &ct->c_ctinfo;
1241
1242         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1243                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1244                 return NOTOK;
1245         }
1246
1247         /* check for missing subtype */
1248         if (!*ci->ci_subtype)
1249                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1250
1251         /* match subtype */
1252         for (kv = SubMessage; kv->kv_key; kv++)
1253                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1254                         break;
1255         ct->c_subtype = kv->kv_value;
1256
1257         switch (ct->c_subtype) {
1258         case MESSAGE_RFC822:
1259                 break;
1260
1261         case MESSAGE_PARTIAL:
1262                 {
1263                 char **ap, **ep;
1264                 struct partial *p;
1265
1266                 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1267                 adios(EX_OSERR, NULL, "out of memory");
1268                 ct->c_ctparams = (void *) p;
1269
1270                 /*
1271                 ** scan for parameters "id", "number",
1272                 ** and "total"
1273                 */
1274                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1275                         if (!mh_strcasecmp(*ap, "id")) {
1276                                 p->pm_partid = getcpy(*ep);
1277                                 continue;
1278                         }
1279                         if (!mh_strcasecmp(*ap, "number")) {
1280                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1281 invalid_param:
1282                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1283                                         return NOTOK;
1284                                 }
1285                                 continue;
1286                         }
1287                         if (!mh_strcasecmp(*ap, "total")) {
1288                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1289                                                 p->pm_maxno < 1)
1290                                         goto invalid_param;
1291                                 continue;
1292                         }
1293                 }
1294
1295                 if (!p->pm_partid || !p->pm_partno
1296                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1297                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1298                         return NOTOK;
1299                 }
1300                 }
1301                 break;
1302
1303         case MESSAGE_EXTERNAL:
1304                 {
1305                 CT p;
1306                 FILE *fp;
1307
1308                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1309                         advise(ct->c_file, "unable to open for reading");
1310                         return NOTOK;
1311                 }
1312
1313                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1314
1315                 if (!(p = get_content(fp, ct->c_file, 0))) {
1316                         ct->c_fp = NULL;
1317                         return NOTOK;
1318                 }
1319
1320                 p->c_fp = NULL;
1321                 p->c_end = p->c_begin;
1322
1323                 fclose(ct->c_fp);
1324                 ct->c_fp = NULL;
1325
1326                 switch (p->c_type) {
1327                 case CT_MULTIPART:
1328                         break;
1329
1330                 case CT_MESSAGE:
1331                         if (p->c_subtype != MESSAGE_RFC822)
1332                                 break;
1333                         /* else fall... */
1334                 default:
1335                         if (p->c_ctinitfnx)
1336                                 (*p->c_ctinitfnx) (p);
1337                         break;
1338                 }
1339                 }
1340                 break;
1341
1342         default:
1343                 break;
1344         }
1345
1346         return OK;
1347 }
1348
1349
1350 /*
1351 ** APPLICATION
1352 */
1353
1354 static int
1355 InitApplication(CT ct)
1356 {
1357         struct k2v *kv;
1358         CI ci = &ct->c_ctinfo;
1359
1360         /* match subtype */
1361         for (kv = SubApplication; kv->kv_key; kv++)
1362                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1363                         break;
1364         ct->c_subtype = kv->kv_value;
1365
1366         return OK;
1367 }
1368
1369
1370 /*
1371 ** TRANSFER ENCODINGS
1372 */
1373
1374 static int
1375 init_encoding(CT ct, OpenCEFunc openfnx)
1376 {
1377         CE ce;
1378
1379         if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1380                 adios(EX_OSERR, NULL, "out of memory");
1381
1382         ct->c_cefile     = ce;
1383         ct->c_ceopenfnx  = openfnx;
1384         ct->c_ceclosefnx = close_encoding;
1385         ct->c_cesizefnx  = size_encoding;
1386
1387         return OK;
1388 }
1389
1390
1391 void
1392 close_encoding(CT ct)
1393 {
1394         CE ce;
1395
1396         if (!(ce = ct->c_cefile))
1397                 return;
1398
1399         if (ce->ce_fp) {
1400                 fclose(ce->ce_fp);
1401                 ce->ce_fp = NULL;
1402         }
1403 }
1404
1405
1406 static unsigned long
1407 size_encoding(CT ct)
1408 {
1409         int fd;
1410         unsigned long size;
1411         char *file;
1412         CE ce;
1413         struct stat st;
1414
1415         if (!(ce = ct->c_cefile))
1416                 return (ct->c_end - ct->c_begin);
1417
1418         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1419                 return (long) st.st_size;
1420
1421         if (ce->ce_file) {
1422                 if (stat(ce->ce_file, &st) != NOTOK)
1423                         return (long) st.st_size;
1424                 else
1425                         return 0L;
1426         }
1427
1428         if (ct->c_encoding == CE_EXTERNAL)
1429                 return (ct->c_end - ct->c_begin);
1430
1431         file = NULL;
1432         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1433                 return (ct->c_end - ct->c_begin);
1434
1435         if (fstat(fd, &st) != NOTOK)
1436                 size = (long) st.st_size;
1437         else
1438                 size = 0L;
1439
1440         (*ct->c_ceclosefnx) (ct);
1441         return size;
1442 }
1443
1444
1445 /*
1446 ** BASE64
1447 */
1448
/*
** Maps a 7-bit character code to its 6-bit base64 value:
** 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61,
** '+' -> 62, '/' -> 63.  Every other code maps to 0xff.
** One row per 16 character codes.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                   0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63,
        /* 0x30 */   52,   53,   54,   55,   56,   57,   58,   59,
                     60,   61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 */ 0xff,    0,    1,    2,    3,    4,    5,    6,
                      7,    8,    9,   10,   11,   12,   13,   14,
        /* 0x50 */   15,   16,   17,   18,   19,   20,   21,   22,
                     23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 */ 0xff,   26,   27,   28,   29,   30,   31,   32,
                     33,   34,   35,   36,   37,   38,   39,   40,
        /* 0x70 */   41,   42,   43,   44,   45,   46,   47,   48,
                     49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff
};
1467
1468
1469 static int
1470 InitBase64(CT ct)
1471 {
1472         return init_encoding(ct, openBase64);
1473 }
1474
1475
1476 static int
1477 openBase64(CT ct, char **file)
1478 {
1479         int bitno, cc;
1480         int fd, len, skip, own_ct_fp = 0;
1481         unsigned long bits;
1482         unsigned char value, *b, *b1, *b2, *b3;
1483         unsigned char *cp, *ep;
1484         char buffer[BUFSIZ];
1485         /* sbeck -- handle suffixes */
1486         CI ci;
1487         CE ce;
1488
1489         b  = (unsigned char *) &bits;
1490         b1 = &b[endian > 0 ? 1 : 2];
1491         b2 = &b[endian > 0 ? 2 : 1];
1492         b3 = &b[endian > 0 ? 3 : 0];
1493
1494         ce = ct->c_cefile;
1495         if (ce->ce_fp) {
1496                 fseek(ce->ce_fp, 0L, SEEK_SET);
1497                 goto ready_to_go;
1498         }
1499
1500         if (ce->ce_file) {
1501                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1502                         content_error(ce->ce_file, ct,
1503                                         "unable to fopen for reading");
1504                         return NOTOK;
1505                 }
1506                 goto ready_to_go;
1507         }
1508
1509         if (*file == NULL) {
1510                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1511                 ce->ce_unlink = 1;
1512         } else {
1513                 ce->ce_file = getcpy(*file);
1514                 ce->ce_unlink = 0;
1515         }
1516
1517         /* sbeck@cise.ufl.edu -- handle suffixes */
1518         ci = &ct->c_ctinfo;
1519         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1520                         invo_name, ci->ci_type, ci->ci_subtype);
1521         cp = context_find(buffer);
1522         if (cp == NULL || *cp == '\0') {
1523                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1524                                 ci->ci_type);
1525                 cp = context_find(buffer);
1526         }
1527         if (cp != NULL && *cp != '\0') {
1528                 if (ce->ce_unlink) {
1529                         /*
1530                         ** Temporary file already exists, so we rename to
1531                         ** version with extension.
1532                         */
1533                         char *file_org = strdup(ce->ce_file);
1534                         ce->ce_file = add(cp, ce->ce_file);
1535                         if (rename(file_org, ce->ce_file)) {
1536                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1537                                                 file_org);
1538                         }
1539                         free(file_org);
1540
1541                 } else {
1542                         ce->ce_file = add(cp, ce->ce_file);
1543                 }
1544         }
1545
1546         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1547                 content_error(ce->ce_file, ct,
1548                                 "unable to fopen for reading/writing");
1549                 return NOTOK;
1550         }
1551
1552         if ((len = ct->c_end - ct->c_begin) < 0)
1553                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1554
1555         if (!ct->c_fp) {
1556                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1557                         content_error(ct->c_file, ct,
1558                                         "unable to open for reading");
1559                         return NOTOK;
1560                 }
1561                 own_ct_fp = 1;
1562         }
1563
1564         bitno = 18;
1565         bits = 0L;
1566         skip = 0;
1567
1568         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1569         while (len > 0) {
1570                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1571                 case NOTOK:
1572                         content_error(ct->c_file, ct, "error reading from");
1573                         goto clean_up;
1574
1575                 case OK:
1576                         content_error(NULL, ct, "premature eof");
1577                         goto clean_up;
1578
1579                 default:
1580                         if (cc > len)
1581                                 cc = len;
1582                         len -= cc;
1583
1584                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1585                                 switch (*cp) {
1586                                 default:
1587                                         if (isspace(*cp))
1588                                                 break;
1589                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1590                                                 if (debugsw) {
1591                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1592                                                 }
1593                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1594                                                 continue;
1595                                         }
1596
1597                                         bits |= value << bitno;
1598 test_end:
1599                                         if ((bitno -= 6) < 0) {
1600                                                 putc((char) *b1, ce->ce_fp);
1601                                                 if (skip < 2) {
1602                                                         putc((char) *b2, ce->ce_fp);
1603                                                         if (skip < 1) {
1604                                                                 putc((char) *b3, ce->ce_fp);
1605                                                         }
1606                                                 }
1607
1608                                                 if (ferror(ce->ce_fp)) {
1609                                                         content_error(ce->ce_file, ct,
1610                                                                                    "error writing to");
1611                                                         goto clean_up;
1612                                                 }
1613                                                 bitno = 18, bits = 0L, skip = 0;
1614                                         }
1615                                         break;
1616
1617                                 case '=':
1618                                         if (++skip > 3)
1619                                                 goto self_delimiting;
1620                                         goto test_end;
1621                                 }
1622                         }
1623                 }
1624         }
1625
1626         if (bitno != 18) {
1627                 if (debugsw)
1628                         fprintf(stderr, "premature ending (bitno %d)\n",
1629                                         bitno);
1630
1631                 content_error(NULL, ct, "invalid BASE64 encoding");
1632                 goto clean_up;
1633         }
1634
1635 self_delimiting:
1636         fseek(ct->c_fp, 0L, SEEK_SET);
1637
1638         if (fflush(ce->ce_fp)) {
1639                 content_error(ce->ce_file, ct, "error writing to");
1640                 goto clean_up;
1641         }
1642
1643         fseek(ce->ce_fp, 0L, SEEK_SET);
1644
1645 ready_to_go:
1646         *file = ce->ce_file;
1647         if (own_ct_fp) {
1648                 fclose(ct->c_fp);
1649                 ct->c_fp = NULL;
1650         }
1651         return fileno(ce->ce_fp);
1652
1653 clean_up:
1654         free_encoding(ct, 0);
1655         if (own_ct_fp) {
1656                 fclose(ct->c_fp);
1657                 ct->c_fp = NULL;
1658         }
1659         return NOTOK;
1660 }
1661
1662
1663 /*
1664 ** QUOTED PRINTABLE
1665 */
1666
/*
** Maps a 7-bit character code to the value of the hexadecimal digit
** it represents: '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15.
** All other codes map to 0, so the table alone cannot tell '0' from
** a character that is not a hex digit.
** One row per 16 character codes.
*/
static char hex2nib[0x80] = {
        /* 0x00 */ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x10 */ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x20 */ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x30 */ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
        /* 0x40 */ 0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x50 */ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x60 */ 0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        /* 0x70 */ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
1685
1686
1687 static int
1688 InitQuoted(CT ct)
1689 {
1690         return init_encoding(ct, openQuoted);
1691 }
1692
1693
1694 static int
1695 openQuoted(CT ct, char **file)
1696 {
1697         int cc, len, quoted, own_ct_fp = 0;
1698         unsigned char *cp, *ep;
1699         char buffer[BUFSIZ];
1700         unsigned char mask = 0;
1701         CE ce;
1702         /* sbeck -- handle suffixes */
1703         CI ci;
1704
1705         ce = ct->c_cefile;
1706         if (ce->ce_fp) {
1707                 fseek(ce->ce_fp, 0L, SEEK_SET);
1708                 goto ready_to_go;
1709         }
1710
1711         if (ce->ce_file) {
1712                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1713                         content_error(ce->ce_file, ct,
1714                                         "unable to fopen for reading");
1715                         return NOTOK;
1716                 }
1717                 goto ready_to_go;
1718         }
1719
1720         if (*file == NULL) {
1721                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1722                 ce->ce_unlink = 1;
1723         } else {
1724                 ce->ce_file = getcpy(*file);
1725                 ce->ce_unlink = 0;
1726         }
1727
1728         /* sbeck@cise.ufl.edu -- handle suffixes */
1729         ci = &ct->c_ctinfo;
1730         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1731                         invo_name, ci->ci_type, ci->ci_subtype);
1732         cp = context_find(buffer);
1733         if (cp == NULL || *cp == '\0') {
1734                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1735                                 ci->ci_type);
1736                 cp = context_find(buffer);
1737         }
1738         if (cp != NULL && *cp != '\0') {
1739                 if (ce->ce_unlink) {
1740                         /*
1741                         ** Temporary file already exists, so we rename to
1742                         ** version with extension.
1743                         */
1744                         char *file_org = strdup(ce->ce_file);
1745                         ce->ce_file = add(cp, ce->ce_file);
1746                         if (rename(file_org, ce->ce_file)) {
1747                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1748                                                 file_org);
1749                         }
1750                         free(file_org);
1751
1752                 } else {
1753                         ce->ce_file = add(cp, ce->ce_file);
1754                 }
1755         }
1756
1757         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1758                 content_error(ce->ce_file, ct,
1759                                 "unable to fopen for reading/writing");
1760                 return NOTOK;
1761         }
1762
1763         if ((len = ct->c_end - ct->c_begin) < 0)
1764                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1765
1766         if (!ct->c_fp) {
1767                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1768                         content_error(ct->c_file, ct,
1769                                         "unable to open for reading");
1770                         return NOTOK;
1771                 }
1772                 own_ct_fp = 1;
1773         }
1774
1775         quoted = 0;
1776
1777         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1778         while (len > 0) {
1779                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1780                         content_error(NULL, ct, "premature eof");
1781                         goto clean_up;
1782                 }
1783
1784                 if ((cc = strlen(buffer)) > len)
1785                         cc = len;
1786                 len -= cc;
1787
1788                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1789                         if (!isspace(*ep))
1790                                 break;
1791                 *++ep = '\n', ep++;
1792
1793                 for (; cp < ep; cp++) {
1794                         if (quoted > 0) {
1795                                 /* in an escape sequence */
1796                                 if (quoted == 1) {
1797                                         /* at byte 1 of an escape sequence */
1798                                         mask = hex2nib[*cp & 0x7f];
1799                                         /* next is byte 2 */
1800                                         quoted = 2;
1801                                 } else {
1802                                         /* at byte 2 of an escape sequence */
1803                                         mask <<= 4;
1804                                         mask |= hex2nib[*cp & 0x7f];
1805                                         putc(mask, ce->ce_fp);
1806                                         if (ferror(ce->ce_fp)) {
1807                                                 content_error(ce->ce_file, ct, "error writing to");
1808                                                 goto clean_up;
1809                                         }
1810                                         /*
1811                                         ** finished escape sequence; next may
1812                                         ** be literal or a new escape sequence
1813                                         */
1814                                         quoted = 0;
1815                                 }
1816                                 /* on to next byte */
1817                                 continue;
1818                         }
1819
1820                         /* not in an escape sequence */
1821                         if (*cp == '=') {
1822                                 /*
1823                                 ** starting an escape sequence,
1824                                 ** or invalid '='?
1825                                 */
1826                                 if (cp + 1 < ep && cp[1] == '\n') {
1827                                         /* "=\n" soft line break, eat the \n */
1828                                         cp++;
1829                                         continue;
1830                                 }
1831                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1832                                         /*
1833                                         ** We don't have 2 bytes left,
1834                                         ** so this is an invalid escape
1835                                         ** sequence; just show the raw bytes
1836                                         ** (below).
1837                                         */
1838                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1839                                         /*
1840                                         ** Next 2 bytes are hex digits,
1841                                         ** making this a valid escape
1842                                         ** sequence; let's decode it (above).
1843                                         */
1844                                         quoted = 1;
1845                                         continue;
1846                                 } else {
1847                                         /*
1848                                         ** One or both of the next 2 is
1849                                         ** out of range, making this an
1850                                         ** invalid escape sequence; just
1851                                         ** show the raw bytes (below).
1852                                         */
1853                                 }
1854                         }
1855
1856                         /* Just show the raw byte. */
1857                         putc(*cp, ce->ce_fp);
1858                         if (ferror(ce->ce_fp)) {
1859                                 content_error(ce->ce_file, ct,
1860                                                 "error writing to");
1861                                 goto clean_up;
1862                         }
1863                 }
1864         }
1865         if (quoted) {
1866                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1867                 goto clean_up;
1868         }
1869
1870         fseek(ct->c_fp, 0L, SEEK_SET);
1871
1872         if (fflush(ce->ce_fp)) {
1873                 content_error(ce->ce_file, ct, "error writing to");
1874                 goto clean_up;
1875         }
1876
1877         fseek(ce->ce_fp, 0L, SEEK_SET);
1878
1879 ready_to_go:
1880         *file = ce->ce_file;
1881         if (own_ct_fp) {
1882                 fclose(ct->c_fp);
1883                 ct->c_fp = NULL;
1884         }
1885         return fileno(ce->ce_fp);
1886
1887 clean_up:
1888         free_encoding(ct, 0);
1889         if (own_ct_fp) {
1890                 fclose(ct->c_fp);
1891                 ct->c_fp = NULL;
1892         }
1893         return NOTOK;
1894 }
1895
1896
1897 /*
1898 ** 7BIT
1899 */
1900
1901 static int
1902 Init7Bit(CT ct)
1903 {
1904         if (init_encoding(ct, open7Bit) == NOTOK)
1905                 return NOTOK;
1906
1907         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1908         return OK;
1909 }
1910
1911
1912 int
1913 open7Bit(CT ct, char **file)
1914 {
1915         int cc, fd, len, own_ct_fp = 0;
1916         char buffer[BUFSIZ];
1917         /* sbeck -- handle suffixes */
1918         char *cp;
1919         CI ci;
1920         CE ce;
1921
1922         ce = ct->c_cefile;
1923         if (ce->ce_fp) {
1924                 fseek(ce->ce_fp, 0L, SEEK_SET);
1925                 goto ready_to_go;
1926         }
1927
1928         if (ce->ce_file) {
1929                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1930                         content_error(ce->ce_file, ct,
1931                                         "unable to fopen for reading");
1932                         return NOTOK;
1933                 }
1934                 goto ready_to_go;
1935         }
1936
1937         if (*file == NULL) {
1938                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1939                 ce->ce_unlink = 1;
1940         } else {
1941                 ce->ce_file = getcpy(*file);
1942                 ce->ce_unlink = 0;
1943         }
1944
1945         /* sbeck@cise.ufl.edu -- handle suffixes */
1946         ci = &ct->c_ctinfo;
1947         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1948                         invo_name, ci->ci_type, ci->ci_subtype);
1949         cp = context_find(buffer);
1950         if (cp == NULL || *cp == '\0') {
1951                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1952                                 ci->ci_type);
1953                 cp = context_find(buffer);
1954         }
1955         if (cp != NULL && *cp != '\0') {
1956                 if (ce->ce_unlink) {
1957                         /*
1958                         ** Temporary file already exists, so we rename to
1959                         ** version with extension.
1960                         */
1961                         char *file_org = strdup(ce->ce_file);
1962                         ce->ce_file = add(cp, ce->ce_file);
1963                         if (rename(file_org, ce->ce_file)) {
1964                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1965                                                 file_org);
1966                         }
1967                         free(file_org);
1968
1969                 } else {
1970                         ce->ce_file = add(cp, ce->ce_file);
1971                 }
1972         }
1973
1974         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1975                 content_error(ce->ce_file, ct,
1976                                 "unable to fopen for reading/writing");
1977                 return NOTOK;
1978         }
1979
1980         if (ct->c_type == CT_MULTIPART) {
1981                 char **ap, **ep;
1982                 CI ci = &ct->c_ctinfo;
1983
1984                 len = 0;
1985                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1986                                 ci->ci_subtype);
1987                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1988                                 strlen(ci->ci_subtype);
1989                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1990                         putc(';', ce->ce_fp);
1991                         len++;
1992
1993                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1994                                         *ap, *ep);
1995
1996                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1997                                 fputs("\n\t", ce->ce_fp);
1998                                 len = 8;
1999                         } else {
2000                                 putc(' ', ce->ce_fp);
2001                                 len++;
2002                         }
2003                         fprintf(ce->ce_fp, "%s", buffer);
2004                         len += cc;
2005                 }
2006
2007                 if (ci->ci_comment) {
2008                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2009                                                 >= CPERLIN) {
2010                                 fputs("\n\t", ce->ce_fp);
2011                                 len = 8;
2012                         } else {
2013                                 putc(' ', ce->ce_fp);
2014                                 len++;
2015                         }
2016                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2017                         len += cc;
2018                 }
2019                 fprintf(ce->ce_fp, "\n");
2020                 if (ct->c_id)
2021                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2022                 if (ct->c_descr)
2023                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2024                 if (ct->c_dispo)
2025                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2026                 fprintf(ce->ce_fp, "\n");
2027         }
2028
2029         if ((len = ct->c_end - ct->c_begin) < 0)
2030                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2031
2032         if (!ct->c_fp) {
2033                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2034                         content_error(ct->c_file, ct,
2035                                         "unable to open for reading");
2036                         return NOTOK;
2037                 }
2038                 own_ct_fp = 1;
2039         }
2040
2041         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2042         while (len > 0)
2043                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2044                 case NOTOK:
2045                         content_error(ct->c_file, ct, "error reading from");
2046                         goto clean_up;
2047
2048                 case OK:
2049                         content_error(NULL, ct, "premature eof");
2050                         goto clean_up;
2051
2052                 default:
2053                         if (cc > len)
2054                                 cc = len;
2055                         len -= cc;
2056
2057                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2058                         if (ferror(ce->ce_fp)) {
2059                                 content_error(ce->ce_file, ct,
2060                                                 "error writing to");
2061                                 goto clean_up;
2062                         }
2063                 }
2064
2065         fseek(ct->c_fp, 0L, SEEK_SET);
2066
2067         if (fflush(ce->ce_fp)) {
2068                 content_error(ce->ce_file, ct, "error writing to");
2069                 goto clean_up;
2070         }
2071
2072         fseek(ce->ce_fp, 0L, SEEK_SET);
2073
2074 ready_to_go:
2075         *file = ce->ce_file;
2076         if (own_ct_fp) {
2077                 fclose(ct->c_fp);
2078                 ct->c_fp = NULL;
2079         }
2080         return fileno(ce->ce_fp);
2081
2082 clean_up:
2083         free_encoding(ct, 0);
2084         if (own_ct_fp) {
2085                 fclose(ct->c_fp);
2086                 ct->c_fp = NULL;
2087         }
2088         return NOTOK;
2089 }