Replace done with exit at uip
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <h/signals.h>
12 #include <errno.h>
13 #include <setjmp.h>
14 #include <signal.h>
15 #include <h/tws.h>
16 #include <h/mime.h>
17 #include <h/mhparse.h>
18 #include <h/utils.h>
19
/* tested in this file before printing parsed header fields to stderr */
extern int debugsw;

/* lives in mhmisc.c */
extern int endian;

/* lives in mhshowsbr.c */
extern pid_t xpid;

/*
** Name of the directory that receives temporary files.
** Callers have to assign it before using the routines
** in this file.
*/
char *tmp;
31
32 /*
33 ** Structures for TEXT messages
34 */
35 struct k2v SubText[] = {
36         { "plain", TEXT_PLAIN },
37         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
38         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
39         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
40 };
41
42 struct k2v Charset[] = {
43         { "us-ascii",   CHARSET_USASCII },
44         { "iso-8859-1", CHARSET_LATIN },
45         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
46 };
47
48 /*
49 ** Structures for MULTIPART messages
50 */
51 struct k2v SubMultiPart[] = {
52         { "mixed",       MULTI_MIXED },
53         { "alternative", MULTI_ALTERNATE },
54         { "digest",      MULTI_DIGEST },
55         { "parallel",    MULTI_PARALLEL },
56         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
57 };
58
59 /*
60 ** Structures for MESSAGE messages
61 */
62 struct k2v SubMessage[] = {
63         { "rfc822",        MESSAGE_RFC822 },
64         { "partial",       MESSAGE_PARTIAL },
65         { "external-body", MESSAGE_EXTERNAL },
66         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
67 };
68
69 /*
70 ** Structure for APPLICATION messages
71 */
72 struct k2v SubApplication[] = {
73         { "octet-stream", APPLICATION_OCTETS },
74         { "postscript",   APPLICATION_POSTSCRIPT },
75         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
76 };
77
78
79 /* mhmisc.c */
80 int part_ok(CT, int);
81 int type_ok(CT, int);
82 int make_intermediates(char *);
83 void content_error(char *, CT, char *, ...);
84
85 /* mhfree.c */
86 void free_content(CT);
87 void free_encoding(CT, int);
88
89 /*
90 ** static prototypes
91 */
92 static CT get_content(FILE *, char *, int);
93 static int get_comment(CT, unsigned char **, int);
94
95 static int InitGeneric(CT);
96 static int InitText(CT);
97 static int InitMultiPart(CT);
98 static void reverse_parts(CT);
99 static int InitMessage(CT);
100 static int InitApplication(CT);
101 static int init_encoding(CT, OpenCEFunc);
102 static unsigned long size_encoding(CT);
103 static int InitBase64(CT);
104 static int openBase64(CT, char **);
105 static int InitQuoted(CT);
106 static int openQuoted(CT, char **);
107 static int Init7Bit(CT);
108
109 struct str2init str2cts[] = {
110         { "application", CT_APPLICATION, InitApplication },
111         { "audio",       CT_AUDIO,       InitGeneric },
112         { "image",       CT_IMAGE,       InitGeneric },
113         { "message",     CT_MESSAGE,     InitMessage },
114         { "multipart",   CT_MULTIPART,   InitMultiPart },
115         { "text",        CT_TEXT,        InitText },
116         { "video",       CT_VIDEO,       InitGeneric },
117         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
118         { NULL,          CT_UNKNOWN,     NULL },
119 };
120
121 struct str2init str2ces[] = {
122         { "base64",           CE_BASE64,    InitBase64 },
123         { "quoted-printable", CE_QUOTED,    InitQuoted },
124         { "8bit",             CE_8BIT,      Init7Bit },
125         { "7bit",             CE_7BIT,      Init7Bit },
126         { "binary",           CE_BINARY,    Init7Bit },
127         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
128         { NULL,               CE_UNKNOWN,    NULL },
129 };
130
131
/*
** Inspect a status word (presumably one obtained from wait()).
**
** If bits 8-15 are not all set and the low seven bits equal SIGQUIT,
** flush stdout and stderr and terminate the program with exit code 1.
** In every other case the status is handed back unchanged.
*/
int
pidcheck(int status)
{
	int high_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

	if (!high_byte_all_ones && low_bits_are_quit) {
		fflush(stdout);
		fflush(stderr);
		exit(1);
	}

	return status;
}
143
144
145 /*
146 ** Main entry point for parsing a MIME message or file.
147 ** It returns the Content structure for the top level
148 ** entity in the file.
149 */
150 CT
151 parse_mime(char *file)
152 {
153         int is_stdin;
154         char buffer[BUFSIZ];
155         FILE *fp;
156         CT ct;
157
158         /*
159         ** Check if file is actually standard input
160         */
161         if ((is_stdin = (strcmp(file, "-")==0))) {
162                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
163                 if (tfile == NULL) {
164                         advise("mhparse", "unable to create temporary file");
165                         return NULL;
166                 }
167                 file = getcpy(tfile);
168                 chmod(file, 0600);
169
170                 while (fgets(buffer, sizeof(buffer), stdin))
171                         fputs(buffer, fp);
172                 fflush(fp);
173
174                 if (ferror(stdin)) {
175                         unlink(file);
176                         advise("stdin", "error reading");
177                         return NULL;
178                 }
179                 if (ferror(fp)) {
180                         unlink(file);
181                         advise(file, "error writing");
182                         return NULL;
183                 }
184                 fseek(fp, 0L, SEEK_SET);
185         } else if ((fp = fopen(file, "r")) == NULL) {
186                 advise(file, "unable to read");
187                 return NULL;
188         }
189
190         if (!(ct = get_content(fp, file, 1))) {
191                 if (is_stdin)
192                         unlink(file);
193                 advise(NULL, "unable to decode %s", file);
194                 return NULL;
195         }
196
197         if (is_stdin)
198                 ct->c_unlink = 1;  /* temp file to remove */
199
200         ct->c_fp = NULL;
201
202         if (ct->c_end == 0L) {
203                 fseek(fp, 0L, SEEK_END);
204                 ct->c_end = ftell(fp);
205         }
206
207         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
208                 fclose(fp);
209                 free_content(ct);
210                 return NULL;
211         }
212
213         fclose(fp);
214         return ct;
215 }
216
217
218 /*
219 ** Main routine for reading/parsing the headers
220 ** of a message content.
221 **
222 ** toplevel =  1   # we are at the top level of the message
223 ** toplevel =  0   # we are inside message type or multipart type
224 **                 # other than multipart/digest
225 ** toplevel = -1   # we are inside multipart/digest
226 ** NB: on failure we will fclose(in)!
227 */
228
229 static CT
230 get_content(FILE *in, char *file, int toplevel)
231 {
232         int compnum, state;
233         char buf[BUFSIZ], name[NAMESZ];
234         char *np, *vp;
235         CT ct;
236         HF hp;
237
238         /* allocate the content structure */
239         if (!(ct = (CT) calloc(1, sizeof(*ct))))
240                 adios(NULL, "out of memory");
241
242         ct->c_fp = in;
243         ct->c_file = getcpy(file);
244         ct->c_begin = ftell(ct->c_fp) + 1;
245
246         /*
247         ** Parse the header fields for this
248         ** content into a linked list.
249         */
250         for (compnum = 1, state = FLD;;) {
251                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
252                 case FLD:
253                 case FLDPLUS:
254                 case FLDEOF:
255                         compnum++;
256
257                         /* get copies of the buffers */
258                         np = getcpy(name);
259                         vp = getcpy(buf);
260
261                         /* if necessary, get rest of field */
262                         while (state == FLDPLUS) {
263                                 state = m_getfld(state, name, buf,
264                                                 sizeof(buf), in);
265                                 vp = add(buf, vp);  /* add to previous value */
266                         }
267
268                         /* Now add the header data to the list */
269                         add_header(ct, np, vp);
270
271                         /* continue, if this isn't the last header field */
272                         if (state != FLDEOF) {
273                                 ct->c_begin = ftell(in) + 1;
274                                 continue;
275                         }
276                         /* else fall... */
277
278                 case BODY:
279                 case BODYEOF:
280                         ct->c_begin = ftell(in) - strlen(buf);
281                         break;
282
283                 case FILEEOF:
284                         ct->c_begin = ftell(in);
285                         break;
286
287                 case LENERR:
288                 case FMTERR:
289                         adios(NULL, "message format error in component #%d",
290                                         compnum);
291
292                 default:
293                         adios(NULL, "getfld() returned %d", state);
294                 }
295
296                 /* break out of the loop */
297                 break;
298         }
299
300         /*
301         ** Read the content headers.  We will parse the
302         ** MIME related header fields into their various
303         ** structures and set internal flags related to
304         ** content type/subtype, etc.
305         */
306
307         hp = ct->c_first_hf;  /* start at first header field */
308         while (hp) {
309                 /* Get MIME-Version field */
310                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
311                         int ucmp;
312                         char c;
313                         unsigned char *cp, *dp;
314
315                         if (ct->c_vrsn) {
316                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
317                                 goto next_header;
318                         }
319                         ct->c_vrsn = getcpy(hp->value);
320
321                         /* Now, cleanup this field */
322                         cp = ct->c_vrsn;
323
324                         while (isspace(*cp))
325                                 cp++;
326                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
327                                 *dp++ = ' ';
328                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
329                                 if (!isspace(*dp))
330                                         break;
331                         *++dp = '\0';
332                         if (debugsw)
333                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
334
335                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
336                                 goto out;
337
338                         for (dp = cp; istoken(*dp); dp++)
339                                 continue;
340                         c = *dp;
341                         *dp = '\0';
342                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
343                         *dp = c;
344                         if (!ucmp) {
345                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
346                         }
347
348                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349                         /* Get Content-Type field */
350                         struct str2init *s2i;
351                         CI ci = &ct->c_ctinfo;
352
353                         /* Check if we've already seen a Content-Type header */
354                         if (ct->c_ctline) {
355                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
356                                 goto next_header;
357                         }
358
359                         /* Parse the Content-Type field */
360                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
361                                 goto out;
362
363                         /*
364                         ** Set the Init function and the internal
365                         ** flag for this content type.
366                         */
367                         for (s2i = str2cts; s2i->si_key; s2i++)
368                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
369                                         break;
370                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
371                                 s2i++;
372                         ct->c_type = s2i->si_val;
373                         ct->c_ctinitfnx = s2i->si_init;
374
375                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376                         /* Get Content-Transfer-Encoding field */
377                         char c;
378                         unsigned char *cp, *dp;
379                         struct str2init *s2i;
380
381                         /*
382                         ** Check if we've already seen the
383                         ** Content-Transfer-Encoding field
384                         */
385                         if (ct->c_celine) {
386                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
387                                 goto next_header;
388                         }
389
390                         /* get copy of this field */
391                         ct->c_celine = cp = getcpy(hp->value);
392
393                         while (isspace(*cp))
394                                 cp++;
395                         for (dp = cp; istoken(*dp); dp++)
396                                 continue;
397                         c = *dp;
398                         *dp = '\0';
399
400                         /*
401                         ** Find the internal flag and Init function
402                         ** for this transfer encoding.
403                         */
404                         for (s2i = str2ces; s2i->si_key; s2i++)
405                                 if (!mh_strcasecmp(cp, s2i->si_key))
406                                         break;
407                         if (!s2i->si_key && !uprf(cp, "X-"))
408                                 s2i++;
409                         *dp = c;
410                         ct->c_encoding = s2i->si_val;
411
412                         /* Call the Init function for this encoding */
413                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
414                                 goto out;
415
416                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417                         /* Get Content-ID field */
418                         ct->c_id = add(hp->value, ct->c_id);
419
420                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421                         /* Get Content-Description field */
422                         ct->c_descr = add(hp->value, ct->c_descr);
423
424                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425                         /* Get Content-Disposition field */
426                         ct->c_dispo = add(hp->value, ct->c_dispo);
427                 }
428
429 next_header:
430                 hp = hp->next;  /* next header field */
431         }
432
433         /*
434         ** Check if we saw a Content-Type field.
435         ** If not, then assign a default value for
436         ** it, and the Init function.
437         */
438         if (!ct->c_ctline) {
439                 /*
440                 ** If we are inside a multipart/digest message,
441                 ** so default type is message/rfc822
442                 */
443                 if (toplevel < 0) {
444                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
445                                 goto out;
446                         ct->c_type = CT_MESSAGE;
447                         ct->c_ctinitfnx = InitMessage;
448                 } else {
449                         /*
450                         ** Else default type is text/plain
451                         */
452                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
453                                 goto out;
454                         ct->c_type = CT_TEXT;
455                         ct->c_ctinitfnx = InitText;
456                 }
457         }
458
459         /* Use default Transfer-Encoding, if necessary */
460         if (!ct->c_celine) {
461                 ct->c_encoding = CE_7BIT;
462                 Init7Bit(ct);
463         }
464
465         return ct;
466
467 out:
468         free_content(ct);
469         return NULL;
470 }
471
472
473 /*
474 ** small routine to add header field to list
475 */
476
477 int
478 add_header(CT ct, char *name, char *value)
479 {
480         HF hp;
481
482         /* allocate header field structure */
483         hp = mh_xmalloc(sizeof(*hp));
484
485         /* link data into header structure */
486         hp->name = name;
487         hp->value = value;
488         hp->next = NULL;
489
490         /* link header structure into the list */
491         if (ct->c_first_hf == NULL) {
492                 ct->c_first_hf = hp;  /* this is the first */
493                 ct->c_last_hf = hp;
494         } else {
495                 ct->c_last_hf->next = hp;  /* add it to the end */
496                 ct->c_last_hf = hp;
497         }
498
499         return 0;
500 }
501
502
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If it does not, insert `; name="value"' right
** before the first semicolon of buf, or append it if buf has no
** semicolon.  Note that name should not contain a trailing =, and
** that the quotes around value are added here.  Typical usage:
** make sure that a Content-Disposition header contains
** filename="foo".
**
** buf is either returned unchanged (when buf or value is NULL, or
** when name= is already present) or it is freed and a newly
** allocated, newline-terminated replacement is returned.  Hence
** callers must continue with the returned pointer only.
** name is assumed to be non-NULL.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *newbuf = (char *)buf;
	char *name_plus_equal;
	char *insertion, *prefix, *semicolon;
	int present;
	size_t len;

	if (!buf || !value)
		return newbuf;

	name_plus_equal = concat(name, "=", NULL);
	present = (strstr((char *)buf, name_plus_equal) != NULL);
	free(name_plus_equal);
	if (present)
		return newbuf;

	/*
	** Trim trailing space, esp. newline.  Counting down a length
	** keeps this well defined for an empty or all-blank buf.
	*/
	for (len = strlen((char *)buf); len > 0 && isspace(buf[len - 1]);
			len--) {
		buf[len - 1] = '\0';
	}

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	/*
	** Insert at first semicolon, if any.
	** If none, append to end.
	*/
	prefix = getcpy((char *)buf);
	if ((semicolon = strchr(prefix, ';'))) {
		/* save the tail before cutting prefix at the semicolon */
		char *suffix = concat(semicolon, NULL);

		*semicolon = '\0';
		newbuf = concat(prefix, insertion, suffix, "\n", NULL);
		free(suffix);
	} else {
		newbuf = concat((char *)buf, insertion, "\n", NULL);
	}

	free(prefix);
	free(insertion);
	free(buf);

	return newbuf;
}
560
/*
** Extract just the foo of name_suffix="foo", if any, from value.
** Note that, for example, a name_suffix of name will also match
** filename="foo", and return foo.
**
** If value contains no name_suffix=" at all, or if the closing
** quote is missing, value itself is returned.  Otherwise the
** result is a newly allocated string.  Callers can tell the two
** cases apart by comparing the result with value.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern = concat(name_suffix, "=\"", NULL);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	free(pattern);
	if (!match)
		return value;

	/* pattern ends in a quote, so there is one at or after match */
	begin = strchr(match, '"') + 1;

	/* do not run off the end if the closing quote is missing */
	end = strchr(begin, '"');
	if (!end)
		return value;

	len = end - begin;
	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
593
594 /*
595 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
596 ** directives.  Fills in the information of the CTinfo structure.
597 */
598 int
599 get_ctinfo(unsigned char *cp, CT ct, int magic)
600 {
601         int i;
602         unsigned char *dp;
603         char **ap, **ep;
604         char c;
605         CI ci;
606
607         ci = &ct->c_ctinfo;
608         i = strlen(invo_name) + 2;
609
610         /* store copy of Content-Type line */
611         cp = ct->c_ctline = getcpy(cp);
612
613         while (isspace(*cp))  /* trim leading spaces */
614                 cp++;
615
616         /* change newlines to spaces */
617         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
618                 *dp++ = ' ';
619
620         /* trim trailing spaces */
621         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
622                 if (!isspace(*dp))
623                         break;
624         *++dp = '\0';
625
626         if (debugsw)
627                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
628
629         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
630                 return NOTOK;
631
632         for (dp = cp; istoken(*dp); dp++)
633                 continue;
634         c = *dp, *dp = '\0';
635         ci->ci_type = getcpy(cp);  /* store content type */
636         *dp = c, cp = dp;
637
638         if (!*ci->ci_type) {
639                 advise(NULL, "invalid %s: field in message %s (empty type)",
640                                 TYPE_FIELD, ct->c_file);
641                 return NOTOK;
642         }
643
644         /* down case the content type string */
645         for (dp = ci->ci_type; *dp; dp++)
646                 if (isalpha(*dp) && isupper(*dp))
647                         *dp = tolower(*dp);
648
649         while (isspace(*cp))
650                 cp++;
651
652         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
653                 return NOTOK;
654
655         if (*cp != '/') {
656                 if (!magic)
657                         ci->ci_subtype = getcpy("");
658                 goto magic_skip;
659         }
660
661         cp++;
662         while (isspace(*cp))
663                 cp++;
664
665         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
666                 return NOTOK;
667
668         for (dp = cp; istoken(*dp); dp++)
669                 continue;
670         c = *dp, *dp = '\0';
671         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
672         *dp = c, cp = dp;
673
674         if (!*ci->ci_subtype) {
675                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
676                 return NOTOK;
677         }
678
679         /* down case the content subtype string */
680         for (dp = ci->ci_subtype; *dp; dp++)
681                 if (isalpha(*dp) && isupper(*dp))
682                         *dp = tolower(*dp);
683
684 magic_skip:
685         while (isspace(*cp))
686                 cp++;
687
688         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
689                 return NOTOK;
690
691         /*
692         ** Parse attribute/value pairs given with Content-Type
693         */
694         ep = (ap = ci->ci_attrs) + NPARMS;
695         while (*cp == ';') {
696                 char *vp;
697                 unsigned char *up;
698
699                 if (ap >= ep) {
700                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
701                         return NOTOK;
702                 }
703
704                 cp++;
705                 while (isspace(*cp))
706                         cp++;
707
708                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
709                         return NOTOK;
710
711                 if (*cp == 0) {
712                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
713                         return OK;
714                 }
715
716                 /* down case the attribute name */
717                 for (dp = cp; istoken(*dp); dp++)
718                         if (isalpha(*dp) && isupper(*dp))
719                                 *dp = tolower(*dp);
720
721                 for (up = dp; isspace(*dp);)
722                         dp++;
723                 if (dp == cp || *dp != '=') {
724                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
725                         return NOTOK;
726                 }
727
728                 vp = (*ap = getcpy(cp)) + (up - cp);
729                 *vp = '\0';
730                 for (dp++; isspace(*dp);)
731                         dp++;
732
733                 /* now add the attribute value */
734                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
735
736                 if (*dp == '"') {
737                         for (cp = ++dp, dp = vp;;) {
738                                 switch (c = *cp++) {
739                                 case '\0':
740 bad_quote:
741                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
742                                         return NOTOK;
743
744                                 case '\\':
745                                         *dp++ = c;
746                                         if ((c = *cp++) == '\0')
747                                                 goto bad_quote;
748                                         /* else fall... */
749
750                                 default:
751                                         *dp++ = c;
752                                         continue;
753
754                                 case '"':
755                                         *dp = '\0';
756                                         break;
757                                 }
758                                 break;
759                         }
760                 } else {
761                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
762                                 continue;
763                         *dp = '\0';
764                 }
765                 if (!*vp) {
766                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
767                         return NOTOK;
768                 }
769                 ap++;
770
771                 while (isspace(*cp))
772                         cp++;
773
774                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
775                         return NOTOK;
776         }
777
778         /*
779         ** Get any <Content-Id> given in buffer
780         */
781         if (magic && *cp == '<') {
782                 if (ct->c_id) {
783                         free(ct->c_id);
784                         ct->c_id = NULL;
785                 }
786                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
787                         advise(NULL, "invalid ID in message %s", ct->c_file);
788                         return NOTOK;
789                 }
790                 c = *dp;
791                 *dp = '\0';
792                 if (*ct->c_id)
793                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
794                 else
795                         ct->c_id = NULL;
796                 *dp++ = c;
797                 cp = dp;
798
799                 while (isspace(*cp))
800                         cp++;
801         }
802
803         /*
804         ** Get any [Content-Description] given in buffer.
805         */
806         if (magic && *cp == '[') {
807                 ct->c_descr = ++cp;
808                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
809                         if (*dp == ']')
810                                 break;
811                 if (dp < cp) {
812                         advise(NULL, "invalid description in message %s",
813                                         ct->c_file);
814                         ct->c_descr = NULL;
815                         return NOTOK;
816                 }
817
818                 c = *dp;
819                 *dp = '\0';
820                 if (*ct->c_descr)
821                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
822                 else
823                         ct->c_descr = NULL;
824                 *dp++ = c;
825                 cp = dp;
826
827                 while (isspace(*cp))
828                         cp++;
829         }
830
831         /*
832         ** Get any {Content-Disposition} given in buffer.
833         */
834         if (magic && *cp == '{') {
835                 ct->c_dispo = ++cp;
836                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
837                         if (*dp == '}')
838                                 break;
839                 if (dp < cp) {
840                         advise(NULL, "invalid disposition in message %s",
841                                         ct->c_file);
842                         ct->c_dispo = NULL;
843                         return NOTOK;
844                 }
845
846                 c = *dp;
847                 *dp = '\0';
848                 if (*ct->c_dispo)
849                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
850                 else
851                         ct->c_dispo = NULL;
852                 *dp++ = c;
853                 cp = dp;
854
855                 while (isspace(*cp))
856                         cp++;
857         }
858
859         /*
860         ** Check if anything is left over
861         */
862         if (*cp) {
863                 if (magic) {
864                         ci->ci_magic = getcpy(cp);
865
866                         /*
867                         ** If there is a Content-Disposition header and
868                         ** it doesn't have a *filename=, extract it from
869                         ** the magic contents.  The mhbasename call skips
870                         ** any leading directory components.
871                         */
872                         if (ct->c_dispo)
873                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
874                         } else
875                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
876         }
877
878         return OK;
879 }
880
881
882 static int
883 get_comment(CT ct, unsigned char **ap, int istype)
884 {
885         int i;
886         char *bp;
887         unsigned char *cp;
888         char c, buffer[BUFSIZ], *dp;
889         CI ci;
890
891         ci = &ct->c_ctinfo;
892         cp = *ap;
893         bp = buffer;
894         cp++;
895
896         for (i = 0;;) {
897                 switch (c = *cp++) {
898                 case '\0':
899 invalid:
900                 advise(NULL, "invalid comment in message %s's %s: field",
901                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
902                 return NOTOK;
903
904                 case '\\':
905                         *bp++ = c;
906                         if ((c = *cp++) == '\0')
907                                 goto invalid;
908                         *bp++ = c;
909                         continue;
910
911                 case '(':
912                         i++;
913                         /* and fall... */
914                 default:
915                         *bp++ = c;
916                         continue;
917
918                 case ')':
919                         if (--i < 0)
920                                 break;
921                         *bp++ = c;
922                         continue;
923                 }
924                 break;
925         }
926         *bp = '\0';
927
928         if (istype) {
929                 if ((dp = ci->ci_comment)) {
930                         ci->ci_comment = concat(dp, " ", buffer, NULL);
931                         free(dp);
932                 } else {
933                         ci->ci_comment = getcpy(buffer);
934                 }
935         }
936
937         while (isspace(*cp))
938                 cp++;
939
940         *ap = cp;
941         return OK;
942 }
943
944
945 /*
946 ** CONTENTS
947 **
948 ** Handles content types audio, image, and video.
949 ** There's not much to do right here.
950 */
951
952 static int
953 InitGeneric(CT ct)
954 {
955         return OK;  /* not much to do here */
956 }
957
958
959 /*
960 ** TEXT
961 */
962
963 static int
964 InitText(CT ct)
965 {
966         char **ap, **ep;
967         struct k2v *kv;
968         struct text *t;
969         CI ci = &ct->c_ctinfo;
970
971         /* check for missing subtype */
972         if (!*ci->ci_subtype)
973                 ci->ci_subtype = add("plain", ci->ci_subtype);
974
975         /* match subtype */
976         for (kv = SubText; kv->kv_key; kv++)
977                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
978                         break;
979         ct->c_subtype = kv->kv_value;
980
981         /* allocate text character set structure */
982         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
983                 adios(NULL, "out of memory");
984         ct->c_ctparams = (void *) t;
985
986         /* scan for charset parameter */
987         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
988                 if (!mh_strcasecmp(*ap, "charset"))
989                         break;
990
991         /* check if content specified a character set */
992         if (*ap) {
993                 /* store its name */
994                 ct->c_charset = getcpy(norm_charmap(*ep));
995                 /* match character set or set to CHARSET_UNKNOWN */
996                 for (kv = Charset; kv->kv_key; kv++) {
997                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
998                                 break;
999                         }
1000                 }
1001                 t->tx_charset = kv->kv_value;
1002         } else {
1003                 t->tx_charset = CHARSET_UNSPECIFIED;
1004         }
1005
1006         return OK;
1007 }
1008
1009
1010 /*
1011 ** MULTIPART
1012 */
1013
1014 static int
1015 InitMultiPart(CT ct)
1016 {
1017         int inout;
1018         long last, pos;
1019         unsigned char *cp, *dp;
1020         char **ap, **ep;
1021         char *bp, buffer[BUFSIZ];
1022         struct multipart *m;
1023         struct k2v *kv;
1024         struct part *part, **next;
1025         CI ci = &ct->c_ctinfo;
1026         CT p;
1027         FILE *fp;
1028
1029         /*
1030         ** The encoding for multipart messages must be either
1031         ** 7bit, 8bit, or binary (per RFC2045).
1032         */
1033         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1034                 && ct->c_encoding != CE_BINARY) {
1035                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1036                 return NOTOK;
1037         }
1038
1039         /* match subtype */
1040         for (kv = SubMultiPart; kv->kv_key; kv++)
1041                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1042                         break;
1043         ct->c_subtype = kv->kv_value;
1044
1045         /*
1046         ** Check for "boundary" parameter, which is
1047         ** required for multipart messages.
1048         */
1049         bp = 0;
1050         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1051                 if (!mh_strcasecmp(*ap, "boundary")) {
1052                         bp = *ep;
1053                         break;
1054                 }
1055         }
1056
1057         /* complain if boundary parameter is missing */
1058         if (!*ap) {
1059                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1060                 return NOTOK;
1061         }
1062
1063         /* allocate primary structure for multipart info */
1064         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1065                 adios(NULL, "out of memory");
1066         ct->c_ctparams = (void *) m;
1067
1068         /* check if boundary parameter contains only whitespace characters */
1069         for (cp = bp; isspace(*cp); cp++)
1070                 continue;
1071         if (!*cp) {
1072                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1073                 return NOTOK;
1074         }
1075
1076         /* remove trailing whitespace from boundary parameter */
1077         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1078                 if (!isspace(*dp))
1079                         break;
1080         *++dp = '\0';
1081
1082         /* record boundary separators */
1083         m->mp_start = concat(bp, "\n", NULL);
1084         m->mp_stop = concat(bp, "--\n", NULL);
1085
1086         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1087                 advise(ct->c_file, "unable to open for reading");
1088                 return NOTOK;
1089         }
1090
1091         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1092         last = ct->c_end;
1093         next = &m->mp_parts;
1094         part = NULL;
1095         inout = 1;
1096
1097         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1098                 if (pos > last)
1099                         break;
1100
1101                 pos += strlen(buffer);
1102                 if (buffer[0] != '-' || buffer[1] != '-')
1103                         continue;
1104                 if (inout) {
1105                         if (strcmp(buffer + 2, m->mp_start)!=0)
1106                                 continue;
1107 next_part:
1108                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1109                                         == NULL)
1110                                 adios(NULL, "out of memory");
1111                         *next = part;
1112                         next = &part->mp_next;
1113
1114                         if (!(p = get_content(fp, ct->c_file,
1115                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1116                                 ct->c_fp = NULL;
1117                                 return NOTOK;
1118                         }
1119                         p->c_fp = NULL;
1120                         part->mp_part = p;
1121                         pos = p->c_begin;
1122                         fseek(fp, pos, SEEK_SET);
1123                         inout = 0;
1124                 } else {
1125                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1126                                 inout = 1;
1127 end_part:
1128                                 p = part->mp_part;
1129                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1130                                 if (p->c_end < p->c_begin)
1131                                         p->c_begin = p->c_end;
1132                                 if (inout)
1133                                         goto next_part;
1134                                 goto last_part;
1135                         } else {
1136                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1137                                         goto end_part;
1138                         }
1139                 }
1140         }
1141
1142         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1143         if (!inout && part) {
1144                 p = part->mp_part;
1145                 p->c_end = ct->c_end;
1146
1147                 if (p->c_begin >= p->c_end) {
1148                         for (next = &m->mp_parts; *next != part;
1149                                 next = &((*next)->mp_next))
1150                                 continue;
1151                         *next = NULL;
1152                         free_content(p);
1153                         free((char *) part);
1154                 }
1155         }
1156
1157 last_part:
1158         /* reverse the order of the parts for multipart/alternative */
1159         if (ct->c_subtype == MULTI_ALTERNATE)
1160                 reverse_parts(ct);
1161
1162         /*
1163         ** label all subparts with part number, and
1164         ** then initialize the content of the subpart.
1165         */
1166         {
1167                 int partnum;
1168                 char *pp;
1169                 char partnam[BUFSIZ];
1170
1171                 if (ct->c_partno) {
1172                         snprintf(partnam, sizeof(partnam), "%s.",
1173                                         ct->c_partno);
1174                         pp = partnam + strlen(partnam);
1175                 } else {
1176                         pp = partnam;
1177                 }
1178
1179                 for (part = m->mp_parts, partnum = 1; part;
1180                         part = part->mp_next, partnum++) {
1181                         p = part->mp_part;
1182
1183                         sprintf(pp, "%d", partnum);
1184                         p->c_partno = getcpy(partnam);
1185
1186                         /* initialize the content of the subparts */
1187                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1188                                 fclose(ct->c_fp);
1189                                 ct->c_fp = NULL;
1190                                 return NOTOK;
1191                         }
1192                 }
1193         }
1194
1195         fclose(ct->c_fp);
1196         ct->c_fp = NULL;
1197         return OK;
1198 }
1199
1200
1201 /*
1202 ** reverse the order of the parts of a multipart
1203 */
1204
1205 static void
1206 reverse_parts(CT ct)
1207 {
1208         int i;
1209         struct multipart *m;
1210         struct part **base, **bmp, **next, *part;
1211
1212         m = (struct multipart *) ct->c_ctparams;
1213
1214         /* if only one part, just return */
1215         if (!m->mp_parts || !m->mp_parts->mp_next)
1216                 return;
1217
1218         /* count number of parts */
1219         i = 0;
1220         for (part = m->mp_parts; part; part = part->mp_next)
1221                 i++;
1222
1223         /* allocate array of pointers to the parts */
1224         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1225                 adios(NULL, "out of memory");
1226         bmp = base;
1227
1228         /* point at all the parts */
1229         for (part = m->mp_parts; part; part = part->mp_next)
1230                 *bmp++ = part;
1231         *bmp = NULL;
1232
1233         /* reverse the order of the parts */
1234         next = &m->mp_parts;
1235         for (bmp--; bmp >= base; bmp--) {
1236                 part = *bmp;
1237                 *next = part;
1238                 next = &part->mp_next;
1239         }
1240         *next = NULL;
1241
1242         /* free array of pointers */
1243         free((char *) base);
1244 }
1245
1246
1247 /*
1248 ** MESSAGE
1249 */
1250
1251 static int
1252 InitMessage(CT ct)
1253 {
1254         struct k2v *kv;
1255         CI ci = &ct->c_ctinfo;
1256
1257         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1258                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1259                 return NOTOK;
1260         }
1261
1262         /* check for missing subtype */
1263         if (!*ci->ci_subtype)
1264                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1265
1266         /* match subtype */
1267         for (kv = SubMessage; kv->kv_key; kv++)
1268                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1269                         break;
1270         ct->c_subtype = kv->kv_value;
1271
1272         switch (ct->c_subtype) {
1273         case MESSAGE_RFC822:
1274                 break;
1275
1276         case MESSAGE_PARTIAL:
1277                 {
1278                 char **ap, **ep;
1279                 struct partial *p;
1280
1281                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1282                         adios(NULL, "out of memory");
1283                 ct->c_ctparams = (void *) p;
1284
1285                 /*
1286                 ** scan for parameters "id", "number",
1287                 ** and "total"
1288                 */
1289                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1290                         if (!mh_strcasecmp(*ap, "id")) {
1291                                 p->pm_partid = getcpy(*ep);
1292                                 continue;
1293                         }
1294                         if (!mh_strcasecmp(*ap, "number")) {
1295                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1296 invalid_param:
1297                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1298                                         return NOTOK;
1299                                 }
1300                                 continue;
1301                         }
1302                         if (!mh_strcasecmp(*ap, "total")) {
1303                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1304                                                 p->pm_maxno < 1)
1305                                         goto invalid_param;
1306                                 continue;
1307                         }
1308                 }
1309
1310                 if (!p->pm_partid || !p->pm_partno
1311                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1312                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1313                         return NOTOK;
1314                 }
1315                 }
1316                 break;
1317
1318         case MESSAGE_EXTERNAL:
1319                 {
1320                 CT p;
1321                 FILE *fp;
1322
1323                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1324                         advise(ct->c_file, "unable to open for reading");
1325                         return NOTOK;
1326                 }
1327
1328                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1329
1330                 if (!(p = get_content(fp, ct->c_file, 0))) {
1331                         ct->c_fp = NULL;
1332                         return NOTOK;
1333                 }
1334
1335                 p->c_fp = NULL;
1336                 p->c_end = p->c_begin;
1337
1338                 fclose(ct->c_fp);
1339                 ct->c_fp = NULL;
1340
1341                 switch (p->c_type) {
1342                 case CT_MULTIPART:
1343                         break;
1344
1345                 case CT_MESSAGE:
1346                         if (p->c_subtype != MESSAGE_RFC822)
1347                                 break;
1348                         /* else fall... */
1349                 default:
1350                         if (p->c_ctinitfnx)
1351                                 (*p->c_ctinitfnx) (p);
1352                         break;
1353                 }
1354                 }
1355                 break;
1356
1357         default:
1358                 break;
1359         }
1360
1361         return OK;
1362 }
1363
1364
1365 /*
1366 ** APPLICATION
1367 */
1368
1369 static int
1370 InitApplication(CT ct)
1371 {
1372         struct k2v *kv;
1373         CI ci = &ct->c_ctinfo;
1374
1375         /* match subtype */
1376         for (kv = SubApplication; kv->kv_key; kv++)
1377                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1378                         break;
1379         ct->c_subtype = kv->kv_value;
1380
1381         return OK;
1382 }
1383
1384
1385 /*
1386 ** TRANSFER ENCODINGS
1387 */
1388
1389 static int
1390 init_encoding(CT ct, OpenCEFunc openfnx)
1391 {
1392         CE ce;
1393
1394         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1395                 adios(NULL, "out of memory");
1396
1397         ct->c_cefile     = ce;
1398         ct->c_ceopenfnx  = openfnx;
1399         ct->c_ceclosefnx = close_encoding;
1400         ct->c_cesizefnx  = size_encoding;
1401
1402         return OK;
1403 }
1404
1405
1406 void
1407 close_encoding(CT ct)
1408 {
1409         CE ce;
1410
1411         if (!(ce = ct->c_cefile))
1412                 return;
1413
1414         if (ce->ce_fp) {
1415                 fclose(ce->ce_fp);
1416                 ce->ce_fp = NULL;
1417         }
1418 }
1419
1420
1421 static unsigned long
1422 size_encoding(CT ct)
1423 {
1424         int fd;
1425         unsigned long size;
1426         char *file;
1427         CE ce;
1428         struct stat st;
1429
1430         if (!(ce = ct->c_cefile))
1431                 return (ct->c_end - ct->c_begin);
1432
1433         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1434                 return (long) st.st_size;
1435
1436         if (ce->ce_file) {
1437                 if (stat(ce->ce_file, &st) != NOTOK)
1438                         return (long) st.st_size;
1439                 else
1440                         return 0L;
1441         }
1442
1443         if (ct->c_encoding == CE_EXTERNAL)
1444                 return (ct->c_end - ct->c_begin);
1445
1446         file = NULL;
1447         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1448                 return (ct->c_end - ct->c_begin);
1449
1450         if (fstat(fd, &st) != NOTOK)
1451                 size = (long) st.st_size;
1452         else
1453                 size = 0L;
1454
1455         (*ct->c_ceclosefnx) (ct);
1456         return size;
1457 }
1458
1459
1460 /*
1461 ** BASE64
1462 */
1463
/*
** Maps a 7-bit character code to its 6-bit BASE64 value.
** Characters that are not part of the BASE64 alphabet map to 0xff.
** Each row covers eight consecutive character codes.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 - 0x07 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x08 - 0x0f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 - 0x17 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x18 - 0x1f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20 - 0x27 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x28 - 0x2f: '+' is 0x3e, '/' is 0x3f */
        0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        /* 0x30 - 0x37: '0' .. '7' */
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
        /* 0x38 - 0x3f: '8', '9' */
        0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 - 0x47: 'A' .. 'G' */
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
        /* 0x48 - 0x4f: 'H' .. 'O' */
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        /* 0x50 - 0x57: 'P' .. 'W' */
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
        /* 0x58 - 0x5f: 'X' .. 'Z' */
        0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 - 0x67: 'a' .. 'g' */
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
        /* 0x68 - 0x6f: 'h' .. 'o' */
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        /* 0x70 - 0x77: 'p' .. 'w' */
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
        /* 0x78 - 0x7f: 'x' .. 'z' */
        0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1482
1483
1484 static int
1485 InitBase64(CT ct)
1486 {
1487         return init_encoding(ct, openBase64);
1488 }
1489
1490
1491 static int
1492 openBase64(CT ct, char **file)
1493 {
1494         int bitno, cc;
1495         int fd, len, skip, own_ct_fp = 0;
1496         unsigned long bits;
1497         unsigned char value, *b, *b1, *b2, *b3;
1498         unsigned char *cp, *ep;
1499         char buffer[BUFSIZ];
1500         /* sbeck -- handle suffixes */
1501         CI ci;
1502         CE ce;
1503
1504         b  = (unsigned char *) &bits;
1505         b1 = &b[endian > 0 ? 1 : 2];
1506         b2 = &b[endian > 0 ? 2 : 1];
1507         b3 = &b[endian > 0 ? 3 : 0];
1508
1509         ce = ct->c_cefile;
1510         if (ce->ce_fp) {
1511                 fseek(ce->ce_fp, 0L, SEEK_SET);
1512                 goto ready_to_go;
1513         }
1514
1515         if (ce->ce_file) {
1516                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1517                         content_error(ce->ce_file, ct,
1518                                         "unable to fopen for reading");
1519                         return NOTOK;
1520                 }
1521                 goto ready_to_go;
1522         }
1523
1524         if (*file == NULL) {
1525                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1526                 ce->ce_unlink = 1;
1527         } else {
1528                 ce->ce_file = getcpy(*file);
1529                 ce->ce_unlink = 0;
1530         }
1531
1532         /* sbeck@cise.ufl.edu -- handle suffixes */
1533         ci = &ct->c_ctinfo;
1534         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1535                         invo_name, ci->ci_type, ci->ci_subtype);
1536         cp = context_find(buffer);
1537         if (cp == NULL || *cp == '\0') {
1538                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1539                                 ci->ci_type);
1540                 cp = context_find(buffer);
1541         }
1542         if (cp != NULL && *cp != '\0') {
1543                 if (ce->ce_unlink) {
1544                         /*
1545                         ** Temporary file already exists, so we rename to
1546                         ** version with extension.
1547                         */
1548                         char *file_org = strdup(ce->ce_file);
1549                         ce->ce_file = add(cp, ce->ce_file);
1550                         if (rename(file_org, ce->ce_file)) {
1551                                 adios(ce->ce_file, "unable to rename %s to ",
1552                                                 file_org);
1553                         }
1554                         free(file_org);
1555
1556                 } else {
1557                         ce->ce_file = add(cp, ce->ce_file);
1558                 }
1559         }
1560
1561         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1562                 content_error(ce->ce_file, ct,
1563                                 "unable to fopen for reading/writing");
1564                 return NOTOK;
1565         }
1566
1567         if ((len = ct->c_end - ct->c_begin) < 0)
1568                 adios(NULL, "internal error(1)");
1569
1570         if (!ct->c_fp) {
1571                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1572                         content_error(ct->c_file, ct,
1573                                         "unable to open for reading");
1574                         return NOTOK;
1575                 }
1576                 own_ct_fp = 1;
1577         }
1578
1579         bitno = 18;
1580         bits = 0L;
1581         skip = 0;
1582
1583         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1584         while (len > 0) {
1585                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1586                 case NOTOK:
1587                         content_error(ct->c_file, ct, "error reading from");
1588                         goto clean_up;
1589
1590                 case OK:
1591                         content_error(NULL, ct, "premature eof");
1592                         goto clean_up;
1593
1594                 default:
1595                         if (cc > len)
1596                                 cc = len;
1597                         len -= cc;
1598
1599                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1600                                 switch (*cp) {
1601                                 default:
1602                                         if (isspace(*cp))
1603                                                 break;
1604                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1605                                                 if (debugsw) {
1606                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1607                                                 }
1608                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1609                                                 continue;
1610                                         }
1611
1612                                         bits |= value << bitno;
1613 test_end:
1614                                         if ((bitno -= 6) < 0) {
1615                                                 putc((char) *b1, ce->ce_fp);
1616                                                 if (skip < 2) {
1617                                                         putc((char) *b2, ce->ce_fp);
1618                                                         if (skip < 1) {
1619                                                                 putc((char) *b3, ce->ce_fp);
1620                                                         }
1621                                                 }
1622
1623                                                 if (ferror(ce->ce_fp)) {
1624                                                         content_error(ce->ce_file, ct,
1625                                                                                    "error writing to");
1626                                                         goto clean_up;
1627                                                 }
1628                                                 bitno = 18, bits = 0L, skip = 0;
1629                                         }
1630                                         break;
1631
1632                                 case '=':
1633                                         if (++skip > 3)
1634                                                 goto self_delimiting;
1635                                         goto test_end;
1636                                 }
1637                         }
1638                 }
1639         }
1640
1641         if (bitno != 18) {
1642                 if (debugsw)
1643                         fprintf(stderr, "premature ending (bitno %d)\n",
1644                                         bitno);
1645
1646                 content_error(NULL, ct, "invalid BASE64 encoding");
1647                 goto clean_up;
1648         }
1649
1650 self_delimiting:
1651         fseek(ct->c_fp, 0L, SEEK_SET);
1652
1653         if (fflush(ce->ce_fp)) {
1654                 content_error(ce->ce_file, ct, "error writing to");
1655                 goto clean_up;
1656         }
1657
1658         fseek(ce->ce_fp, 0L, SEEK_SET);
1659
1660 ready_to_go:
1661         *file = ce->ce_file;
1662         if (own_ct_fp) {
1663                 fclose(ct->c_fp);
1664                 ct->c_fp = NULL;
1665         }
1666         return fileno(ce->ce_fp);
1667
1668 clean_up:
1669         free_encoding(ct, 0);
1670         if (own_ct_fp) {
1671                 fclose(ct->c_fp);
1672                 ct->c_fp = NULL;
1673         }
1674         return NOTOK;
1675 }
1676
1677
1678 /*
1679 ** QUOTED PRINTABLE
1680 */
1681
/*
** Maps a 7-bit character code to the value of the hexadecimal digit
** it represents.  Both upper and lower case letters are accepted.
** Characters that are not hexadecimal digits map to 0x00.
** Each row covers eight consecutive character codes.
*/
static char hex2nib[0x80] = {
        /* 0x00 - 0x07 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x08 - 0x0f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x10 - 0x17 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x18 - 0x1f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x20 - 0x27 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x28 - 0x2f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x30 - 0x37: '0' .. '7' */
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        /* 0x38 - 0x3f: '8', '9' */
        0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x40 - 0x47: 'A' .. 'F' */
        0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
        /* 0x48 - 0x4f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x50 - 0x57 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x58 - 0x5f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x60 - 0x67: 'a' .. 'f' */
        0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
        /* 0x68 - 0x6f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x70 - 0x77 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x78 - 0x7f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1700
1701
1702 static int
1703 InitQuoted(CT ct)
1704 {
1705         return init_encoding(ct, openQuoted);
1706 }
1707
1708
1709 static int
1710 openQuoted(CT ct, char **file)
1711 {
1712         int cc, len, quoted, own_ct_fp = 0;
1713         unsigned char *cp, *ep;
1714         char buffer[BUFSIZ];
1715         unsigned char mask = 0;
1716         CE ce;
1717         /* sbeck -- handle suffixes */
1718         CI ci;
1719
1720         ce = ct->c_cefile;
1721         if (ce->ce_fp) {
1722                 fseek(ce->ce_fp, 0L, SEEK_SET);
1723                 goto ready_to_go;
1724         }
1725
1726         if (ce->ce_file) {
1727                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1728                         content_error(ce->ce_file, ct,
1729                                         "unable to fopen for reading");
1730                         return NOTOK;
1731                 }
1732                 goto ready_to_go;
1733         }
1734
1735         if (*file == NULL) {
1736                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1737                 ce->ce_unlink = 1;
1738         } else {
1739                 ce->ce_file = getcpy(*file);
1740                 ce->ce_unlink = 0;
1741         }
1742
1743         /* sbeck@cise.ufl.edu -- handle suffixes */
1744         ci = &ct->c_ctinfo;
1745         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1746                         invo_name, ci->ci_type, ci->ci_subtype);
1747         cp = context_find(buffer);
1748         if (cp == NULL || *cp == '\0') {
1749                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1750                                 ci->ci_type);
1751                 cp = context_find(buffer);
1752         }
1753         if (cp != NULL && *cp != '\0') {
1754                 if (ce->ce_unlink) {
1755                         /*
1756                         ** Temporary file already exists, so we rename to
1757                         ** version with extension.
1758                         */
1759                         char *file_org = strdup(ce->ce_file);
1760                         ce->ce_file = add(cp, ce->ce_file);
1761                         if (rename(file_org, ce->ce_file)) {
1762                                 adios(ce->ce_file, "unable to rename %s to ",
1763                                                 file_org);
1764                         }
1765                         free(file_org);
1766
1767                 } else {
1768                         ce->ce_file = add(cp, ce->ce_file);
1769                 }
1770         }
1771
1772         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1773                 content_error(ce->ce_file, ct,
1774                                 "unable to fopen for reading/writing");
1775                 return NOTOK;
1776         }
1777
1778         if ((len = ct->c_end - ct->c_begin) < 0)
1779                 adios(NULL, "internal error(2)");
1780
1781         if (!ct->c_fp) {
1782                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1783                         content_error(ct->c_file, ct,
1784                                         "unable to open for reading");
1785                         return NOTOK;
1786                 }
1787                 own_ct_fp = 1;
1788         }
1789
1790         quoted = 0;
1791
1792         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1793         while (len > 0) {
1794                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1795                         content_error(NULL, ct, "premature eof");
1796                         goto clean_up;
1797                 }
1798
1799                 if ((cc = strlen(buffer)) > len)
1800                         cc = len;
1801                 len -= cc;
1802
1803                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1804                         if (!isspace(*ep))
1805                                 break;
1806                 *++ep = '\n', ep++;
1807
1808                 for (; cp < ep; cp++) {
1809                         if (quoted > 0) {
1810                                 /* in an escape sequence */
1811                                 if (quoted == 1) {
1812                                         /* at byte 1 of an escape sequence */
1813                                         mask = hex2nib[*cp & 0x7f];
1814                                         /* next is byte 2 */
1815                                         quoted = 2;
1816                                 } else {
1817                                         /* at byte 2 of an escape sequence */
1818                                         mask <<= 4;
1819                                         mask |= hex2nib[*cp & 0x7f];
1820                                         putc(mask, ce->ce_fp);
1821                                         if (ferror(ce->ce_fp)) {
1822                                                 content_error(ce->ce_file, ct, "error writing to");
1823                                                 goto clean_up;
1824                                         }
1825                                         /*
1826                                         ** finished escape sequence; next may
1827                                         ** be literal or a new escape sequence
1828                                         */
1829                                         quoted = 0;
1830                                 }
1831                                 /* on to next byte */
1832                                 continue;
1833                         }
1834
1835                         /* not in an escape sequence */
1836                         if (*cp == '=') {
1837                                 /*
1838                                 ** starting an escape sequence,
1839                                 ** or invalid '='?
1840                                 */
1841                                 if (cp + 1 < ep && cp[1] == '\n') {
1842                                         /* "=\n" soft line break, eat the \n */
1843                                         cp++;
1844                                         continue;
1845                                 }
1846                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1847                                         /*
1848                                         ** We don't have 2 bytes left,
1849                                         ** so this is an invalid escape
1850                                         ** sequence; just show the raw bytes
1851                                         ** (below).
1852                                         */
1853                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1854                                         /*
1855                                         ** Next 2 bytes are hex digits,
1856                                         ** making this a valid escape
1857                                         ** sequence; let's decode it (above).
1858                                         */
1859                                         quoted = 1;
1860                                         continue;
1861                                 } else {
1862                                         /*
1863                                         ** One or both of the next 2 is
1864                                         ** out of range, making this an
1865                                         ** invalid escape sequence; just
1866                                         ** show the raw bytes (below).
1867                                         */
1868                                 }
1869                         }
1870
1871                         /* Just show the raw byte. */
1872                         putc(*cp, ce->ce_fp);
1873                         if (ferror(ce->ce_fp)) {
1874                                 content_error(ce->ce_file, ct,
1875                                                 "error writing to");
1876                                 goto clean_up;
1877                         }
1878                 }
1879         }
1880         if (quoted) {
1881                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1882                 goto clean_up;
1883         }
1884
1885         fseek(ct->c_fp, 0L, SEEK_SET);
1886
1887         if (fflush(ce->ce_fp)) {
1888                 content_error(ce->ce_file, ct, "error writing to");
1889                 goto clean_up;
1890         }
1891
1892         fseek(ce->ce_fp, 0L, SEEK_SET);
1893
1894 ready_to_go:
1895         *file = ce->ce_file;
1896         if (own_ct_fp) {
1897                 fclose(ct->c_fp);
1898                 ct->c_fp = NULL;
1899         }
1900         return fileno(ce->ce_fp);
1901
1902 clean_up:
1903         free_encoding(ct, 0);
1904         if (own_ct_fp) {
1905                 fclose(ct->c_fp);
1906                 ct->c_fp = NULL;
1907         }
1908         return NOTOK;
1909 }
1910
1911
1912 /*
1913 ** 7BIT
1914 */
1915
1916 static int
1917 Init7Bit(CT ct)
1918 {
1919         if (init_encoding(ct, open7Bit) == NOTOK)
1920                 return NOTOK;
1921
1922         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1923         return OK;
1924 }
1925
1926
1927 int
1928 open7Bit(CT ct, char **file)
1929 {
1930         int cc, fd, len, own_ct_fp = 0;
1931         char buffer[BUFSIZ];
1932         /* sbeck -- handle suffixes */
1933         char *cp;
1934         CI ci;
1935         CE ce;
1936
1937         ce = ct->c_cefile;
1938         if (ce->ce_fp) {
1939                 fseek(ce->ce_fp, 0L, SEEK_SET);
1940                 goto ready_to_go;
1941         }
1942
1943         if (ce->ce_file) {
1944                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1945                         content_error(ce->ce_file, ct,
1946                                         "unable to fopen for reading");
1947                         return NOTOK;
1948                 }
1949                 goto ready_to_go;
1950         }
1951
1952         if (*file == NULL) {
1953                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1954                 ce->ce_unlink = 1;
1955         } else {
1956                 ce->ce_file = getcpy(*file);
1957                 ce->ce_unlink = 0;
1958         }
1959
1960         /* sbeck@cise.ufl.edu -- handle suffixes */
1961         ci = &ct->c_ctinfo;
1962         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1963                         invo_name, ci->ci_type, ci->ci_subtype);
1964         cp = context_find(buffer);
1965         if (cp == NULL || *cp == '\0') {
1966                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1967                                 ci->ci_type);
1968                 cp = context_find(buffer);
1969         }
1970         if (cp != NULL && *cp != '\0') {
1971                 if (ce->ce_unlink) {
1972                         /*
1973                         ** Temporary file already exists, so we rename to
1974                         ** version with extension.
1975                         */
1976                         char *file_org = strdup(ce->ce_file);
1977                         ce->ce_file = add(cp, ce->ce_file);
1978                         if (rename(file_org, ce->ce_file)) {
1979                                 adios(ce->ce_file, "unable to rename %s to ",
1980                                                 file_org);
1981                         }
1982                         free(file_org);
1983
1984                 } else {
1985                         ce->ce_file = add(cp, ce->ce_file);
1986                 }
1987         }
1988
1989         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1990                 content_error(ce->ce_file, ct,
1991                                 "unable to fopen for reading/writing");
1992                 return NOTOK;
1993         }
1994
1995         if (ct->c_type == CT_MULTIPART) {
1996                 char **ap, **ep;
1997                 CI ci = &ct->c_ctinfo;
1998
1999                 len = 0;
2000                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2001                                 ci->ci_subtype);
2002                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2003                                 strlen(ci->ci_subtype);
2004                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2005                         putc(';', ce->ce_fp);
2006                         len++;
2007
2008                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2009                                         *ap, *ep);
2010
2011                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2012                                 fputs("\n\t", ce->ce_fp);
2013                                 len = 8;
2014                         } else {
2015                                 putc(' ', ce->ce_fp);
2016                                 len++;
2017                         }
2018                         fprintf(ce->ce_fp, "%s", buffer);
2019                         len += cc;
2020                 }
2021
2022                 if (ci->ci_comment) {
2023                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2024                                                 >= CPERLIN) {
2025                                 fputs("\n\t", ce->ce_fp);
2026                                 len = 8;
2027                         } else {
2028                                 putc(' ', ce->ce_fp);
2029                                 len++;
2030                         }
2031                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2032                         len += cc;
2033                 }
2034                 fprintf(ce->ce_fp, "\n");
2035                 if (ct->c_id)
2036                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2037                 if (ct->c_descr)
2038                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2039                 if (ct->c_dispo)
2040                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2041                 fprintf(ce->ce_fp, "\n");
2042         }
2043
2044         if ((len = ct->c_end - ct->c_begin) < 0)
2045                 adios(NULL, "internal error(3)");
2046
2047         if (!ct->c_fp) {
2048                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2049                         content_error(ct->c_file, ct,
2050                                         "unable to open for reading");
2051                         return NOTOK;
2052                 }
2053                 own_ct_fp = 1;
2054         }
2055
2056         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2057         while (len > 0)
2058                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2059                 case NOTOK:
2060                         content_error(ct->c_file, ct, "error reading from");
2061                         goto clean_up;
2062
2063                 case OK:
2064                         content_error(NULL, ct, "premature eof");
2065                         goto clean_up;
2066
2067                 default:
2068                         if (cc > len)
2069                                 cc = len;
2070                         len -= cc;
2071
2072                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2073                         if (ferror(ce->ce_fp)) {
2074                                 content_error(ce->ce_file, ct,
2075                                                 "error writing to");
2076                                 goto clean_up;
2077                         }
2078                 }
2079
2080         fseek(ct->c_fp, 0L, SEEK_SET);
2081
2082         if (fflush(ce->ce_fp)) {
2083                 content_error(ce->ce_file, ct, "error writing to");
2084                 goto clean_up;
2085         }
2086
2087         fseek(ce->ce_fp, 0L, SEEK_SET);
2088
2089 ready_to_go:
2090         *file = ce->ce_file;
2091         if (own_ct_fp) {
2092                 fclose(ct->c_fp);
2093                 ct->c_fp = NULL;
2094         }
2095         return fileno(ce->ce_fp);
2096
2097 clean_up:
2098         free_encoding(ct, 0);
2099         if (own_ct_fp) {
2100                 fclose(ct->c_fp);
2101                 ct->c_fp = NULL;
2102         }
2103         return NOTOK;
2104 }