mhshow/mhstore: Removed support for retrieving message/external-body parts.
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <h/signals.h>
12 #include <errno.h>
13 #include <setjmp.h>
14 #include <signal.h>
15 #include <h/tws.h>
16 #include <h/mime.h>
17 #include <h/mhparse.h>
18 #include <h/utils.h>
19
/*
** When non-zero, the header parsers in this file echo the
** MIME-Version and Content-Type values they see to stderr.
*/
extern int debugsw;

extern int endian;  /* mhmisc.c */

extern pid_t xpid;  /* mhshowsbr.c  */

/*
** Directory to place temp files.  This must
** be set before these routines are called.
*/
char *tmp;
31
/*
** Structures for TEXT messages
**
** SubText pairs each known text subtype name with its TEXT_* code.
** The entry with the NULL name terminates the table and carries
** the code for unrecognized subtypes.
*/
struct k2v SubText[] = {
	{ "plain", TEXT_PLAIN },
	{ "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
	{ "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
	{ NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
41
/*
** Charset pairs character set names with their CHARSET_* codes.
** The entry with the NULL name terminates the table and carries
** the code for unrecognized character sets.
*/
struct k2v Charset[] = {
	{ "us-ascii",   CHARSET_USASCII },
	{ "iso-8859-1", CHARSET_LATIN },
	{ NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
47
/*
** Structures for MULTIPART messages
**
** SubMultiPart pairs each known multipart subtype name with its
** MULTI_* code.  The entry with the NULL name terminates the table
** and carries the code for unrecognized subtypes.
*/
struct k2v SubMultiPart[] = {
	{ "mixed",       MULTI_MIXED },
	{ "alternative", MULTI_ALTERNATE },
	{ "digest",      MULTI_DIGEST },
	{ "parallel",    MULTI_PARALLEL },
	{ NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
58
/*
** Structures for MESSAGE messages
**
** SubMessage pairs each known message subtype name with its
** MESSAGE_* code.  The entry with the NULL name terminates the
** table and carries the code for unrecognized subtypes.
*/
struct k2v SubMessage[] = {
	{ "rfc822",        MESSAGE_RFC822 },
	{ "partial",       MESSAGE_PARTIAL },
	{ "external-body", MESSAGE_EXTERNAL },
	{ NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
68
/*
** Structure for APPLICATION messages
**
** SubApplication pairs each known application subtype name with
** its APPLICATION_* code.  The entry with the NULL name terminates
** the table and carries the code for unrecognized subtypes.
*/
struct k2v SubApplication[] = {
	{ "octet-stream", APPLICATION_OCTETS },
	{ "postscript",   APPLICATION_POSTSCRIPT },
	{ NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
77
78
/* mhmisc.c */
int part_ok(CT, int);
int type_ok(CT, int);
int make_intermediates(char *);
void content_error(char *, CT, char *, ...);

/* mhfree.c */
void free_content(CT);
void free_encoding(CT, int);

/*
** static prototypes
**
** get_content() and get_comment() are the header parsers.  The
** Init* functions for content types are referenced from str2cts[],
** those for transfer encodings from str2ces[].
*/
static CT get_content(FILE *, char *, int);
static int get_comment(CT, unsigned char **, int);

static int InitGeneric(CT);
static int InitText(CT);
static int InitMultiPart(CT);
static void reverse_parts(CT);
static int InitMessage(CT);
static int InitApplication(CT);
static int init_encoding(CT, OpenCEFunc);
static unsigned long size_encoding(CT);
static int InitBase64(CT);
static int openBase64(CT, char **);
static int InitQuoted(CT);
static int openQuoted(CT, char **);
static int Init7Bit(CT);
108
109 struct str2init str2cts[] = {
110         { "application", CT_APPLICATION, InitApplication },
111         { "audio",       CT_AUDIO,       InitGeneric },
112         { "image",       CT_IMAGE,       InitGeneric },
113         { "message",     CT_MESSAGE,     InitMessage },
114         { "multipart",   CT_MULTIPART,   InitMultiPart },
115         { "text",        CT_TEXT,        InitText },
116         { "video",       CT_VIDEO,       InitGeneric },
117         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
118         { NULL,          CT_UNKNOWN,     NULL },
119 };
120
121 struct str2init str2ces[] = {
122         { "base64",           CE_BASE64,    InitBase64 },
123         { "quoted-printable", CE_QUOTED,    InitQuoted },
124         { "8bit",             CE_8BIT,      Init7Bit },
125         { "7bit",             CE_7BIT,      Init7Bit },
126         { "binary",           CE_BINARY,    Init7Bit },
127         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
128         { NULL,               CE_UNKNOWN,    NULL },
129 };
130
131
/*
** Examine a status word (presumably as delivered by wait()).
**
** If bits 8..15 are not all set and the low seven bits equal
** SIGQUIT, flush stdout and stderr and call done(1).  In every
** other case the status is handed back unchanged.
*/
int
pidcheck(int status)
{
	int high_byte = status & 0xff00;
	int low_bits = status & 0x007f;

	if (high_byte != 0xff00 && low_bits == SIGQUIT) {
		fflush(stdout);
		fflush(stderr);
		done(1);
		return 1;
	}

	return status;
}
143
144
145 /*
146 ** Main entry point for parsing a MIME message or file.
147 ** It returns the Content structure for the top level
148 ** entity in the file.
149 */
150 CT
151 parse_mime(char *file)
152 {
153         int is_stdin;
154         char buffer[BUFSIZ];
155         FILE *fp;
156         CT ct;
157
158         /*
159         ** Check if file is actually standard input
160         */
161         if ((is_stdin = (strcmp(file, "-")==0))) {
162                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
163                 if (tfile == NULL) {
164                         advise("mhparse", "unable to create temporary file");
165                         return NULL;
166                 }
167                 file = getcpy(tfile);
168                 chmod(file, 0600);
169
170                 while (fgets(buffer, sizeof(buffer), stdin))
171                         fputs(buffer, fp);
172                 fflush(fp);
173
174                 if (ferror(stdin)) {
175                         unlink(file);
176                         advise("stdin", "error reading");
177                         return NULL;
178                 }
179                 if (ferror(fp)) {
180                         unlink(file);
181                         advise(file, "error writing");
182                         return NULL;
183                 }
184                 fseek(fp, 0L, SEEK_SET);
185         } else if ((fp = fopen(file, "r")) == NULL) {
186                 advise(file, "unable to read");
187                 return NULL;
188         }
189
190         if (!(ct = get_content(fp, file, 1))) {
191                 if (is_stdin)
192                         unlink(file);
193                 advise(NULL, "unable to decode %s", file);
194                 return NULL;
195         }
196
197         if (is_stdin)
198                 ct->c_unlink = 1;  /* temp file to remove */
199
200         ct->c_fp = NULL;
201
202         if (ct->c_end == 0L) {
203                 fseek(fp, 0L, SEEK_END);
204                 ct->c_end = ftell(fp);
205         }
206
207         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
208                 fclose(fp);
209                 free_content(ct);
210                 return NULL;
211         }
212
213         fclose(fp);
214         return ct;
215 }
216
217
218 /*
219 ** Main routine for reading/parsing the headers
220 ** of a message content.
221 **
222 ** toplevel =  1   # we are at the top level of the message
223 ** toplevel =  0   # we are inside message type or multipart type
224 **                 # other than multipart/digest
225 ** toplevel = -1   # we are inside multipart/digest
226 ** NB: on failure we will fclose(in)!
227 */
228
229 static CT
230 get_content(FILE *in, char *file, int toplevel)
231 {
232         int compnum, state;
233         char buf[BUFSIZ], name[NAMESZ];
234         char *np, *vp;
235         CT ct;
236         HF hp;
237
238         /* allocate the content structure */
239         if (!(ct = (CT) calloc(1, sizeof(*ct))))
240                 adios(NULL, "out of memory");
241
242         ct->c_fp = in;
243         ct->c_file = getcpy(file);
244         ct->c_begin = ftell(ct->c_fp) + 1;
245
246         /*
247         ** Parse the header fields for this
248         ** content into a linked list.
249         */
250         for (compnum = 1, state = FLD;;) {
251                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
252                 case FLD:
253                 case FLDPLUS:
254                 case FLDEOF:
255                         compnum++;
256
257                         /* get copies of the buffers */
258                         np = getcpy(name);
259                         vp = getcpy(buf);
260
261                         /* if necessary, get rest of field */
262                         while (state == FLDPLUS) {
263                                 state = m_getfld(state, name, buf,
264                                                 sizeof(buf), in);
265                                 vp = add(buf, vp);  /* add to previous value */
266                         }
267
268                         /* Now add the header data to the list */
269                         add_header(ct, np, vp);
270
271                         /* continue, if this isn't the last header field */
272                         if (state != FLDEOF) {
273                                 ct->c_begin = ftell(in) + 1;
274                                 continue;
275                         }
276                         /* else fall... */
277
278                 case BODY:
279                 case BODYEOF:
280                         ct->c_begin = ftell(in) - strlen(buf);
281                         break;
282
283                 case FILEEOF:
284                         ct->c_begin = ftell(in);
285                         break;
286
287                 case LENERR:
288                 case FMTERR:
289                         adios(NULL, "message format error in component #%d",
290                                         compnum);
291
292                 default:
293                         adios(NULL, "getfld() returned %d", state);
294                 }
295
296                 /* break out of the loop */
297                 break;
298         }
299
300         /*
301         ** Read the content headers.  We will parse the
302         ** MIME related header fields into their various
303         ** structures and set internal flags related to
304         ** content type/subtype, etc.
305         */
306
307         hp = ct->c_first_hf;  /* start at first header field */
308         while (hp) {
309                 /* Get MIME-Version field */
310                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
311                         int ucmp;
312                         char c;
313                         unsigned char *cp, *dp;
314
315                         if (ct->c_vrsn) {
316                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
317                                 goto next_header;
318                         }
319                         ct->c_vrsn = getcpy(hp->value);
320
321                         /* Now, cleanup this field */
322                         cp = ct->c_vrsn;
323
324                         while (isspace(*cp))
325                                 cp++;
326                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
327                                 *dp++ = ' ';
328                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
329                                 if (!isspace(*dp))
330                                         break;
331                         *++dp = '\0';
332                         if (debugsw)
333                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
334
335                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
336                                 goto out;
337
338                         for (dp = cp; istoken(*dp); dp++)
339                                 continue;
340                         c = *dp;
341                         *dp = '\0';
342                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
343                         *dp = c;
344                         if (!ucmp) {
345                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
346                         }
347
348                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349                         /* Get Content-Type field */
350                         struct str2init *s2i;
351                         CI ci = &ct->c_ctinfo;
352
353                         /* Check if we've already seen a Content-Type header */
354                         if (ct->c_ctline) {
355                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
356                                 goto next_header;
357                         }
358
359                         /* Parse the Content-Type field */
360                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
361                                 goto out;
362
363                         /*
364                         ** Set the Init function and the internal
365                         ** flag for this content type.
366                         */
367                         for (s2i = str2cts; s2i->si_key; s2i++)
368                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
369                                         break;
370                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
371                                 s2i++;
372                         ct->c_type = s2i->si_val;
373                         ct->c_ctinitfnx = s2i->si_init;
374
375                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376                         /* Get Content-Transfer-Encoding field */
377                         char c;
378                         unsigned char *cp, *dp;
379                         struct str2init *s2i;
380
381                         /*
382                         ** Check if we've already seen the
383                         ** Content-Transfer-Encoding field
384                         */
385                         if (ct->c_celine) {
386                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
387                                 goto next_header;
388                         }
389
390                         /* get copy of this field */
391                         ct->c_celine = cp = getcpy(hp->value);
392
393                         while (isspace(*cp))
394                                 cp++;
395                         for (dp = cp; istoken(*dp); dp++)
396                                 continue;
397                         c = *dp;
398                         *dp = '\0';
399
400                         /*
401                         ** Find the internal flag and Init function
402                         ** for this transfer encoding.
403                         */
404                         for (s2i = str2ces; s2i->si_key; s2i++)
405                                 if (!mh_strcasecmp(cp, s2i->si_key))
406                                         break;
407                         if (!s2i->si_key && !uprf(cp, "X-"))
408                                 s2i++;
409                         *dp = c;
410                         ct->c_encoding = s2i->si_val;
411
412                         /* Call the Init function for this encoding */
413                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
414                                 goto out;
415
416                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417                         /* Get Content-ID field */
418                         ct->c_id = add(hp->value, ct->c_id);
419
420                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421                         /* Get Content-Description field */
422                         ct->c_descr = add(hp->value, ct->c_descr);
423
424                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425                         /* Get Content-Disposition field */
426                         ct->c_dispo = add(hp->value, ct->c_dispo);
427                 }
428
429 next_header:
430                 hp = hp->next;  /* next header field */
431         }
432
433         /*
434         ** Check if we saw a Content-Type field.
435         ** If not, then assign a default value for
436         ** it, and the Init function.
437         */
438         if (!ct->c_ctline) {
439                 /*
440                 ** If we are inside a multipart/digest message,
441                 ** so default type is message/rfc822
442                 */
443                 if (toplevel < 0) {
444                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
445                                 goto out;
446                         ct->c_type = CT_MESSAGE;
447                         ct->c_ctinitfnx = InitMessage;
448                 } else {
449                         /*
450                         ** Else default type is text/plain
451                         */
452                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
453                                 goto out;
454                         ct->c_type = CT_TEXT;
455                         ct->c_ctinitfnx = InitText;
456                 }
457         }
458
459         /* Use default Transfer-Encoding, if necessary */
460         if (!ct->c_celine) {
461                 ct->c_encoding = CE_7BIT;
462                 Init7Bit(ct);
463         }
464
465         return ct;
466
467 out:
468         free_content(ct);
469         return NULL;
470 }
471
472
473 /*
474 ** small routine to add header field to list
475 */
476
477 int
478 add_header(CT ct, char *name, char *value)
479 {
480         HF hp;
481
482         /* allocate header field structure */
483         hp = mh_xmalloc(sizeof(*hp));
484
485         /* link data into header structure */
486         hp->name = name;
487         hp->value = value;
488         hp->next = NULL;
489
490         /* link header structure into the list */
491         if (ct->c_first_hf == NULL) {
492                 ct->c_first_hf = hp;  /* this is the first */
493                 ct->c_last_hf = hp;
494         } else {
495                 ct->c_last_hf->next = hp;  /* add it to the end */
496                 ct->c_last_hf = hp;
497         }
498
499         return 0;
500 }
501
502
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert name="value" just after the text
** preceding the first semicolon, or append it if there is no
** semicolon.  Note that name should not contain a trailing =, and
** that the quotes around the value are added here.  Typical usage:
** make sure that a Content-Disposition header contains
** filename="foo".
**
** buf    heap-allocated header text; may be NULL.  If a new string
**        is built, buf is freed.
** name   parameter name; assumed to be non-NULL.
** value  parameter value; if NULL, nothing is changed.
**
** Returns buf itself if nothing had to be inserted (its trailing
** white space is left alone in that case), otherwise a newly
** allocated string that ends in a newline.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *newbuf = buf;
	char *name_plus_equal;
	char *insertion, *prefix, *semi;
	size_t len;

	if (!buf || !value)
		return newbuf;

	/* a plain substring test: "name=" also matches "filename=" */
	name_plus_equal = concat(name, "=", NULL);
	if (strstr(buf, name_plus_equal)) {
		free(name_plus_equal);
		return newbuf;
	}
	free(name_plus_equal);

	/*
	** Trim trailing space, esp. newline.  Counting down a length
	** keeps this well-defined for an empty buf, where indexing
	** from strlen(buf) - 1 would start outside the string.
	*/
	for (len = strlen(buf); len > 0 && isspace(buf[len - 1]); len--)
		buf[len - 1] = '\0';

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	/*
	** Insert at first semicolon, if any.
	** If none, append to end.
	*/
	prefix = getcpy(buf);
	if ((semi = strchr(prefix, ';'))) {
		char *suffix = concat(semi, NULL);

		*semi = '\0';  /* prefix now ends before the semicolon */
		newbuf = concat(prefix, insertion, suffix, "\n", NULL);
		free(suffix);
	} else {
		/* Append to end. */
		newbuf = concat(buf, insertion, "\n", NULL);
	}

	free(prefix);
	free(insertion);
	free(buf);

	return newbuf;
}
560
/*
** Extract just the foo of name_suffix="foo", if any, from value.
** Note that, for example, a name_suffix of name will match
** filename="foo", and return foo.
**
** If value contains no name_suffix=", or if the quoted string
** after it is not terminated by a second quote, value itself is
** returned.  Otherwise the result is a newly allocated string.
** A caller that wants to free the result must therefore first
** compare it with value.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern = concat(name_suffix, "=\"", NULL);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	free(pattern);
	if (!match)
		return value;

	/* the matched pattern ends in a quote, so this cannot fail */
	begin = strchr(match, '"') + 1;

	/*
	** Look for the closing quote without running past the end
	** of the string; an unterminated quoted string counts as
	** "no value found".
	*/
	end = strchr(begin, '"');
	if (!end)
		return value;

	len = end - begin;
	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
593
594 /*
595 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
596 ** directives.  Fills in the information of the CTinfo structure.
597 */
598 int
599 get_ctinfo(unsigned char *cp, CT ct, int magic)
600 {
601         int i;
602         unsigned char *dp;
603         char **ap, **ep;
604         char c;
605         CI ci;
606
607         ci = &ct->c_ctinfo;
608         i = strlen(invo_name) + 2;
609
610         /* store copy of Content-Type line */
611         cp = ct->c_ctline = getcpy(cp);
612
613         while (isspace(*cp))  /* trim leading spaces */
614                 cp++;
615
616         /* change newlines to spaces */
617         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
618                 *dp++ = ' ';
619
620         /* trim trailing spaces */
621         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
622                 if (!isspace(*dp))
623                         break;
624         *++dp = '\0';
625
626         if (debugsw)
627                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
628
629         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
630                 return NOTOK;
631
632         for (dp = cp; istoken(*dp); dp++)
633                 continue;
634         c = *dp, *dp = '\0';
635         ci->ci_type = getcpy(cp);  /* store content type */
636         *dp = c, cp = dp;
637
638         if (!*ci->ci_type) {
639                 advise(NULL, "invalid %s: field in message %s (empty type)",
640                                 TYPE_FIELD, ct->c_file);
641                 return NOTOK;
642         }
643
644         /* down case the content type string */
645         for (dp = ci->ci_type; *dp; dp++)
646                 if (isalpha(*dp) && isupper(*dp))
647                         *dp = tolower(*dp);
648
649         while (isspace(*cp))
650                 cp++;
651
652         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
653                 return NOTOK;
654
655         if (*cp != '/') {
656                 if (!magic)
657                         ci->ci_subtype = getcpy("");
658                 goto magic_skip;
659         }
660
661         cp++;
662         while (isspace(*cp))
663                 cp++;
664
665         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
666                 return NOTOK;
667
668         for (dp = cp; istoken(*dp); dp++)
669                 continue;
670         c = *dp, *dp = '\0';
671         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
672         *dp = c, cp = dp;
673
674         if (!*ci->ci_subtype) {
675                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
676                 return NOTOK;
677         }
678
679         /* down case the content subtype string */
680         for (dp = ci->ci_subtype; *dp; dp++)
681                 if (isalpha(*dp) && isupper(*dp))
682                         *dp = tolower(*dp);
683
684 magic_skip:
685         while (isspace(*cp))
686                 cp++;
687
688         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
689                 return NOTOK;
690
691         /*
692         ** Parse attribute/value pairs given with Content-Type
693         */
694         ep = (ap = ci->ci_attrs) + NPARMS;
695         while (*cp == ';') {
696                 char *vp;
697                 unsigned char *up;
698
699                 if (ap >= ep) {
700                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
701                         return NOTOK;
702                 }
703
704                 cp++;
705                 while (isspace(*cp))
706                         cp++;
707
708                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
709                         return NOTOK;
710
711                 if (*cp == 0) {
712                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
713                         return OK;
714                 }
715
716                 /* down case the attribute name */
717                 for (dp = cp; istoken(*dp); dp++)
718                         if (isalpha(*dp) && isupper(*dp))
719                                 *dp = tolower(*dp);
720
721                 for (up = dp; isspace(*dp);)
722                         dp++;
723                 if (dp == cp || *dp != '=') {
724                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
725                         return NOTOK;
726                 }
727
728                 vp = (*ap = getcpy(cp)) + (up - cp);
729                 *vp = '\0';
730                 for (dp++; isspace(*dp);)
731                         dp++;
732
733                 /* now add the attribute value */
734                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
735
736                 if (*dp == '"') {
737                         for (cp = ++dp, dp = vp;;) {
738                                 switch (c = *cp++) {
739                                 case '\0':
740 bad_quote:
741                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
742                                         return NOTOK;
743
744                                 case '\\':
745                                         *dp++ = c;
746                                         if ((c = *cp++) == '\0')
747                                                 goto bad_quote;
748                                         /* else fall... */
749
750                                 default:
751                                         *dp++ = c;
752                                         continue;
753
754                                 case '"':
755                                         *dp = '\0';
756                                         break;
757                                 }
758                                 break;
759                         }
760                 } else {
761                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
762                                 continue;
763                         *dp = '\0';
764                 }
765                 if (!*vp) {
766                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
767                         return NOTOK;
768                 }
769                 ap++;
770
771                 while (isspace(*cp))
772                         cp++;
773
774                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
775                         return NOTOK;
776         }
777
778         /*
779         ** Get any <Content-Id> given in buffer
780         */
781         if (magic && *cp == '<') {
782                 if (ct->c_id) {
783                         free(ct->c_id);
784                         ct->c_id = NULL;
785                 }
786                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
787                         advise(NULL, "invalid ID in message %s", ct->c_file);
788                         return NOTOK;
789                 }
790                 c = *dp;
791                 *dp = '\0';
792                 if (*ct->c_id)
793                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
794                 else
795                         ct->c_id = NULL;
796                 *dp++ = c;
797                 cp = dp;
798
799                 while (isspace(*cp))
800                         cp++;
801         }
802
803         /*
804         ** Get any [Content-Description] given in buffer.
805         */
806         if (magic && *cp == '[') {
807                 ct->c_descr = ++cp;
808                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
809                         if (*dp == ']')
810                                 break;
811                 if (dp < cp) {
812                         advise(NULL, "invalid description in message %s",
813                                         ct->c_file);
814                         ct->c_descr = NULL;
815                         return NOTOK;
816                 }
817
818                 c = *dp;
819                 *dp = '\0';
820                 if (*ct->c_descr)
821                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
822                 else
823                         ct->c_descr = NULL;
824                 *dp++ = c;
825                 cp = dp;
826
827                 while (isspace(*cp))
828                         cp++;
829         }
830
831         /*
832         ** Get any {Content-Disposition} given in buffer.
833         */
834         if (magic && *cp == '{') {
835                 ct->c_dispo = ++cp;
836                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
837                         if (*dp == '}')
838                                 break;
839                 if (dp < cp) {
840                         advise(NULL, "invalid disposition in message %s",
841                                         ct->c_file);
842                         ct->c_dispo = NULL;
843                         return NOTOK;
844                 }
845
846                 c = *dp;
847                 *dp = '\0';
848                 if (*ct->c_dispo)
849                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
850                 else
851                         ct->c_dispo = NULL;
852                 *dp++ = c;
853                 cp = dp;
854
855                 while (isspace(*cp))
856                         cp++;
857         }
858
859         /*
860         ** Check if anything is left over
861         */
862         if (*cp) {
863                 if (magic) {
864                         ci->ci_magic = getcpy(cp);
865
866                         /*
867                         ** If there is a Content-Disposition header and
868                         ** it doesn't have a *filename=, extract it from
869                         ** the magic contents.  The mhbasename call skips
870                         ** any leading directory components.
871                         */
872                         if (ct->c_dispo)
873                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
874                         } else
875                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
876         }
877
878         return OK;
879 }
880
881
882 static int
883 get_comment(CT ct, unsigned char **ap, int istype)
884 {
885         int i;
886         char *bp;
887         unsigned char *cp;
888         char c, buffer[BUFSIZ], *dp;
889         CI ci;
890
891         ci = &ct->c_ctinfo;
892         cp = *ap;
893         bp = buffer;
894         cp++;
895
896         for (i = 0;;) {
897                 switch (c = *cp++) {
898                 case '\0':
899 invalid:
900                 advise(NULL, "invalid comment in message %s's %s: field",
901                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
902                 return NOTOK;
903
904                 case '\\':
905                         *bp++ = c;
906                         if ((c = *cp++) == '\0')
907                                 goto invalid;
908                         *bp++ = c;
909                         continue;
910
911                 case '(':
912                         i++;
913                         /* and fall... */
914                 default:
915                         *bp++ = c;
916                         continue;
917
918                 case ')':
919                         if (--i < 0)
920                                 break;
921                         *bp++ = c;
922                         continue;
923                 }
924                 break;
925         }
926         *bp = '\0';
927
928         if (istype) {
929                 if ((dp = ci->ci_comment)) {
930                         ci->ci_comment = concat(dp, " ", buffer, NULL);
931                         free(dp);
932                 } else {
933                         ci->ci_comment = getcpy(buffer);
934                 }
935         }
936
937         while (isspace(*cp))
938                 cp++;
939
940         *ap = cp;
941         return OK;
942 }
943
944
945 /*
946 ** CONTENTS
947 **
948 ** Handles content types audio, image, and video.
949 ** There's not much to do right here.
950 */
951
952 static int
953 InitGeneric(CT ct)
954 {
955         return OK;  /* not much to do here */
956 }
957
958
959 /*
960 ** TEXT
961 */
962
963 static int
964 InitText(CT ct)
965 {
966         char buffer[BUFSIZ];
967         char *chset = NULL;
968         char **ap, **ep, *cp;
969         struct k2v *kv;
970         struct text *t;
971         CI ci = &ct->c_ctinfo;
972
973         /* check for missing subtype */
974         if (!*ci->ci_subtype)
975                 ci->ci_subtype = add("plain", ci->ci_subtype);
976
977         /* match subtype */
978         for (kv = SubText; kv->kv_key; kv++)
979                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
980                         break;
981         ct->c_subtype = kv->kv_value;
982
983         /* allocate text character set structure */
984         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
985                 adios(NULL, "out of memory");
986         ct->c_ctparams = (void *) t;
987
988         /* scan for charset parameter */
989         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
990                 if (!mh_strcasecmp(*ap, "charset"))
991                         break;
992
993         /* check if content specified a character set */
994         if (*ap) {
995                 /* match character set or set to CHARSET_UNKNOWN */
996                 for (kv = Charset; kv->kv_key; kv++) {
997                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
998                                 chset = *ep;
999                                 break;
1000                         }
1001                 }
1002                 t->tx_charset = kv->kv_value;
1003         } else {
1004                 t->tx_charset = CHARSET_UNSPECIFIED;
1005         }
1006
1007         /*
1008         ** If we can not handle character set natively,
1009         ** then check profile for string to modify the
1010         ** terminal or display method.
1011         **
1012         ** termproc is for mhshow, though mhlist -debug prints it, too.
1013         */
1014         if (chset != NULL && !check_charset(chset, strlen(chset))) {
1015                 snprintf(buffer, sizeof(buffer), "%s-charset-%s",
1016                                 invo_name, chset);
1017                 if ((cp = context_find(buffer)))
1018                         ct->c_termproc = getcpy(cp);
1019         }
1020
1021         return OK;
1022 }
1023
1024
1025 /*
1026 ** MULTIPART
1027 */
1028
1029 static int
1030 InitMultiPart(CT ct)
1031 {
1032         int inout;
1033         long last, pos;
1034         unsigned char *cp, *dp;
1035         char **ap, **ep;
1036         char *bp, buffer[BUFSIZ];
1037         struct multipart *m;
1038         struct k2v *kv;
1039         struct part *part, **next;
1040         CI ci = &ct->c_ctinfo;
1041         CT p;
1042         FILE *fp;
1043
1044         /*
1045         ** The encoding for multipart messages must be either
1046         ** 7bit, 8bit, or binary (per RFC2045).
1047         */
1048         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1049                 && ct->c_encoding != CE_BINARY) {
1050                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1051                 return NOTOK;
1052         }
1053
1054         /* match subtype */
1055         for (kv = SubMultiPart; kv->kv_key; kv++)
1056                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1057                         break;
1058         ct->c_subtype = kv->kv_value;
1059
1060         /*
1061         ** Check for "boundary" parameter, which is
1062         ** required for multipart messages.
1063         */
1064         bp = 0;
1065         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1066                 if (!mh_strcasecmp(*ap, "boundary")) {
1067                         bp = *ep;
1068                         break;
1069                 }
1070         }
1071
1072         /* complain if boundary parameter is missing */
1073         if (!*ap) {
1074                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1075                 return NOTOK;
1076         }
1077
1078         /* allocate primary structure for multipart info */
1079         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1080                 adios(NULL, "out of memory");
1081         ct->c_ctparams = (void *) m;
1082
1083         /* check if boundary parameter contains only whitespace characters */
1084         for (cp = bp; isspace(*cp); cp++)
1085                 continue;
1086         if (!*cp) {
1087                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1088                 return NOTOK;
1089         }
1090
1091         /* remove trailing whitespace from boundary parameter */
1092         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1093                 if (!isspace(*dp))
1094                         break;
1095         *++dp = '\0';
1096
1097         /* record boundary separators */
1098         m->mp_start = concat(bp, "\n", NULL);
1099         m->mp_stop = concat(bp, "--\n", NULL);
1100
1101         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1102                 advise(ct->c_file, "unable to open for reading");
1103                 return NOTOK;
1104         }
1105
1106         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1107         last = ct->c_end;
1108         next = &m->mp_parts;
1109         part = NULL;
1110         inout = 1;
1111
1112         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1113                 if (pos > last)
1114                         break;
1115
1116                 pos += strlen(buffer);
1117                 if (buffer[0] != '-' || buffer[1] != '-')
1118                         continue;
1119                 if (inout) {
1120                         if (strcmp(buffer + 2, m->mp_start)!=0)
1121                                 continue;
1122 next_part:
1123                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1124                                         == NULL)
1125                                 adios(NULL, "out of memory");
1126                         *next = part;
1127                         next = &part->mp_next;
1128
1129                         if (!(p = get_content(fp, ct->c_file,
1130                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1131                                 ct->c_fp = NULL;
1132                                 return NOTOK;
1133                         }
1134                         p->c_fp = NULL;
1135                         part->mp_part = p;
1136                         pos = p->c_begin;
1137                         fseek(fp, pos, SEEK_SET);
1138                         inout = 0;
1139                 } else {
1140                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1141                                 inout = 1;
1142 end_part:
1143                                 p = part->mp_part;
1144                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1145                                 if (p->c_end < p->c_begin)
1146                                         p->c_begin = p->c_end;
1147                                 if (inout)
1148                                         goto next_part;
1149                                 goto last_part;
1150                         } else {
1151                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1152                                         goto end_part;
1153                         }
1154                 }
1155         }
1156
1157         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1158         if (!inout && part) {
1159                 p = part->mp_part;
1160                 p->c_end = ct->c_end;
1161
1162                 if (p->c_begin >= p->c_end) {
1163                         for (next = &m->mp_parts; *next != part;
1164                                 next = &((*next)->mp_next))
1165                                 continue;
1166                         *next = NULL;
1167                         free_content(p);
1168                         free((char *) part);
1169                 }
1170         }
1171
1172 last_part:
1173         /* reverse the order of the parts for multipart/alternative */
1174         if (ct->c_subtype == MULTI_ALTERNATE)
1175                 reverse_parts(ct);
1176
1177         /*
1178         ** label all subparts with part number, and
1179         ** then initialize the content of the subpart.
1180         */
1181         {
1182                 int partnum;
1183                 char *pp;
1184                 char partnam[BUFSIZ];
1185
1186                 if (ct->c_partno) {
1187                         snprintf(partnam, sizeof(partnam), "%s.",
1188                                         ct->c_partno);
1189                         pp = partnam + strlen(partnam);
1190                 } else {
1191                         pp = partnam;
1192                 }
1193
1194                 for (part = m->mp_parts, partnum = 1; part;
1195                         part = part->mp_next, partnum++) {
1196                         p = part->mp_part;
1197
1198                         sprintf(pp, "%d", partnum);
1199                         p->c_partno = getcpy(partnam);
1200
1201                         /* initialize the content of the subparts */
1202                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1203                                 fclose(ct->c_fp);
1204                                 ct->c_fp = NULL;
1205                                 return NOTOK;
1206                         }
1207                 }
1208         }
1209
1210         fclose(ct->c_fp);
1211         ct->c_fp = NULL;
1212         return OK;
1213 }
1214
1215
1216 /*
1217 ** reverse the order of the parts of a multipart
1218 */
1219
1220 static void
1221 reverse_parts(CT ct)
1222 {
1223         int i;
1224         struct multipart *m;
1225         struct part **base, **bmp, **next, *part;
1226
1227         m = (struct multipart *) ct->c_ctparams;
1228
1229         /* if only one part, just return */
1230         if (!m->mp_parts || !m->mp_parts->mp_next)
1231                 return;
1232
1233         /* count number of parts */
1234         i = 0;
1235         for (part = m->mp_parts; part; part = part->mp_next)
1236                 i++;
1237
1238         /* allocate array of pointers to the parts */
1239         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1240                 adios(NULL, "out of memory");
1241         bmp = base;
1242
1243         /* point at all the parts */
1244         for (part = m->mp_parts; part; part = part->mp_next)
1245                 *bmp++ = part;
1246         *bmp = NULL;
1247
1248         /* reverse the order of the parts */
1249         next = &m->mp_parts;
1250         for (bmp--; bmp >= base; bmp--) {
1251                 part = *bmp;
1252                 *next = part;
1253                 next = &part->mp_next;
1254         }
1255         *next = NULL;
1256
1257         /* free array of pointers */
1258         free((char *) base);
1259 }
1260
1261
1262 /*
1263 ** MESSAGE
1264 */
1265
1266 static int
1267 InitMessage(CT ct)
1268 {
1269         struct k2v *kv;
1270         CI ci = &ct->c_ctinfo;
1271
1272         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1273                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1274                 return NOTOK;
1275         }
1276
1277         /* check for missing subtype */
1278         if (!*ci->ci_subtype)
1279                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1280
1281         /* match subtype */
1282         for (kv = SubMessage; kv->kv_key; kv++)
1283                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1284                         break;
1285         ct->c_subtype = kv->kv_value;
1286
1287         switch (ct->c_subtype) {
1288         case MESSAGE_RFC822:
1289                 break;
1290
1291         case MESSAGE_PARTIAL:
1292                 {
1293                 char **ap, **ep;
1294                 struct partial *p;
1295
1296                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1297                         adios(NULL, "out of memory");
1298                 ct->c_ctparams = (void *) p;
1299
1300                 /*
1301                 ** scan for parameters "id", "number",
1302                 ** and "total"
1303                 */
1304                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1305                         if (!mh_strcasecmp(*ap, "id")) {
1306                                 p->pm_partid = getcpy(*ep);
1307                                 continue;
1308                         }
1309                         if (!mh_strcasecmp(*ap, "number")) {
1310                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1311 invalid_param:
1312                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1313                                         return NOTOK;
1314                                 }
1315                                 continue;
1316                         }
1317                         if (!mh_strcasecmp(*ap, "total")) {
1318                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1319                                                 p->pm_maxno < 1)
1320                                         goto invalid_param;
1321                                 continue;
1322                         }
1323                 }
1324
1325                 if (!p->pm_partid || !p->pm_partno
1326                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1327                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1328                         return NOTOK;
1329                 }
1330                 }
1331                 break;
1332
1333         case MESSAGE_EXTERNAL:
1334                 {
1335                 CT p;
1336                 FILE *fp;
1337
1338                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1339                         advise(ct->c_file, "unable to open for reading");
1340                         return NOTOK;
1341                 }
1342
1343                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1344
1345                 if (!(p = get_content(fp, ct->c_file, 0))) {
1346                         ct->c_fp = NULL;
1347                         return NOTOK;
1348                 }
1349
1350                 p->c_fp = NULL;
1351                 p->c_end = p->c_begin;
1352
1353                 fclose(ct->c_fp);
1354                 ct->c_fp = NULL;
1355
1356                 switch (p->c_type) {
1357                 case CT_MULTIPART:
1358                         break;
1359
1360                 case CT_MESSAGE:
1361                         if (p->c_subtype != MESSAGE_RFC822)
1362                                 break;
1363                         /* else fall... */
1364                 default:
1365                         if (p->c_ctinitfnx)
1366                                 (*p->c_ctinitfnx) (p);
1367                         break;
1368                 }
1369                 }
1370                 break;
1371
1372         default:
1373                 break;
1374         }
1375
1376         return OK;
1377 }
1378
1379
1380 /*
1381 ** APPLICATION
1382 */
1383
1384 static int
1385 InitApplication(CT ct)
1386 {
1387         struct k2v *kv;
1388         CI ci = &ct->c_ctinfo;
1389
1390         /* match subtype */
1391         for (kv = SubApplication; kv->kv_key; kv++)
1392                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1393                         break;
1394         ct->c_subtype = kv->kv_value;
1395
1396         return OK;
1397 }
1398
1399
1400 /*
1401 ** TRANSFER ENCODINGS
1402 */
1403
1404 static int
1405 init_encoding(CT ct, OpenCEFunc openfnx)
1406 {
1407         CE ce;
1408
1409         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1410                 adios(NULL, "out of memory");
1411
1412         ct->c_cefile     = ce;
1413         ct->c_ceopenfnx  = openfnx;
1414         ct->c_ceclosefnx = close_encoding;
1415         ct->c_cesizefnx  = size_encoding;
1416
1417         return OK;
1418 }
1419
1420
1421 void
1422 close_encoding(CT ct)
1423 {
1424         CE ce;
1425
1426         if (!(ce = ct->c_cefile))
1427                 return;
1428
1429         if (ce->ce_fp) {
1430                 fclose(ce->ce_fp);
1431                 ce->ce_fp = NULL;
1432         }
1433 }
1434
1435
1436 static unsigned long
1437 size_encoding(CT ct)
1438 {
1439         int fd;
1440         unsigned long size;
1441         char *file;
1442         CE ce;
1443         struct stat st;
1444
1445         if (!(ce = ct->c_cefile))
1446                 return (ct->c_end - ct->c_begin);
1447
1448         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1449                 return (long) st.st_size;
1450
1451         if (ce->ce_file) {
1452                 if (stat(ce->ce_file, &st) != NOTOK)
1453                         return (long) st.st_size;
1454                 else
1455                         return 0L;
1456         }
1457
1458         if (ct->c_encoding == CE_EXTERNAL)
1459                 return (ct->c_end - ct->c_begin);
1460
1461         file = NULL;
1462         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1463                 return (ct->c_end - ct->c_begin);
1464
1465         if (fstat(fd, &st) != NOTOK)
1466                 size = (long) st.st_size;
1467         else
1468                 size = 0L;
1469
1470         (*ct->c_ceclosefnx) (ct);
1471         return size;
1472 }
1473
1474
1475 /*
1476 ** BASE64
1477 */
1478
/*
** Lookup table for BASE64 decoding, indexed by 7-bit character code.
** Characters of the BASE64 alphabet map to their 6-bit value
** (0x00..0x3f); every other entry is 0xff.  One row covers sixteen
** character codes.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' is 0x3e, '/' is 0x3f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0'..'9' are 0x34..0x3d */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,  0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x4f: 'A'..'O' are 0x00..0x0e */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,  0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	/* 0x50 - 0x5f: 'P'..'Z' are 0x0f..0x19 */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,  0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x6f: 'a'..'o' are 0x1a..0x28 */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	/* 0x70 - 0x7f: 'p'..'z' are 0x29..0x33 */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,  0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1497
1498
1499 static int
1500 InitBase64(CT ct)
1501 {
1502         return init_encoding(ct, openBase64);
1503 }
1504
1505
1506 static int
1507 openBase64(CT ct, char **file)
1508 {
1509         int bitno, cc;
1510         int fd, len, skip, own_ct_fp = 0;
1511         unsigned long bits;
1512         unsigned char value, *b, *b1, *b2, *b3;
1513         unsigned char *cp, *ep;
1514         char buffer[BUFSIZ];
1515         /* sbeck -- handle suffixes */
1516         CI ci;
1517         CE ce;
1518
1519         b  = (unsigned char *) &bits;
1520         b1 = &b[endian > 0 ? 1 : 2];
1521         b2 = &b[endian > 0 ? 2 : 1];
1522         b3 = &b[endian > 0 ? 3 : 0];
1523
1524         ce = ct->c_cefile;
1525         if (ce->ce_fp) {
1526                 fseek(ce->ce_fp, 0L, SEEK_SET);
1527                 goto ready_to_go;
1528         }
1529
1530         if (ce->ce_file) {
1531                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1532                         content_error(ce->ce_file, ct,
1533                                         "unable to fopen for reading");
1534                         return NOTOK;
1535                 }
1536                 goto ready_to_go;
1537         }
1538
1539         if (*file == NULL) {
1540                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1541                 ce->ce_unlink = 1;
1542         } else {
1543                 ce->ce_file = getcpy(*file);
1544                 ce->ce_unlink = 0;
1545         }
1546
1547         /* sbeck@cise.ufl.edu -- handle suffixes */
1548         ci = &ct->c_ctinfo;
1549         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1550                         invo_name, ci->ci_type, ci->ci_subtype);
1551         cp = context_find(buffer);
1552         if (cp == NULL || *cp == '\0') {
1553                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1554                           ci->ci_type);
1555                 cp = context_find(buffer);
1556         }
1557         if (cp != NULL && *cp != '\0') {
1558                 if (ce->ce_unlink) {
1559                         /*
1560                         ** Temporary file already exists, so we rename to
1561                         ** version with extension.
1562                         */
1563                         char *file_org = strdup(ce->ce_file);
1564                         ce->ce_file = add(cp, ce->ce_file);
1565                         if (rename(file_org, ce->ce_file)) {
1566                                 adios(ce->ce_file, "unable to rename %s to ",
1567                                                 file_org);
1568                         }
1569                         free(file_org);
1570
1571                 } else {
1572                         ce->ce_file = add(cp, ce->ce_file);
1573                 }
1574         }
1575
1576         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1577                 content_error(ce->ce_file, ct,
1578                                 "unable to fopen for reading/writing");
1579                 return NOTOK;
1580         }
1581
1582         if ((len = ct->c_end - ct->c_begin) < 0)
1583                 adios(NULL, "internal error(1)");
1584
1585         if (!ct->c_fp) {
1586                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1587                         content_error(ct->c_file, ct,
1588                                         "unable to open for reading");
1589                         return NOTOK;
1590                 }
1591                 own_ct_fp = 1;
1592         }
1593
1594         bitno = 18;
1595         bits = 0L;
1596         skip = 0;
1597
1598         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1599         while (len > 0) {
1600                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1601                 case NOTOK:
1602                         content_error(ct->c_file, ct, "error reading from");
1603                         goto clean_up;
1604
1605                 case OK:
1606                         content_error(NULL, ct, "premature eof");
1607                         goto clean_up;
1608
1609                 default:
1610                         if (cc > len)
1611                                 cc = len;
1612                         len -= cc;
1613
1614                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1615                                 switch (*cp) {
1616                                 default:
1617                                         if (isspace(*cp))
1618                                                 break;
1619                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1620                                                 if (debugsw) {
1621                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1622                                                 }
1623                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1624                                                 continue;
1625                                         }
1626
1627                                         bits |= value << bitno;
1628 test_end:
1629                                         if ((bitno -= 6) < 0) {
1630                                                 putc((char) *b1, ce->ce_fp);
1631                                                 if (skip < 2) {
1632                                                         putc((char) *b2, ce->ce_fp);
1633                                                         if (skip < 1) {
1634                                                                 putc((char) *b3, ce->ce_fp);
1635                                                         }
1636                                                 }
1637
1638                                                 if (ferror(ce->ce_fp)) {
1639                                                         content_error(ce->ce_file, ct,
1640                                                                                    "error writing to");
1641                                                         goto clean_up;
1642                                                 }
1643                                                 bitno = 18, bits = 0L, skip = 0;
1644                                         }
1645                                         break;
1646
1647                                 case '=':
1648                                         if (++skip > 3)
1649                                                 goto self_delimiting;
1650                                         goto test_end;
1651                                 }
1652                         }
1653                 }
1654         }
1655
1656         if (bitno != 18) {
1657                 if (debugsw)
1658                         fprintf(stderr, "premature ending (bitno %d)\n",
1659                                         bitno);
1660
1661                 content_error(NULL, ct, "invalid BASE64 encoding");
1662                 goto clean_up;
1663         }
1664
1665 self_delimiting:
1666         fseek(ct->c_fp, 0L, SEEK_SET);
1667
1668         if (fflush(ce->ce_fp)) {
1669                 content_error(ce->ce_file, ct, "error writing to");
1670                 goto clean_up;
1671         }
1672
1673         fseek(ce->ce_fp, 0L, SEEK_SET);
1674
1675 ready_to_go:
1676         *file = ce->ce_file;
1677         if (own_ct_fp) {
1678                 fclose(ct->c_fp);
1679                 ct->c_fp = NULL;
1680         }
1681         return fileno(ce->ce_fp);
1682
1683 clean_up:
1684         free_encoding(ct, 0);
1685         if (own_ct_fp) {
1686                 fclose(ct->c_fp);
1687                 ct->c_fp = NULL;
1688         }
1689         return NOTOK;
1690 }
1691
1692
1693 /*
1694 ** QUOTED PRINTABLE
1695 */
1696
/*
** Lookup table giving the numeric value of a hexadecimal digit,
** indexed by 7-bit character code.  Only the entries for the
** hexadecimal digits are listed; all other entries are zero.
*/
static char hex2nib[0x80] = {
	/* '0' .. '9' */
	[0x30] = 0x00, [0x31] = 0x01, [0x32] = 0x02, [0x33] = 0x03,
	[0x34] = 0x04, [0x35] = 0x05, [0x36] = 0x06, [0x37] = 0x07,
	[0x38] = 0x08, [0x39] = 0x09,

	/* 'A' .. 'F' */
	[0x41] = 0x0A, [0x42] = 0x0B, [0x43] = 0x0C,
	[0x44] = 0x0D, [0x45] = 0x0E, [0x46] = 0x0F,

	/* 'a' .. 'f' */
	[0x61] = 0x0a, [0x62] = 0x0b, [0x63] = 0x0c,
	[0x64] = 0x0d, [0x65] = 0x0e, [0x66] = 0x0f
};
1715
1716
1717 static int
1718 InitQuoted(CT ct)
1719 {
1720         return init_encoding(ct, openQuoted);
1721 }
1722
1723
1724 static int
1725 openQuoted(CT ct, char **file)
1726 {
1727         int cc, len, quoted, own_ct_fp = 0;
1728         unsigned char *cp, *ep;
1729         char buffer[BUFSIZ];
1730         unsigned char mask = 0;
1731         CE ce;
1732         /* sbeck -- handle suffixes */
1733         CI ci;
1734
1735         ce = ct->c_cefile;
1736         if (ce->ce_fp) {
1737                 fseek(ce->ce_fp, 0L, SEEK_SET);
1738                 goto ready_to_go;
1739         }
1740
1741         if (ce->ce_file) {
1742                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1743                         content_error(ce->ce_file, ct,
1744                                         "unable to fopen for reading");
1745                         return NOTOK;
1746                 }
1747                 goto ready_to_go;
1748         }
1749
1750         if (*file == NULL) {
1751                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1752                 ce->ce_unlink = 1;
1753         } else {
1754                 ce->ce_file = getcpy(*file);
1755                 ce->ce_unlink = 0;
1756         }
1757
1758         /* sbeck@cise.ufl.edu -- handle suffixes */
1759         ci = &ct->c_ctinfo;
1760         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1761                         invo_name, ci->ci_type, ci->ci_subtype);
1762         cp = context_find(buffer);
1763         if (cp == NULL || *cp == '\0') {
1764                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1765                                 ci->ci_type);
1766                 cp = context_find(buffer);
1767         }
1768         if (cp != NULL && *cp != '\0') {
1769                 if (ce->ce_unlink) {
1770                         // Temporary file already exists, so we rename to
1771                         // version with extension.
1772                         char *file_org = strdup(ce->ce_file);
1773                         ce->ce_file = add(cp, ce->ce_file);
1774                         if (rename(file_org, ce->ce_file)) {
1775                                 adios(ce->ce_file, "unable to rename %s to ",
1776                                                 file_org);
1777                         }
1778                         free(file_org);
1779
1780                 } else {
1781                         ce->ce_file = add(cp, ce->ce_file);
1782                 }
1783         }
1784
1785         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1786                 content_error(ce->ce_file, ct,
1787                                 "unable to fopen for reading/writing");
1788                 return NOTOK;
1789         }
1790
1791         if ((len = ct->c_end - ct->c_begin) < 0)
1792                 adios(NULL, "internal error(2)");
1793
1794         if (!ct->c_fp) {
1795                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1796                         content_error(ct->c_file, ct,
1797                                         "unable to open for reading");
1798                         return NOTOK;
1799                 }
1800                 own_ct_fp = 1;
1801         }
1802
1803         quoted = 0;
1804
1805         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1806         while (len > 0) {
1807                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1808                         content_error(NULL, ct, "premature eof");
1809                         goto clean_up;
1810                 }
1811
1812                 if ((cc = strlen(buffer)) > len)
1813                         cc = len;
1814                 len -= cc;
1815
1816                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1817                         if (!isspace(*ep))
1818                                 break;
1819                 *++ep = '\n', ep++;
1820
1821                 for (; cp < ep; cp++) {
1822                         if (quoted > 0) {
1823                                 /* in an escape sequence */
1824                                 if (quoted == 1) {
1825                                         /* at byte 1 of an escape sequence */
1826                                         mask = hex2nib[*cp & 0x7f];
1827                                         /* next is byte 2 */
1828                                         quoted = 2;
1829                                 } else {
1830                                         /* at byte 2 of an escape sequence */
1831                                         mask <<= 4;
1832                                         mask |= hex2nib[*cp & 0x7f];
1833                                         putc(mask, ce->ce_fp);
1834                                         if (ferror(ce->ce_fp)) {
1835                                                 content_error(ce->ce_file, ct, "error writing to");
1836                                                 goto clean_up;
1837                                         }
1838                                         /*
1839                                         ** finished escape sequence; next may
1840                                         ** be literal or a new escape sequence
1841                                         */
1842                                         quoted = 0;
1843                                 }
1844                                 /* on to next byte */
1845                                 continue;
1846                         }
1847
1848                         /* not in an escape sequence */
1849                         if (*cp == '=') {
1850                                 /*
1851                                 ** starting an escape sequence,
1852                                 ** or invalid '='?
1853                                 */
1854                                 if (cp + 1 < ep && cp[1] == '\n') {
1855                                         /* "=\n" soft line break, eat the \n */
1856                                         cp++;
1857                                         continue;
1858                                 }
1859                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1860                                         /*
1861                                         ** We don't have 2 bytes left,
1862                                         ** so this is an invalid escape
1863                                         ** sequence; just show the raw bytes
1864                                         ** (below).
1865                                         */
1866                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1867                                         /*
1868                                         ** Next 2 bytes are hex digits,
1869                                         ** making this a valid escape
1870                                         ** sequence; let's decode it (above).
1871                                         */
1872                                         quoted = 1;
1873                                         continue;
1874                                 } else {
1875                                         /*
1876                                         ** One or both of the next 2 is
1877                                         ** out of range, making this an
1878                                         ** invalid escape sequence; just
1879                                         ** show the raw bytes (below).
1880                                         */
1881                                 }
1882                         }
1883
1884                         /* Just show the raw byte. */
1885                         putc(*cp, ce->ce_fp);
1886                         if (ferror(ce->ce_fp)) {
1887                                 content_error(ce->ce_file, ct,
1888                                                 "error writing to");
1889                                 goto clean_up;
1890                         }
1891                 }
1892         }
1893         if (quoted) {
1894                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1895                 goto clean_up;
1896         }
1897
1898         fseek(ct->c_fp, 0L, SEEK_SET);
1899
1900         if (fflush(ce->ce_fp)) {
1901                 content_error(ce->ce_file, ct, "error writing to");
1902                 goto clean_up;
1903         }
1904
1905         fseek(ce->ce_fp, 0L, SEEK_SET);
1906
1907 ready_to_go:
1908         *file = ce->ce_file;
1909         if (own_ct_fp) {
1910                 fclose(ct->c_fp);
1911                 ct->c_fp = NULL;
1912         }
1913         return fileno(ce->ce_fp);
1914
1915 clean_up:
1916         free_encoding(ct, 0);
1917         if (own_ct_fp) {
1918                 fclose(ct->c_fp);
1919                 ct->c_fp = NULL;
1920         }
1921         return NOTOK;
1922 }
1923
1924
1925 /*
1926 ** 7BIT
1927 */
1928
1929 static int
1930 Init7Bit(CT ct)
1931 {
1932         if (init_encoding(ct, open7Bit) == NOTOK)
1933                 return NOTOK;
1934
1935         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1936         return OK;
1937 }
1938
1939
1940 int
1941 open7Bit(CT ct, char **file)
1942 {
1943         int cc, fd, len, own_ct_fp = 0;
1944         char buffer[BUFSIZ];
1945         /* sbeck -- handle suffixes */
1946         char *cp;
1947         CI ci;
1948         CE ce;
1949
1950         ce = ct->c_cefile;
1951         if (ce->ce_fp) {
1952                 fseek(ce->ce_fp, 0L, SEEK_SET);
1953                 goto ready_to_go;
1954         }
1955
1956         if (ce->ce_file) {
1957                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1958                         content_error(ce->ce_file, ct,
1959                                         "unable to fopen for reading");
1960                         return NOTOK;
1961                 }
1962                 goto ready_to_go;
1963         }
1964
1965         if (*file == NULL) {
1966                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1967                 ce->ce_unlink = 1;
1968         } else {
1969                 ce->ce_file = getcpy(*file);
1970                 ce->ce_unlink = 0;
1971         }
1972
1973         /* sbeck@cise.ufl.edu -- handle suffixes */
1974         ci = &ct->c_ctinfo;
1975         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1976                         invo_name, ci->ci_type, ci->ci_subtype);
1977         cp = context_find(buffer);
1978         if (cp == NULL || *cp == '\0') {
1979                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1980                         ci->ci_type);
1981                 cp = context_find(buffer);
1982         }
1983         if (cp != NULL && *cp != '\0') {
1984                 if (ce->ce_unlink) {
1985                         /*
1986                         ** Temporary file already exists, so we rename to
1987                         ** version with extension.
1988                         */
1989                         char *file_org = strdup(ce->ce_file);
1990                         ce->ce_file = add(cp, ce->ce_file);
1991                         if (rename(file_org, ce->ce_file)) {
1992                                 adios(ce->ce_file, "unable to rename %s to ",
1993                                                 file_org);
1994                         }
1995                         free(file_org);
1996
1997                 } else {
1998                         ce->ce_file = add(cp, ce->ce_file);
1999                 }
2000         }
2001
2002         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
2003                 content_error(ce->ce_file, ct,
2004                                 "unable to fopen for reading/writing");
2005                 return NOTOK;
2006         }
2007
2008         if (ct->c_type == CT_MULTIPART) {
2009                 char **ap, **ep;
2010                 CI ci = &ct->c_ctinfo;
2011
2012                 len = 0;
2013                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2014                                 ci->ci_subtype);
2015                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2016                                 strlen(ci->ci_subtype);
2017                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2018                         putc(';', ce->ce_fp);
2019                         len++;
2020
2021                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2022                                         *ap, *ep);
2023
2024                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2025                                 fputs("\n\t", ce->ce_fp);
2026                                 len = 8;
2027                         } else {
2028                                 putc(' ', ce->ce_fp);
2029                                 len++;
2030                         }
2031                         fprintf(ce->ce_fp, "%s", buffer);
2032                         len += cc;
2033                 }
2034
2035                 if (ci->ci_comment) {
2036                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2037                                                 >= CPERLIN) {
2038                                 fputs("\n\t", ce->ce_fp);
2039                                 len = 8;
2040                         } else {
2041                                 putc(' ', ce->ce_fp);
2042                                 len++;
2043                         }
2044                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2045                         len += cc;
2046                 }
2047                 fprintf(ce->ce_fp, "\n");
2048                 if (ct->c_id)
2049                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2050                 if (ct->c_descr)
2051                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2052                 if (ct->c_dispo)
2053                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2054                 fprintf(ce->ce_fp, "\n");
2055         }
2056
2057         if ((len = ct->c_end - ct->c_begin) < 0)
2058                 adios(NULL, "internal error(3)");
2059
2060         if (!ct->c_fp) {
2061                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2062                         content_error(ct->c_file, ct,
2063                                         "unable to open for reading");
2064                         return NOTOK;
2065                 }
2066                 own_ct_fp = 1;
2067         }
2068
2069         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2070         while (len > 0)
2071                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2072                 case NOTOK:
2073                         content_error(ct->c_file, ct, "error reading from");
2074                         goto clean_up;
2075
2076                 case OK:
2077                         content_error(NULL, ct, "premature eof");
2078                         goto clean_up;
2079
2080                 default:
2081                         if (cc > len)
2082                                 cc = len;
2083                         len -= cc;
2084
2085                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2086                         if (ferror(ce->ce_fp)) {
2087                                 content_error(ce->ce_file, ct,
2088                                                 "error writing to");
2089                                 goto clean_up;
2090                         }
2091                 }
2092
2093         fseek(ct->c_fp, 0L, SEEK_SET);
2094
2095         if (fflush(ce->ce_fp)) {
2096                 content_error(ce->ce_file, ct, "error writing to");
2097                 goto clean_up;
2098         }
2099
2100         fseek(ce->ce_fp, 0L, SEEK_SET);
2101
2102 ready_to_go:
2103         *file = ce->ce_file;
2104         if (own_ct_fp) {
2105                 fclose(ct->c_fp);
2106                 ct->c_fp = NULL;
2107         }
2108         return fileno(ce->ce_fp);
2109
2110 clean_up:
2111         free_encoding(ct, 0);
2112         if (own_ct_fp) {
2113                 fclose(ct->c_fp);
2114                 ct->c_fp = NULL;
2115         }
2116         return NOTOK;
2117 }