Merge branch 'm_getfld2-meillo' into master
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
22 extern int debugsw;  /* when nonzero, get_content() and get_ctinfo() echo the cleaned-up field to stderr */
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
28 /*
29 ** Directory to place temp files.  This must
30 ** be set before these routines are called.
31 */
32 char *tmp;
33
34 /*
35 ** Structures for TEXT messages: text subtype names paired with TEXT_* codes
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! (NULL key marks the end of the table) */
42 };
43
44 struct k2v Charset[] = {  /* charset names paired with CHARSET_* codes */
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! (NULL key marks the end of the table) */
48 };
49
50 /*
51 ** Structures for MULTIPART messages: multipart subtype names paired with MULTI_* codes
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! (NULL key marks the end of the table) */
59 };
60
61 /*
62 ** Structures for MESSAGE messages: message subtype names paired with MESSAGE_* codes
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! (NULL key marks the end of the table) */
69 };
70
71 /*
72 ** Structure for APPLICATION messages: application subtype names paired with APPLICATION_* codes
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! (NULL key marks the end of the table) */
78 };
79
80
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes (functions private to this file)
93 */
94 static CT get_content(FILE *, char *, int);  /* reads one entity's header fields into a new CT */
95 static int get_comment(CT, unsigned char **, int);  /* consumes a parenthesized comment in a field body */
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
110
111 struct str2init str2cts[] = {  /* content type name, CT_* code, Init function; scanned by get_content() */
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },  /* get_content() steps from the row above to this one when uprf(type, "X-") is false */
121 };
122
123 struct str2init str2ces[] = {  /* transfer-encoding name, CE_* code, Init function; scanned by get_content() */
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },  /* 8bit and binary share the 7bit Init function */
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },  /* get_content() steps from the row above to this one when uprf(encoding, "X-") is false */
131 };
132
133
/*
** Inspect a wait()-style status word.
**
** The status is handed back unchanged unless its 0xff00 byte is not
** fully set and its low seven bits equal SIGQUIT.  In that one case
** stdout and stderr are flushed and the process exits with
** EX_SOFTWARE, so the function does not return.
*/
int
pidcheck(int status)
{
        int high_byte_all_ones = ((status & 0xff00) == 0xff00);
        int low_seven_bits = status & 0x007f;

        if (!high_byte_all_ones && low_seven_bits == SIGQUIT) {
                fflush(stdout);
                fflush(stderr);
                exit(EX_SOFTWARE);
        }

        return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = mh_xstrdup(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 ** Returns the newly allocated content structure, or NULL on a parse error.
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)! (presumably through free_content(), as ct->c_fp == in -- confirm in mhfree.c)
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         enum state state;
235         struct field f = {{0}};
236         int compnum;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         ct = mh_xcalloc(1, sizeof(*ct));
242
243         ct->c_fp = in;
244         ct->c_file = mh_xstrdup(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;  /* provisional; every case below that does not call adios() overwrites it */
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD2;;) {
252                 switch (state = m_getfld2(state, &f, in)) {
253                 case FLD2:
254                         compnum++;
255
256                         /* add the header data to the list (the list owns these copies) */
257                         add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
258
259                         ct->c_begin = ftell(in) + 1;
260                         continue;
261
262                 case BODY2:
263                         ct->c_begin = ftell(in) - strlen(f.value);  /* back up over the body text already held in f.value */
264                         break;
265
266                 case FILEEOF2:
267                         ct->c_begin = ftell(in);
268                         break;
269
270                 case LENERR2:
271                 case FMTERR2:
272                 case IOERR2:
273                         adios(EX_DATAERR, NULL, "message format error in component #%d",
274                                         compnum);
275
276                 default:  /* NOTE(review): reached by fall-through only if adios() returns; presumably it exits -- confirm */
277                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
278                 }
279                 break;
280         }
281
282         /*
283         ** Read the content headers.  We will parse the
284         ** MIME related header fields into their various
285         ** structures and set internal flags related to
286         ** content type/subtype, etc.
287         */
288
289         hp = ct->c_first_hf;  /* start at first header field */
290         while (hp) {
291                 /* Get MIME-Version field */
292                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
293                         int ucmp;
294                         char c;
295                         unsigned char *cp, *dp;
296
297                         if (ct->c_vrsn) {
298                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
299                                 goto next_header;
300                         }
301                         ct->c_vrsn = mh_xstrdup(hp->value);
302
303                         /* Now, cleanup this field: skip leading space, turn newlines into spaces, cut trailing space */
304                         cp = ct->c_vrsn;
305
306                         while (isspace(*cp))
307                                 cp++;
308                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
309                                 *dp++ = ' ';
310                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
311                                 if (!isspace(*dp))
312                                         break;
313                         *++dp = '\0';
314                         if (debugsw)
315                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
316
317                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
318                                 goto out;
319
320                         for (dp = cp; istoken(*dp); dp++)
321                                 continue;
322                         c = *dp;  /* terminate the token temporarily for the comparison, then restore */
323                         *dp = '\0';
324                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
325                         *dp = c;
326                         if (!ucmp) {
327                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
328                         }
329
330                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
331                         /* Get Content-Type field */
332                         struct str2init *s2i;
333                         CI ci = &ct->c_ctinfo;
334
335                         /* Check if we've already seen a Content-Type header */
336                         if (ct->c_ctline) {
337                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
338                                 goto next_header;
339                         }
340
341                         /* Parse the Content-Type field */
342                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
343                                 goto out;
344
345                         /*
346                         ** Set the Init function and the internal
347                         ** flag for this content type.
348                         */
349                         for (s2i = str2cts; s2i->si_key; s2i++)
350                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
351                                         break;
352                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))  /* no match: stay on the CT_EXTENSION row only if uprf() is nonzero */
353                                 s2i++;
354                         ct->c_type = s2i->si_val;
355                         ct->c_ctinitfnx = s2i->si_init;
356
357                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
358                         /* Get Content-Transfer-Encoding field */
359                         char c;
360                         unsigned char *cp, *dp;
361                         struct str2init *s2i;
362
363                         /*
364                         ** Check if we've already seen the
365                         ** Content-Transfer-Encoding field
366                         */
367                         if (ct->c_celine) {
368                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
369                                 goto next_header;
370                         }
371
372                         /* get copy of this field */
373                         ct->c_celine = cp = mh_xstrdup(hp->value);
374
375                         while (isspace(*cp))
376                                 cp++;
377                         for (dp = cp; istoken(*dp); dp++)
378                                 continue;
379                         c = *dp;  /* saved so the copy can be restored after the table lookup */
380                         *dp = '\0';
381
382                         /*
383                         ** Find the internal flag and Init function
384                         ** for this transfer encoding.
385                         */
386                         for (s2i = str2ces; s2i->si_key; s2i++)
387                                 if (!mh_strcasecmp(cp, s2i->si_key))
388                                         break;
389                         if (!s2i->si_key && !uprf(cp, "X-"))  /* no match: stay on the CE_EXTENSION row only if uprf() is nonzero */
390                                 s2i++;
391                         *dp = c;
392                         ct->c_encoding = s2i->si_val;
393
394                         /* Call the Init function for this encoding */
395                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
396                                 goto out;
397
398                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
399                         /* Get Content-ID field */
400                         ct->c_id = add(hp->value, ct->c_id);
401
402                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
403                         /* Get Content-Description field */
404                         ct->c_descr = add(hp->value, ct->c_descr);
405
406                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
407                         /* Get Content-Disposition field */
408                         ct->c_dispo = add(hp->value, ct->c_dispo);
409                 }
410
411 next_header:
412                 hp = hp->next;  /* next header field */
413         }
414
415         /*
416         ** Check if we saw a Content-Type field.
417         ** If not, then assign a default value for
418         ** it, and the Init function.
419         */
420         if (!ct->c_ctline) {
421                 /*
422                 ** If we are inside a multipart/digest message,
423                 ** then the default type is message/rfc822
424                 */
425                 if (toplevel < 0) {
426                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
427                                 goto out;
428                         ct->c_type = CT_MESSAGE;
429                         ct->c_ctinitfnx = InitMessage;
430                 } else {
431                         /*
432                         ** Else default type is text/plain
433                         */
434                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
435                                 goto out;
436                         ct->c_type = CT_TEXT;
437                         ct->c_ctinitfnx = InitText;
438                 }
439         }
440
441         /* Use default Transfer-Encoding, if necessary */
442         if (!ct->c_celine) {
443                 ct->c_encoding = CE_7BIT;
444                 Init7Bit(ct);  /* return value is not checked here */
445         }
446
447         return ct;
448
449 out:
450         free_content(ct);  /* error exit: release everything gathered so far */
451         return NULL;
452 }
453
454
455 /*
456 ** small routine to add header field to list
457 */
458
459 int
460 add_header(CT ct, char *name, char *value)
461 {
462         HF hp;
463
464         /* allocate header field structure */
465         hp = mh_xcalloc(1, sizeof(*hp));
466
467         /* link data into header structure */
468         hp->name = name;
469         hp->value = value;
470         hp->next = NULL;
471
472         /* link header structure into the list */
473         if (ct->c_first_hf == NULL) {
474                 ct->c_first_hf = hp;  /* this is the first */
475                 ct->c_last_hf = hp;
476         } else {
477                 ct->c_last_hf->next = hp;  /* add it to the end */
478                 ct->c_last_hf = hp;
479         }
480
481         return 0;
482 }
483
484
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** buf    existing field text; when an insertion is made it is freed
**        here and must not be used by the caller afterwards
** name   parameter name, assumed non-null
** value  parameter value; when NULL nothing is changed
**
** Returns buf itself when nothing had to be inserted, otherwise a
** newly allocated string that ends in a newline.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *text = (char *)buf;
        char *newbuf = text;
        char *name_plus_equal;

        /* Assume that name is non-null. */
        if (!buf || !value)
                return newbuf;

        name_plus_equal = concat(name, "=", NULL);

        if (!strstr(text, name_plus_equal)) {
                char *insertion;
                char *semicolon;
                size_t len = strlen(text);

                /*
                ** Trim trailing space, esp. newline.  Counting down a
                ** length keeps this safe for an empty string, where
                ** there is no "last character" to point at.
                */
                while (len > 0 && isspace((unsigned char)text[len - 1]))
                        text[--len] = '\0';

                insertion = concat("; ", name, "=", "\"", value, "\"",
                                NULL);

                /*
                ** Insert at first semicolon, if any.
                ** If none, append to end.
                */
                if ((semicolon = strchr(text, ';'))) {
                        char *suffix = concat(semicolon, NULL);

                        /* buf is released below, so it can be cut in place */
                        *semicolon = '\0';
                        newbuf = concat(text, insertion, suffix,
                                        "\n", NULL);
                        mh_free0(&suffix);
                } else {
                        /* Append to end. */
                        newbuf = concat(text, insertion, "\n", NULL);
                }

                mh_free0(&insertion);
                mh_free0(&buf);
        }

        mh_free0(&name_plus_equal);

        return newbuf;
}
542
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** A match whose closing quote is missing is treated like no match at
** all: the entire value is returned instead of scanning past the end
** of the string.
**
** Returns either `value' itself (nothing extracted) or a newly
** allocated copy of the text between the quotes.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
        char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
        char *quoted_begin, *quoted_end;
        char *extracted_name_value;
        size_t len;

        mh_free0(&name_suffix_plus_quote);
        if (!name_suffix_equals)
                return value;

        /*
        ** Find first \".  It is always there: the pattern that just
        ** matched ends with one.
        */
        quoted_begin = strchr(name_suffix_equals, '"') + 1;

        /* Find second \"; strchr() stops at the terminating NUL. */
        quoted_end = strchr(quoted_begin, '"');
        if (!quoted_end)
                return value;

        len = quoted_end - quoted_begin;
        extracted_name_value = mh_xcalloc(len + 1, sizeof(char));
        memcpy(extracted_name_value, quoted_begin, len);
        extracted_name_value[len] = '\0';

        return extracted_name_value;
}
575
576 /*
577 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
578 ** directives.  Fills in the information of the CTinfo structure.
579 */
580 int
581 get_ctinfo(unsigned char *cp, CT ct, int magic)
582 {
583         int i;
584         unsigned char *dp;
585         char **ap, **ep;
586         char c;
587         CI ci;
588
589         ci = &ct->c_ctinfo;
590         i = strlen(invo_name) + 2;
591
592         /* store copy of Content-Type line */
593         cp = ct->c_ctline = mh_xstrdup(cp);
594
595         while (isspace(*cp))  /* trim leading spaces */
596                 cp++;
597
598         /* change newlines to spaces */
599         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
600                 *dp++ = ' ';
601
602         /* trim trailing spaces */
603         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
604                 if (!isspace(*dp))
605                         break;
606         *++dp = '\0';
607
608         if (debugsw)
609                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
610
611         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
612                 return NOTOK;
613
614         for (dp = cp; istoken(*dp); dp++)
615                 continue;
616         c = *dp, *dp = '\0';
617         ci->ci_type = mh_xstrdup(cp);  /* store content type */
618         *dp = c, cp = dp;
619
620         if (!*ci->ci_type) {
621                 advise(NULL, "invalid %s: field in message %s (empty type)",
622                                 TYPE_FIELD, ct->c_file);
623                 return NOTOK;
624         }
625
626         /* down case the content type string */
627         for (dp = ci->ci_type; *dp; dp++)
628                 if (isalpha(*dp) && isupper(*dp))
629                         *dp = tolower(*dp);
630
631         while (isspace(*cp))
632                 cp++;
633
634         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
635                 return NOTOK;
636
637         if (*cp != '/') {
638                 if (!magic)
639                         ci->ci_subtype = mh_xstrdup("");
640                 goto magic_skip;
641         }
642
643         cp++;
644         while (isspace(*cp))
645                 cp++;
646
647         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
648                 return NOTOK;
649
650         for (dp = cp; istoken(*dp); dp++)
651                 continue;
652         c = *dp, *dp = '\0';
653         ci->ci_subtype = mh_xstrdup(cp);  /* store the content subtype */
654         *dp = c, cp = dp;
655
656         if (!*ci->ci_subtype) {
657                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
658                 return NOTOK;
659         }
660
661         /* down case the content subtype string */
662         for (dp = ci->ci_subtype; *dp; dp++)
663                 if (isalpha(*dp) && isupper(*dp))
664                         *dp = tolower(*dp);
665
666 magic_skip:
667         while (isspace(*cp))
668                 cp++;
669
670         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
671                 return NOTOK;
672
673         /*
674         ** Parse attribute/value pairs given with Content-Type
675         */
676         ep = (ap = ci->ci_attrs) + NPARMS;
677         while (*cp == ';') {
678                 char *vp;
679                 unsigned char *up;
680
681                 if (ap >= ep) {
682                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
683                         return NOTOK;
684                 }
685
686                 cp++;
687                 while (isspace(*cp))
688                         cp++;
689
690                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
691                         return NOTOK;
692
693                 if (*cp == 0) {
694                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
695                         return OK;
696                 }
697
698                 /* down case the attribute name */
699                 for (dp = cp; istoken(*dp); dp++)
700                         if (isalpha(*dp) && isupper(*dp))
701                                 *dp = tolower(*dp);
702
703                 for (up = dp; isspace(*dp);)
704                         dp++;
705                 if (dp == cp || *dp != '=') {
706                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
707                         return NOTOK;
708                 }
709
710                 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
711                 *vp = '\0';
712                 for (dp++; isspace(*dp);)
713                         dp++;
714
715                 /* now add the attribute value */
716                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
717
718                 if (*dp == '"') {
719                         for (cp = ++dp, dp = vp;;) {
720                                 switch (c = *cp++) {
721                                 case '\0':
722 bad_quote:
723                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
724                                         return NOTOK;
725
726                                 case '\\':
727                                         *dp++ = c;
728                                         if ((c = *cp++) == '\0')
729                                                 goto bad_quote;
730                                         /* else fall... */
731
732                                 default:
733                                         *dp++ = c;
734                                         continue;
735
736                                 case '"':
737                                         *dp = '\0';
738                                         break;
739                                 }
740                                 break;
741                         }
742                 } else {
743                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
744                                 continue;
745                         *dp = '\0';
746                 }
747                 if (!*vp) {
748                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
749                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
750                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
751                         continue;
752                 }
753                 ap++;
754
755                 while (isspace(*cp))
756                         cp++;
757
758                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
759                         return NOTOK;
760         }
761
762         /*
763         ** Get any <Content-Id> given in buffer
764         */
765         if (magic && *cp == '<') {
766                 if (ct->c_id) {
767                         mh_free0(&(ct->c_id));
768                 }
769                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
770                         advise(NULL, "invalid ID in message %s", ct->c_file);
771                         return NOTOK;
772                 }
773                 c = *dp;
774                 *dp = '\0';
775                 if (*ct->c_id)
776                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
777                 else
778                         ct->c_id = NULL;
779                 *dp++ = c;
780                 cp = dp;
781
782                 while (isspace(*cp))
783                         cp++;
784         }
785
786         /*
787         ** Get any [Content-Description] given in buffer.
788         */
789         if (magic && *cp == '[') {
790                 ct->c_descr = ++cp;
791                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
792                         if (*dp == ']')
793                                 break;
794                 if (dp < cp) {
795                         advise(NULL, "invalid description in message %s",
796                                         ct->c_file);
797                         ct->c_descr = NULL;
798                         return NOTOK;
799                 }
800
801                 c = *dp;
802                 *dp = '\0';
803                 if (*ct->c_descr)
804                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
805                 else
806                         ct->c_descr = NULL;
807                 *dp++ = c;
808                 cp = dp;
809
810                 while (isspace(*cp))
811                         cp++;
812         }
813
814         /*
815         ** Get any {Content-Disposition} given in buffer.
816         */
817         if (magic && *cp == '{') {
818                 ct->c_dispo = ++cp;
819                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
820                         if (*dp == '}')
821                                 break;
822                 if (dp < cp) {
823                         advise(NULL, "invalid disposition in message %s",
824                                         ct->c_file);
825                         ct->c_dispo = NULL;
826                         return NOTOK;
827                 }
828
829                 c = *dp;
830                 *dp = '\0';
831                 if (*ct->c_dispo)
832                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
833                 else
834                         ct->c_dispo = NULL;
835                 *dp++ = c;
836                 cp = dp;
837
838                 while (isspace(*cp))
839                         cp++;
840         }
841
842         /*
843         ** Check if anything is left over
844         */
845         if (*cp) {
846                 if (magic) {
847                         ci->ci_magic = mh_xstrdup(cp);
848
849                         /*
850                         ** If there is a Content-Disposition header and
851                         ** it doesn't have a *filename=, extract it from
852                         ** the magic contents.  The mhbasename call skips
853                         ** any leading directory components.
854                         */
855                         if (ct->c_dispo)
856                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
857                         } else
858                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
859         }
860
861         return OK;
862 }
863
864
865 static int
866 get_comment(CT ct, unsigned char **ap, int istype)
867 {
868         int i;
869         char *bp;
870         unsigned char *cp;
871         char c, buffer[BUFSIZ], *dp;
872         CI ci;
873
874         ci = &ct->c_ctinfo;
875         cp = *ap;
876         bp = buffer;
877         cp++;
878
879         for (i = 0;;) {
880                 switch (c = *cp++) {
881                 case '\0':
882 invalid:
883                 advise(NULL, "invalid comment in message %s's %s: field",
884                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
885                 return NOTOK;
886
887                 case '\\':
888                         *bp++ = c;
889                         if ((c = *cp++) == '\0')
890                                 goto invalid;
891                         *bp++ = c;
892                         continue;
893
894                 case '(':
895                         i++;
896                         /* and fall... */
897                 default:
898                         *bp++ = c;
899                         continue;
900
901                 case ')':
902                         if (--i < 0)
903                                 break;
904                         *bp++ = c;
905                         continue;
906                 }
907                 break;
908         }
909         *bp = '\0';
910
911         if (istype) {
912                 if ((dp = ci->ci_comment)) {
913                         ci->ci_comment = concat(dp, " ", buffer, NULL);
914                         mh_free0(&dp);
915                 } else {
916                         ci->ci_comment = mh_xstrdup(buffer);
917                 }
918         }
919
920         while (isspace(*cp))
921                 cp++;
922
923         *ap = cp;
924         return OK;
925 }
926
927
928 /*
929 ** CONTENTS
930 **
931 ** Handles content types audio, image, and video.
932 ** There's not much to do right here.
933 */
934
935 static int
936 InitGeneric(CT ct)
937 {
938         return OK;  /* not much to do here */
939 }
940
941
942 /*
943 ** TEXT
944 */
945
946 static int
947 InitText(CT ct)
948 {
949         char **ap, **ep;
950         struct k2v *kv;
951         struct text *t;
952         CI ci = &ct->c_ctinfo;
953
954         /* check for missing subtype */
955         if (!*ci->ci_subtype)
956                 ci->ci_subtype = add("plain", ci->ci_subtype);
957
958         /* match subtype */
959         for (kv = SubText; kv->kv_key; kv++)
960                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
961                         break;
962         ct->c_subtype = kv->kv_value;
963
964         /* allocate text character set structure */
965         t = mh_xcalloc(1, sizeof(*t));
966         ct->c_ctparams = (void *) t;
967
968         /* scan for charset parameter */
969         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
970                 if (!mh_strcasecmp(*ap, "charset"))
971                         break;
972
973         /* check if content specified a character set */
974         if (*ap) {
975                 /* store its name */
976                 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
977                 /* match character set or set to CHARSET_UNKNOWN */
978                 for (kv = Charset; kv->kv_key; kv++) {
979                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
980                                 break;
981                         }
982                 }
983                 t->tx_charset = kv->kv_value;
984         } else {
985                 t->tx_charset = CHARSET_UNSPECIFIED;
986         }
987
988         return OK;
989 }
990
991
992 /*
993 ** MULTIPART
994 */
995
996 static int
997 InitMultiPart(CT ct)
998 {
999         int inout;
1000         long last, pos;
1001         unsigned char *cp, *dp;
1002         char **ap, **ep;
1003         char *bp, buffer[BUFSIZ];
1004         struct multipart *m;
1005         struct k2v *kv;
1006         struct part *part, **next;
1007         CI ci = &ct->c_ctinfo;
1008         CT p;
1009         FILE *fp;
1010
1011         /*
1012         ** The encoding for multipart messages must be either
1013         ** 7bit, 8bit, or binary (per RFC2045).
1014         */
1015         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1016                 && ct->c_encoding != CE_BINARY) {
1017                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1018                 ct->c_encoding = CE_7BIT;
1019         }
1020
1021         /* match subtype */
1022         for (kv = SubMultiPart; kv->kv_key; kv++)
1023                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1024                         break;
1025         ct->c_subtype = kv->kv_value;
1026
1027         /*
1028         ** Check for "boundary" parameter, which is
1029         ** required for multipart messages.
1030         */
1031         bp = 0;
1032         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1033                 if (!mh_strcasecmp(*ap, "boundary")) {
1034                         bp = *ep;
1035                         break;
1036                 }
1037         }
1038
1039         /* complain if boundary parameter is missing */
1040         if (!*ap) {
1041                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1042                 return NOTOK;
1043         }
1044
1045         /* allocate primary structure for multipart info */
1046         m = mh_xcalloc(1, sizeof(*m));
1047         ct->c_ctparams = (void *) m;
1048
1049         /* check if boundary parameter contains only whitespace characters */
1050         for (cp = bp; isspace(*cp); cp++)
1051                 continue;
1052         if (!*cp) {
1053                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1054                 return NOTOK;
1055         }
1056
1057         /* remove trailing whitespace from boundary parameter */
1058         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1059                 if (!isspace(*dp))
1060                         break;
1061         *++dp = '\0';
1062
1063         /* record boundary separators */
1064         m->mp_start = concat(bp, "\n", NULL);
1065         m->mp_stop = concat(bp, "--\n", NULL);
1066
1067         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1068                 advise(ct->c_file, "unable to open for reading");
1069                 return NOTOK;
1070         }
1071
1072         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1073         last = ct->c_end;
1074         next = &m->mp_parts;
1075         part = NULL;
1076         inout = 1;
1077
1078         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1079                 if (pos > last)
1080                         break;
1081
1082                 pos += strlen(buffer);
1083                 if (buffer[0] != '-' || buffer[1] != '-')
1084                         continue;
1085                 if (inout) {
1086                         if (strcmp(buffer + 2, m->mp_start)!=0)
1087                                 continue;
1088 next_part:
1089                         part = mh_xcalloc(1, sizeof(*part));
1090                         *next = part;
1091                         next = &part->mp_next;
1092
1093                         if (!(p = get_content(fp, ct->c_file,
1094                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1095                                 ct->c_fp = NULL;
1096                                 return NOTOK;
1097                         }
1098                         p->c_fp = NULL;
1099                         part->mp_part = p;
1100                         pos = p->c_begin;
1101                         fseek(fp, pos, SEEK_SET);
1102                         inout = 0;
1103                 } else {
1104                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1105                                 inout = 1;
1106 end_part:
1107                                 p = part->mp_part;
1108                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1109                                 if (p->c_end < p->c_begin)
1110                                         p->c_begin = p->c_end;
1111                                 if (inout)
1112                                         goto next_part;
1113                                 goto last_part;
1114                         } else {
1115                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1116                                         goto end_part;
1117                         }
1118                 }
1119         }
1120
1121         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1122         if (!inout && part) {
1123                 p = part->mp_part;
1124                 p->c_end = ct->c_end;
1125
1126                 if (p->c_begin >= p->c_end) {
1127                         for (next = &m->mp_parts; *next != part;
1128                                 next = &((*next)->mp_next))
1129                                 continue;
1130                         *next = NULL;
1131                         free_content(p);
1132                         mh_free0(&part);
1133                 }
1134         }
1135
1136 last_part:
1137         /* reverse the order of the parts for multipart/alternative */
1138         if (ct->c_subtype == MULTI_ALTERNATE)
1139                 reverse_parts(ct);
1140
1141         /*
1142         ** label all subparts with part number, and
1143         ** then initialize the content of the subpart.
1144         */
1145         {
1146                 int partnum;
1147                 char *pp;
1148                 char partnam[BUFSIZ];
1149
1150                 if (ct->c_partno) {
1151                         snprintf(partnam, sizeof(partnam), "%s.",
1152                                         ct->c_partno);
1153                         pp = partnam + strlen(partnam);
1154                 } else {
1155                         pp = partnam;
1156                 }
1157
1158                 for (part = m->mp_parts, partnum = 1; part;
1159                         part = part->mp_next, partnum++) {
1160                         p = part->mp_part;
1161
1162                         sprintf(pp, "%d", partnum);
1163                         p->c_partno = mh_xstrdup(partnam);
1164
1165                         /* initialize the content of the subparts */
1166                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1167                                 fclose(ct->c_fp);
1168                                 ct->c_fp = NULL;
1169                                 return NOTOK;
1170                         }
1171                 }
1172         }
1173
1174         fclose(ct->c_fp);
1175         ct->c_fp = NULL;
1176         return OK;
1177 }
1178
1179
1180 /*
1181 ** reverse the order of the parts of a multipart
1182 */
1183
1184 static void
1185 reverse_parts(CT ct)
1186 {
1187         int i;
1188         struct multipart *m;
1189         struct part **base, **bmp, **next, *part;
1190
1191         m = (struct multipart *) ct->c_ctparams;
1192
1193         /* if only one part, just return */
1194         if (!m->mp_parts || !m->mp_parts->mp_next)
1195                 return;
1196
1197         /* count number of parts */
1198         i = 0;
1199         for (part = m->mp_parts; part; part = part->mp_next)
1200                 i++;
1201
1202         /* allocate array of pointers to the parts */
1203         base = mh_xcalloc(i + 1, sizeof(*base));
1204         bmp = base;
1205
1206         /* point at all the parts */
1207         for (part = m->mp_parts; part; part = part->mp_next)
1208                 *bmp++ = part;
1209         *bmp = NULL;
1210
1211         /* reverse the order of the parts */
1212         next = &m->mp_parts;
1213         for (bmp--; bmp >= base; bmp--) {
1214                 part = *bmp;
1215                 *next = part;
1216                 next = &part->mp_next;
1217         }
1218         *next = NULL;
1219
1220         /* free array of pointers */
1221         mh_free0(&base);
1222 }
1223
1224
1225 /*
1226 ** MESSAGE
1227 */
1228
1229 static int
1230 InitMessage(CT ct)
1231 {
1232         struct k2v *kv;
1233         CI ci = &ct->c_ctinfo;
1234
1235         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1236                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1237                 return NOTOK;
1238         }
1239
1240         /* check for missing subtype */
1241         if (!*ci->ci_subtype)
1242                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1243
1244         /* match subtype */
1245         for (kv = SubMessage; kv->kv_key; kv++)
1246                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1247                         break;
1248         ct->c_subtype = kv->kv_value;
1249
1250         switch (ct->c_subtype) {
1251         case MESSAGE_RFC822:
1252                 break;
1253
1254         case MESSAGE_PARTIAL:
1255                 {
1256                 char **ap, **ep;
1257                 struct partial *p;
1258
1259                 p = mh_xcalloc(1, sizeof(*p));
1260                 ct->c_ctparams = (void *) p;
1261
1262                 /*
1263                 ** scan for parameters "id", "number",
1264                 ** and "total"
1265                 */
1266                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1267                         if (!mh_strcasecmp(*ap, "id")) {
1268                                 p->pm_partid = mh_xstrdup(*ep);
1269                                 continue;
1270                         }
1271                         if (!mh_strcasecmp(*ap, "number")) {
1272                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1273 invalid_param:
1274                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1275                                         return NOTOK;
1276                                 }
1277                                 continue;
1278                         }
1279                         if (!mh_strcasecmp(*ap, "total")) {
1280                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1281                                                 p->pm_maxno < 1)
1282                                         goto invalid_param;
1283                                 continue;
1284                         }
1285                 }
1286
1287                 if (!p->pm_partid || !p->pm_partno
1288                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1289                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1290                         return NOTOK;
1291                 }
1292                 }
1293                 break;
1294
1295         case MESSAGE_EXTERNAL:
1296                 {
1297                 CT p;
1298                 FILE *fp;
1299
1300                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1301                         advise(ct->c_file, "unable to open for reading");
1302                         return NOTOK;
1303                 }
1304
1305                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1306
1307                 if (!(p = get_content(fp, ct->c_file, 0))) {
1308                         ct->c_fp = NULL;
1309                         return NOTOK;
1310                 }
1311
1312                 p->c_fp = NULL;
1313                 p->c_end = p->c_begin;
1314
1315                 fclose(ct->c_fp);
1316                 ct->c_fp = NULL;
1317
1318                 switch (p->c_type) {
1319                 case CT_MULTIPART:
1320                         break;
1321
1322                 case CT_MESSAGE:
1323                         if (p->c_subtype != MESSAGE_RFC822)
1324                                 break;
1325                         /* else fall... */
1326                 default:
1327                         if (p->c_ctinitfnx)
1328                                 (*p->c_ctinitfnx) (p);
1329                         break;
1330                 }
1331                 }
1332                 break;
1333
1334         default:
1335                 break;
1336         }
1337
1338         return OK;
1339 }
1340
1341
1342 /*
1343 ** APPLICATION
1344 */
1345
1346 static int
1347 InitApplication(CT ct)
1348 {
1349         struct k2v *kv;
1350         CI ci = &ct->c_ctinfo;
1351
1352         /* match subtype */
1353         for (kv = SubApplication; kv->kv_key; kv++)
1354                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1355                         break;
1356         ct->c_subtype = kv->kv_value;
1357
1358         return OK;
1359 }
1360
1361
1362 /*
1363 ** TRANSFER ENCODINGS
1364 */
1365
1366 static int
1367 init_encoding(CT ct, OpenCEFunc openfnx)
1368 {
1369         CE ce;
1370
1371         ce = mh_xcalloc(1, sizeof(*ce));
1372
1373         ct->c_cefile     = ce;
1374         ct->c_ceopenfnx  = openfnx;
1375         ct->c_ceclosefnx = close_encoding;
1376         ct->c_cesizefnx  = size_encoding;
1377
1378         return OK;
1379 }
1380
1381
1382 void
1383 close_encoding(CT ct)
1384 {
1385         CE ce;
1386
1387         if (!(ce = ct->c_cefile))
1388                 return;
1389
1390         if (ce->ce_fp) {
1391                 fclose(ce->ce_fp);
1392                 ce->ce_fp = NULL;
1393         }
1394 }
1395
1396
1397 static unsigned long
1398 size_encoding(CT ct)
1399 {
1400         int fd;
1401         unsigned long size;
1402         char *file;
1403         CE ce;
1404         struct stat st;
1405
1406         if (!(ce = ct->c_cefile))
1407                 return (ct->c_end - ct->c_begin);
1408
1409         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1410                 return (long) st.st_size;
1411
1412         if (ce->ce_file) {
1413                 if (stat(ce->ce_file, &st) != NOTOK)
1414                         return (long) st.st_size;
1415                 else
1416                         return 0L;
1417         }
1418
1419         if (ct->c_encoding == CE_EXTERNAL)
1420                 return (ct->c_end - ct->c_begin);
1421
1422         file = NULL;
1423         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1424                 return (ct->c_end - ct->c_begin);
1425
1426         if (fstat(fd, &st) != NOTOK)
1427                 size = (long) st.st_size;
1428         else
1429                 size = 0L;
1430
1431         (*ct->c_ceclosefnx) (ct);
1432         return size;
1433 }
1434
1435
1436 /*
1437 ** BASE64
1438 */
1439
/*
** Maps a 7-bit character code to its 6-bit BASE64 value.
** Characters outside the BASE64 alphabet map to 0xff.
*/
static unsigned char b642nib[0x80] = {
        /* 0x00 - 0x07 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x08 - 0x0f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x10 - 0x17 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x18 - 0x1f */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x20 - 0x27 */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x28 - 0x2f: '+' and '/' */
        0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        /* 0x30 - 0x37: '0' .. '7' */
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
        /* 0x38 - 0x3f: '8', '9' */
        0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 - 0x47: 'A' .. 'G' */
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
        /* 0x48 - 0x4f: 'H' .. 'O' */
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        /* 0x50 - 0x57: 'P' .. 'W' */
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
        /* 0x58 - 0x5f: 'X' .. 'Z' */
        0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 - 0x67: 'a' .. 'g' */
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
        /* 0x68 - 0x6f: 'h' .. 'o' */
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        /* 0x70 - 0x77: 'p' .. 'w' */
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
        /* 0x78 - 0x7f: 'x' .. 'z' */
        0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1458
1459
1460 static int
1461 InitBase64(CT ct)
1462 {
1463         return init_encoding(ct, openBase64);
1464 }
1465
1466
1467 static int
1468 openBase64(CT ct, char **file)
1469 {
1470         int bitno, cc;
1471         int fd, len, skip, own_ct_fp = 0;
1472         unsigned long bits;
1473         unsigned char value, *b, *b1, *b2, *b3;
1474         unsigned char *cp, *ep;
1475         char buffer[BUFSIZ];
1476         /* sbeck -- handle suffixes */
1477         CI ci;
1478         CE ce;
1479
1480         b  = (unsigned char *) &bits;
1481         b1 = &b[endian > 0 ? 1 : 2];
1482         b2 = &b[endian > 0 ? 2 : 1];
1483         b3 = &b[endian > 0 ? 3 : 0];
1484
1485         ce = ct->c_cefile;
1486         if (ce->ce_fp) {
1487                 fseek(ce->ce_fp, 0L, SEEK_SET);
1488                 goto ready_to_go;
1489         }
1490
1491         if (ce->ce_file) {
1492                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1493                         content_error(ce->ce_file, ct,
1494                                         "unable to fopen for reading");
1495                         return NOTOK;
1496                 }
1497                 goto ready_to_go;
1498         }
1499
1500         if (*file == NULL) {
1501                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1502                 ce->ce_unlink = 1;
1503         } else {
1504                 ce->ce_file = mh_xstrdup(*file);
1505                 ce->ce_unlink = 0;
1506         }
1507
1508         /* sbeck@cise.ufl.edu -- handle suffixes */
1509         ci = &ct->c_ctinfo;
1510         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1511                         invo_name, ci->ci_type, ci->ci_subtype);
1512         cp = context_find(buffer);
1513         if (cp == NULL || *cp == '\0') {
1514                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1515                                 ci->ci_type);
1516                 cp = context_find(buffer);
1517         }
1518         if (cp != NULL && *cp != '\0') {
1519                 if (ce->ce_unlink) {
1520                         /*
1521                         ** Temporary file already exists, so we rename to
1522                         ** version with extension.
1523                         */
1524                         char *file_org = mh_xstrdup(ce->ce_file);
1525                         ce->ce_file = add(cp, ce->ce_file);
1526                         if (rename(file_org, ce->ce_file)) {
1527                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1528                                                 file_org);
1529                         }
1530                         mh_free0(&file_org);
1531
1532                 } else {
1533                         ce->ce_file = add(cp, ce->ce_file);
1534                 }
1535         }
1536
1537         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1538                 content_error(ce->ce_file, ct,
1539                                 "unable to fopen for reading/writing");
1540                 return NOTOK;
1541         }
1542
1543         if ((len = ct->c_end - ct->c_begin) < 0)
1544                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1545
1546         if (!ct->c_fp) {
1547                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1548                         content_error(ct->c_file, ct,
1549                                         "unable to open for reading");
1550                         return NOTOK;
1551                 }
1552                 own_ct_fp = 1;
1553         }
1554
1555         bitno = 18;
1556         bits = 0L;
1557         skip = 0;
1558
1559         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1560         while (len > 0) {
1561                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1562                 case NOTOK:
1563                         content_error(ct->c_file, ct, "error reading from");
1564                         goto clean_up;
1565
1566                 case OK:
1567                         content_error(NULL, ct, "premature eof");
1568                         goto clean_up;
1569
1570                 default:
1571                         if (cc > len)
1572                                 cc = len;
1573                         len -= cc;
1574
1575                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1576                                 switch (*cp) {
1577                                 default:
1578                                         if (isspace(*cp))
1579                                                 break;
1580                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1581                                                 if (debugsw) {
1582                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1583                                                 }
1584                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1585                                                 continue;
1586                                         }
1587
1588                                         bits |= value << bitno;
1589 test_end:
1590                                         if ((bitno -= 6) < 0) {
1591                                                 putc((char) *b1, ce->ce_fp);
1592                                                 if (skip < 2) {
1593                                                         putc((char) *b2, ce->ce_fp);
1594                                                         if (skip < 1) {
1595                                                                 putc((char) *b3, ce->ce_fp);
1596                                                         }
1597                                                 }
1598
1599                                                 if (ferror(ce->ce_fp)) {
1600                                                         content_error(ce->ce_file, ct,
1601                                                                                    "error writing to");
1602                                                         goto clean_up;
1603                                                 }
1604                                                 bitno = 18, bits = 0L, skip = 0;
1605                                         }
1606                                         break;
1607
1608                                 case '=':
1609                                         if (++skip > 3)
1610                                                 goto self_delimiting;
1611                                         goto test_end;
1612                                 }
1613                         }
1614                 }
1615         }
1616
1617         if (bitno != 18) {
1618                 if (debugsw)
1619                         fprintf(stderr, "premature ending (bitno %d)\n",
1620                                         bitno);
1621
1622                 content_error(NULL, ct, "invalid BASE64 encoding");
1623                 goto clean_up;
1624         }
1625
1626 self_delimiting:
1627         fseek(ct->c_fp, 0L, SEEK_SET);
1628
1629         if (fflush(ce->ce_fp)) {
1630                 content_error(ce->ce_file, ct, "error writing to");
1631                 goto clean_up;
1632         }
1633
1634         fseek(ce->ce_fp, 0L, SEEK_SET);
1635
1636 ready_to_go:
1637         *file = ce->ce_file;
1638         if (own_ct_fp) {
1639                 fclose(ct->c_fp);
1640                 ct->c_fp = NULL;
1641         }
1642         return fileno(ce->ce_fp);
1643
1644 clean_up:
1645         free_encoding(ct, 0);
1646         if (own_ct_fp) {
1647                 fclose(ct->c_fp);
1648                 ct->c_fp = NULL;
1649         }
1650         return NOTOK;
1651 }
1652
1653
1654 /*
1655 ** QUOTED PRINTABLE
1656 */
1657
/*
** Maps a 7-bit character code to the value of the hexadecimal
** digit it represents.  Characters that are no hexadecimal digit
** map to 0, the same as the digit '0'.
*/
static char hex2nib[0x80] = {
        /* 0x00 - 0x07 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x08 - 0x0f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x10 - 0x17 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x18 - 0x1f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x20 - 0x27 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x28 - 0x2f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x30 - 0x37: '0' .. '7' */
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        /* 0x38 - 0x3f: '8', '9' */
        0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x40 - 0x47: 'A' .. 'F' */
        0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
        /* 0x48 - 0x4f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x50 - 0x57 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x58 - 0x5f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x60 - 0x67: 'a' .. 'f' */
        0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
        /* 0x68 - 0x6f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x70 - 0x77 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x78 - 0x7f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1676
1677
1678 static int
1679 InitQuoted(CT ct)
1680 {
1681         return init_encoding(ct, openQuoted);
1682 }
1683
1684
1685 static int
1686 openQuoted(CT ct, char **file)
1687 {
1688         int cc, len, quoted, own_ct_fp = 0;
1689         unsigned char *cp, *ep;
1690         char buffer[BUFSIZ];
1691         unsigned char mask = 0;
1692         CE ce;
1693         /* sbeck -- handle suffixes */
1694         CI ci;
1695
1696         ce = ct->c_cefile;
1697         if (ce->ce_fp) {
1698                 fseek(ce->ce_fp, 0L, SEEK_SET);
1699                 goto ready_to_go;
1700         }
1701
1702         if (ce->ce_file) {
1703                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1704                         content_error(ce->ce_file, ct,
1705                                         "unable to fopen for reading");
1706                         return NOTOK;
1707                 }
1708                 goto ready_to_go;
1709         }
1710
1711         if (*file == NULL) {
1712                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1713                 ce->ce_unlink = 1;
1714         } else {
1715                 ce->ce_file = mh_xstrdup(*file);
1716                 ce->ce_unlink = 0;
1717         }
1718
1719         /* sbeck@cise.ufl.edu -- handle suffixes */
1720         ci = &ct->c_ctinfo;
1721         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1722                         invo_name, ci->ci_type, ci->ci_subtype);
1723         cp = context_find(buffer);
1724         if (cp == NULL || *cp == '\0') {
1725                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1726                                 ci->ci_type);
1727                 cp = context_find(buffer);
1728         }
1729         if (cp != NULL && *cp != '\0') {
1730                 if (ce->ce_unlink) {
1731                         /*
1732                         ** Temporary file already exists, so we rename to
1733                         ** version with extension.
1734                         */
1735                         char *file_org = mh_xstrdup(ce->ce_file);
1736                         ce->ce_file = add(cp, ce->ce_file);
1737                         if (rename(file_org, ce->ce_file)) {
1738                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1739                                                 file_org);
1740                         }
1741                         mh_free0(&file_org);
1742
1743                 } else {
1744                         ce->ce_file = add(cp, ce->ce_file);
1745                 }
1746         }
1747
1748         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1749                 content_error(ce->ce_file, ct,
1750                                 "unable to fopen for reading/writing");
1751                 return NOTOK;
1752         }
1753
1754         if ((len = ct->c_end - ct->c_begin) < 0)
1755                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1756
1757         if (!ct->c_fp) {
1758                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1759                         content_error(ct->c_file, ct,
1760                                         "unable to open for reading");
1761                         return NOTOK;
1762                 }
1763                 own_ct_fp = 1;
1764         }
1765
1766         quoted = 0;
1767
1768         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1769         while (len > 0) {
1770                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1771                         content_error(NULL, ct, "premature eof");
1772                         goto clean_up;
1773                 }
1774
1775                 if ((cc = strlen(buffer)) > len)
1776                         cc = len;
1777                 len -= cc;
1778
1779                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1780                         if (!isspace(*ep))
1781                                 break;
1782                 *++ep = '\n', ep++;
1783
1784                 for (; cp < ep; cp++) {
1785                         if (quoted > 0) {
1786                                 /* in an escape sequence */
1787                                 if (quoted == 1) {
1788                                         /* at byte 1 of an escape sequence */
1789                                         mask = hex2nib[*cp & 0x7f];
1790                                         /* next is byte 2 */
1791                                         quoted = 2;
1792                                 } else {
1793                                         /* at byte 2 of an escape sequence */
1794                                         mask <<= 4;
1795                                         mask |= hex2nib[*cp & 0x7f];
1796                                         putc(mask, ce->ce_fp);
1797                                         if (ferror(ce->ce_fp)) {
1798                                                 content_error(ce->ce_file, ct, "error writing to");
1799                                                 goto clean_up;
1800                                         }
1801                                         /*
1802                                         ** finished escape sequence; next may
1803                                         ** be literal or a new escape sequence
1804                                         */
1805                                         quoted = 0;
1806                                 }
1807                                 /* on to next byte */
1808                                 continue;
1809                         }
1810
1811                         /* not in an escape sequence */
1812                         if (*cp == '=') {
1813                                 /*
1814                                 ** starting an escape sequence,
1815                                 ** or invalid '='?
1816                                 */
1817                                 if (cp + 1 < ep && cp[1] == '\n') {
1818                                         /* "=\n" soft line break, eat the \n */
1819                                         cp++;
1820                                         continue;
1821                                 }
1822                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1823                                         /*
1824                                         ** We don't have 2 bytes left,
1825                                         ** so this is an invalid escape
1826                                         ** sequence; just show the raw bytes
1827                                         ** (below).
1828                                         */
1829                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1830                                         /*
1831                                         ** Next 2 bytes are hex digits,
1832                                         ** making this a valid escape
1833                                         ** sequence; let's decode it (above).
1834                                         */
1835                                         quoted = 1;
1836                                         continue;
1837                                 } else {
1838                                         /*
1839                                         ** One or both of the next 2 is
1840                                         ** out of range, making this an
1841                                         ** invalid escape sequence; just
1842                                         ** show the raw bytes (below).
1843                                         */
1844                                 }
1845                         }
1846
1847                         /* Just show the raw byte. */
1848                         putc(*cp, ce->ce_fp);
1849                         if (ferror(ce->ce_fp)) {
1850                                 content_error(ce->ce_file, ct,
1851                                                 "error writing to");
1852                                 goto clean_up;
1853                         }
1854                 }
1855         }
1856         if (quoted) {
1857                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1858                 goto clean_up;
1859         }
1860
1861         fseek(ct->c_fp, 0L, SEEK_SET);
1862
1863         if (fflush(ce->ce_fp)) {
1864                 content_error(ce->ce_file, ct, "error writing to");
1865                 goto clean_up;
1866         }
1867
1868         fseek(ce->ce_fp, 0L, SEEK_SET);
1869
1870 ready_to_go:
1871         *file = ce->ce_file;
1872         if (own_ct_fp) {
1873                 fclose(ct->c_fp);
1874                 ct->c_fp = NULL;
1875         }
1876         return fileno(ce->ce_fp);
1877
1878 clean_up:
1879         free_encoding(ct, 0);
1880         if (own_ct_fp) {
1881                 fclose(ct->c_fp);
1882                 ct->c_fp = NULL;
1883         }
1884         return NOTOK;
1885 }
1886
1887
1888 /*
1889 ** 7BIT
1890 */
1891
1892 static int
1893 Init7Bit(CT ct)
1894 {
1895         if (init_encoding(ct, open7Bit) == NOTOK)
1896                 return NOTOK;
1897
1898         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1899         return OK;
1900 }
1901
1902
1903 int
1904 open7Bit(CT ct, char **file)
1905 {
1906         int cc, fd, len, own_ct_fp = 0;
1907         char buffer[BUFSIZ];
1908         /* sbeck -- handle suffixes */
1909         char *cp;
1910         CI ci;
1911         CE ce;
1912
1913         ce = ct->c_cefile;
1914         if (ce->ce_fp) {
1915                 fseek(ce->ce_fp, 0L, SEEK_SET);
1916                 goto ready_to_go;
1917         }
1918
1919         if (ce->ce_file) {
1920                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1921                         content_error(ce->ce_file, ct,
1922                                         "unable to fopen for reading");
1923                         return NOTOK;
1924                 }
1925                 goto ready_to_go;
1926         }
1927
1928         if (*file == NULL) {
1929                 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1930                 ce->ce_unlink = 1;
1931         } else {
1932                 ce->ce_file = mh_xstrdup(*file);
1933                 ce->ce_unlink = 0;
1934         }
1935
1936         /* sbeck@cise.ufl.edu -- handle suffixes */
1937         ci = &ct->c_ctinfo;
1938         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1939                         invo_name, ci->ci_type, ci->ci_subtype);
1940         cp = context_find(buffer);
1941         if (cp == NULL || *cp == '\0') {
1942                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1943                                 ci->ci_type);
1944                 cp = context_find(buffer);
1945         }
1946         if (cp != NULL && *cp != '\0') {
1947                 if (ce->ce_unlink) {
1948                         /*
1949                         ** Temporary file already exists, so we rename to
1950                         ** version with extension.
1951                         */
1952                         char *file_org = mh_xstrdup(ce->ce_file);
1953                         ce->ce_file = add(cp, ce->ce_file);
1954                         if (rename(file_org, ce->ce_file)) {
1955                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1956                                                 file_org);
1957                         }
1958                         mh_free0(&file_org);
1959
1960                 } else {
1961                         ce->ce_file = add(cp, ce->ce_file);
1962                 }
1963         }
1964
1965         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1966                 content_error(ce->ce_file, ct,
1967                                 "unable to fopen for reading/writing");
1968                 return NOTOK;
1969         }
1970
1971         if (ct->c_type == CT_MULTIPART) {
1972                 char **ap, **ep;
1973                 CI ci = &ct->c_ctinfo;
1974
1975                 len = 0;
1976                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1977                                 ci->ci_subtype);
1978                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1979                                 strlen(ci->ci_subtype);
1980                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1981                         putc(';', ce->ce_fp);
1982                         len++;
1983
1984                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1985                                         *ap, *ep);
1986
1987                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1988                                 fputs("\n\t", ce->ce_fp);
1989                                 len = 8;
1990                         } else {
1991                                 putc(' ', ce->ce_fp);
1992                                 len++;
1993                         }
1994                         fprintf(ce->ce_fp, "%s", buffer);
1995                         len += cc;
1996                 }
1997
1998                 if (ci->ci_comment) {
1999                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2000                                                 >= CPERLIN) {
2001                                 fputs("\n\t", ce->ce_fp);
2002                                 len = 8;
2003                         } else {
2004                                 putc(' ', ce->ce_fp);
2005                                 len++;
2006                         }
2007                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2008                         len += cc;
2009                 }
2010                 fprintf(ce->ce_fp, "\n");
2011                 if (ct->c_id)
2012                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2013                 if (ct->c_descr)
2014                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2015                 if (ct->c_dispo)
2016                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2017                 fprintf(ce->ce_fp, "\n");
2018         }
2019
2020         if ((len = ct->c_end - ct->c_begin) < 0)
2021                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2022
2023         if (!ct->c_fp) {
2024                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2025                         content_error(ct->c_file, ct,
2026                                         "unable to open for reading");
2027                         return NOTOK;
2028                 }
2029                 own_ct_fp = 1;
2030         }
2031
2032         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2033         while (len > 0)
2034                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2035                 case NOTOK:
2036                         content_error(ct->c_file, ct, "error reading from");
2037                         goto clean_up;
2038
2039                 case OK:
2040                         content_error(NULL, ct, "premature eof");
2041                         goto clean_up;
2042
2043                 default:
2044                         if (cc > len)
2045                                 cc = len;
2046                         len -= cc;
2047
2048                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2049                         if (ferror(ce->ce_fp)) {
2050                                 content_error(ce->ce_file, ct,
2051                                                 "error writing to");
2052                                 goto clean_up;
2053                         }
2054                 }
2055
2056         fseek(ct->c_fp, 0L, SEEK_SET);
2057
2058         if (fflush(ce->ce_fp)) {
2059                 content_error(ce->ce_file, ct, "error writing to");
2060                 goto clean_up;
2061         }
2062
2063         fseek(ce->ce_fp, 0L, SEEK_SET);
2064
2065 ready_to_go:
2066         *file = ce->ce_file;
2067         if (own_ct_fp) {
2068                 fclose(ct->c_fp);
2069                 ct->c_fp = NULL;
2070         }
2071         return fileno(ce->ce_fp);
2072
2073 clean_up:
2074         free_encoding(ct, 0);
2075         if (own_ct_fp) {
2076                 fclose(ct->c_fp);
2077                 ct->c_fp = NULL;
2078         }
2079         return NOTOK;
2080 }