Fix FTBFS bug introduced by 600379c
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <h/signals.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17
18 extern int debugsw;
19
20 extern int endian;  /* mhmisc.c */
21
22 extern pid_t xpid;  /* mhshowsbr.c  */
23
/*
** Directory in which temporary files are to be placed.
** It must be set before any of the routines in this file are called.
** NOTE(review): the part of the file reviewed here never reads it;
** confirm which routines actually depend on it.
*/
char *tmp;
29
/*
** Subtype table for TEXT content: each entry pairs a subtype
** name with its TEXT_* code.  The entry with the NULL name
** closes the table.
** NOTE(review): the lookup code is not in this part of the file;
** presumably it scans up to the NULL name and uses that entry's
** code for subtypes it does not recognize -- confirm.
*/
struct k2v SubText[] = {
        { "plain", TEXT_PLAIN },
        { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
        { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
        { NULL, TEXT_UNKNOWN }  /* this one must be last! */
};
39
/*
** Character set table: each entry pairs a charset name with its
** CHARSET_* code.  The entry with the NULL name closes the table.
** NOTE(review): presumably consulted for the charset parameter of
** text content; the lookup code is not visible here -- confirm.
*/
struct k2v Charset[] = {
        { "us-ascii",   CHARSET_USASCII },
        { "iso-8859-1", CHARSET_LATIN },
        { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
};
45
/*
** Subtype table for MULTIPART content: each entry pairs a subtype
** name with its MULTI_* code.  The entry with the NULL name closes
** the table.
** NOTE(review): presumably the NULL entry's code is what an
** unrecognized subtype ends up with; the lookup is not visible
** here -- confirm.
*/
struct k2v SubMultiPart[] = {
        { "mixed",       MULTI_MIXED },
        { "alternative", MULTI_ALTERNATE },
        { "digest",      MULTI_DIGEST },
        { "parallel",    MULTI_PARALLEL },
        { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
};
56
/*
** Subtype table for MESSAGE content: each entry pairs a subtype
** name with its MESSAGE_* code.  The entry with the NULL name
** closes the table.
** NOTE(review): presumably the NULL entry's code is what an
** unrecognized subtype ends up with; the lookup is not visible
** here -- confirm.
*/
struct k2v SubMessage[] = {
        { "rfc822",        MESSAGE_RFC822 },
        { "partial",       MESSAGE_PARTIAL },
        { "external-body", MESSAGE_EXTERNAL },
        { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
};
66
/*
** Subtype table for APPLICATION content: each entry pairs a subtype
** name with its APPLICATION_* code.  The entry with the NULL name
** closes the table.
** NOTE(review): presumably the NULL entry's code is what an
** unrecognized subtype ends up with; the lookup is not visible
** here -- confirm.
*/
struct k2v SubApplication[] = {
        { "octet-stream", APPLICATION_OCTETS },
        { "postscript",   APPLICATION_POSTSCRIPT },
        { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
};
75
76
77 /* mhmisc.c */
78 int part_ok(CT, int);
79 int type_ok(CT, int);
80 int make_intermediates(char *);
81 void content_error(char *, CT, char *, ...);
82
83 /* mhfree.c */
84 void free_content(CT);
85 void free_encoding(CT, int);
86
87 /*
88 ** static prototypes
89 */
90 static CT get_content(FILE *, char *, int);
91 static int get_comment(CT, unsigned char **, int);
92
93 static int InitGeneric(CT);
94 static int InitText(CT);
95 static int InitMultiPart(CT);
96 static void reverse_parts(CT);
97 static int InitMessage(CT);
98 static int InitApplication(CT);
99 static int init_encoding(CT, OpenCEFunc);
100 static unsigned long size_encoding(CT);
101 static int InitBase64(CT);
102 static int openBase64(CT, char **);
103 static int InitQuoted(CT);
104 static int openQuoted(CT, char **);
105 static int Init7Bit(CT);
106
/*
** Content types: type name, CT_* code and Init function.
**
** get_content() walks this table comparing the Content-Type's
** major type against each name (case-insensitively) and copies the
** code and Init function of the entry it ends on into the content.
** If no name matches, the walk ends on the first NULL-named entry
** (CT_EXTENSION); get_content() then decides with a uprf(type, "X-")
** test whether to stay there or to step on to the second NULL-named
** entry (CT_UNKNOWN).  The two NULL entries therefore have to stay
** last and in this order.
*/
struct str2init str2cts[] = {
        { "application", CT_APPLICATION, InitApplication },
        { "audio",       CT_AUDIO,       InitGeneric },
        { "image",       CT_IMAGE,       InitGeneric },
        { "message",     CT_MESSAGE,     InitMessage },
        { "multipart",   CT_MULTIPART,   InitMultiPart },
        { "text",        CT_TEXT,        InitText },
        { "video",       CT_VIDEO,       InitGeneric },
        { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
        { NULL,          CT_UNKNOWN,     NULL },
};
118
/*
** Content transfer encodings: encoding name, CE_* code and Init
** function.
**
** get_content() walks this table comparing the first token of the
** Content-Transfer-Encoding value against each name
** (case-insensitively), stores the code of the entry it ends on and
** calls that entry's Init function if it has one.  If no name
** matches, the walk ends on the first NULL-named entry
** (CE_EXTENSION); get_content() then decides with a uprf(token, "X-")
** test whether to stay there or to step on to the second NULL-named
** entry (CE_UNKNOWN).  The two NULL entries therefore have to stay
** last and in this order.
*/
struct str2init str2ces[] = {
        { "base64",           CE_BASE64,    InitBase64 },
        { "quoted-printable", CE_QUOTED,    InitQuoted },
        { "8bit",             CE_8BIT,      Init7Bit },
        { "7bit",             CE_7BIT,      Init7Bit },
        { "binary",           CE_BINARY,    Init7Bit },
        { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
        { NULL,               CE_UNKNOWN,    NULL },
};
128
129
/*
** Inspect a status word.
**
** The status is handed back unchanged, except in one case: when
** its low seven bits equal SIGQUIT and bits 8..15 are not all set,
** stdout and stderr are flushed and the process exits with code 1.
*/
int
pidcheck(int status)
{
        int high_byte_all_ones = ((status & 0xff00) == 0xff00);
        int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

        if (!high_byte_all_ones && low_bits_are_quit) {
                fflush(stdout);
                fflush(stderr);
                exit(1);
        }

        return status;
}
141
142
143 /*
144 ** Main entry point for parsing a MIME message or file.
145 ** It returns the Content structure for the top level
146 ** entity in the file.
147 */
148 CT
149 parse_mime(char *file)
150 {
151         int is_stdin;
152         char buffer[BUFSIZ];
153         FILE *fp;
154         CT ct;
155
156         /*
157         ** Check if file is actually standard input
158         */
159         if ((is_stdin = (strcmp(file, "-")==0))) {
160                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
161                 if (tfile == NULL) {
162                         advise("mhparse", "unable to create temporary file");
163                         return NULL;
164                 }
165                 file = getcpy(tfile);
166                 chmod(file, 0600);
167
168                 while (fgets(buffer, sizeof(buffer), stdin))
169                         fputs(buffer, fp);
170                 fflush(fp);
171
172                 if (ferror(stdin)) {
173                         unlink(file);
174                         advise("stdin", "error reading");
175                         return NULL;
176                 }
177                 if (ferror(fp)) {
178                         unlink(file);
179                         advise(file, "error writing");
180                         return NULL;
181                 }
182                 fseek(fp, 0L, SEEK_SET);
183         } else if ((fp = fopen(file, "r")) == NULL) {
184                 advise(file, "unable to read");
185                 return NULL;
186         }
187
188         if (!(ct = get_content(fp, file, 1))) {
189                 if (is_stdin)
190                         unlink(file);
191                 advise(NULL, "unable to decode %s", file);
192                 return NULL;
193         }
194
195         if (is_stdin)
196                 ct->c_unlink = 1;  /* temp file to remove */
197
198         ct->c_fp = NULL;
199
200         if (ct->c_end == 0L) {
201                 fseek(fp, 0L, SEEK_END);
202                 ct->c_end = ftell(fp);
203         }
204
205         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
206                 fclose(fp);
207                 free_content(ct);
208                 return NULL;
209         }
210
211         fclose(fp);
212         return ct;
213 }
214
215
216 /*
217 ** Main routine for reading/parsing the headers
218 ** of a message content.
219 **
220 ** toplevel =  1   # we are at the top level of the message
221 ** toplevel =  0   # we are inside message type or multipart type
222 **                 # other than multipart/digest
223 ** toplevel = -1   # we are inside multipart/digest
224 ** NB: on failure we will fclose(in)!
225 */
226
227 static CT
228 get_content(FILE *in, char *file, int toplevel)
229 {
230         int compnum, state;
231         char buf[BUFSIZ], name[NAMESZ];
232         char *np, *vp;
233         CT ct;
234         HF hp;
235
236         /* allocate the content structure */
237         if (!(ct = (CT) calloc(1, sizeof(*ct))))
238                 adios(NULL, "out of memory");
239
240         ct->c_fp = in;
241         ct->c_file = getcpy(file);
242         ct->c_begin = ftell(ct->c_fp) + 1;
243
244         /*
245         ** Parse the header fields for this
246         ** content into a linked list.
247         */
248         for (compnum = 1, state = FLD;;) {
249                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
250                 case FLD:
251                 case FLDPLUS:
252                 case FLDEOF:
253                         compnum++;
254
255                         /* get copies of the buffers */
256                         np = getcpy(name);
257                         vp = getcpy(buf);
258
259                         /* if necessary, get rest of field */
260                         while (state == FLDPLUS) {
261                                 state = m_getfld(state, name, buf,
262                                                 sizeof(buf), in);
263                                 vp = add(buf, vp);  /* add to previous value */
264                         }
265
266                         /* Now add the header data to the list */
267                         add_header(ct, np, vp);
268
269                         /* continue, if this isn't the last header field */
270                         if (state != FLDEOF) {
271                                 ct->c_begin = ftell(in) + 1;
272                                 continue;
273                         }
274                         /* else fall... */
275
276                 case BODY:
277                 case BODYEOF:
278                         ct->c_begin = ftell(in) - strlen(buf);
279                         break;
280
281                 case FILEEOF:
282                         ct->c_begin = ftell(in);
283                         break;
284
285                 case LENERR:
286                 case FMTERR:
287                         adios(NULL, "message format error in component #%d",
288                                         compnum);
289
290                 default:
291                         adios(NULL, "getfld() returned %d", state);
292                 }
293
294                 /* break out of the loop */
295                 break;
296         }
297
298         /*
299         ** Read the content headers.  We will parse the
300         ** MIME related header fields into their various
301         ** structures and set internal flags related to
302         ** content type/subtype, etc.
303         */
304
305         hp = ct->c_first_hf;  /* start at first header field */
306         while (hp) {
307                 /* Get MIME-Version field */
308                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
309                         int ucmp;
310                         char c;
311                         unsigned char *cp, *dp;
312
313                         if (ct->c_vrsn) {
314                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
315                                 goto next_header;
316                         }
317                         ct->c_vrsn = getcpy(hp->value);
318
319                         /* Now, cleanup this field */
320                         cp = ct->c_vrsn;
321
322                         while (isspace(*cp))
323                                 cp++;
324                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
325                                 *dp++ = ' ';
326                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
327                                 if (!isspace(*dp))
328                                         break;
329                         *++dp = '\0';
330                         if (debugsw)
331                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
332
333                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
334                                 goto out;
335
336                         for (dp = cp; istoken(*dp); dp++)
337                                 continue;
338                         c = *dp;
339                         *dp = '\0';
340                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
341                         *dp = c;
342                         if (!ucmp) {
343                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
344                         }
345
346                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
347                         /* Get Content-Type field */
348                         struct str2init *s2i;
349                         CI ci = &ct->c_ctinfo;
350
351                         /* Check if we've already seen a Content-Type header */
352                         if (ct->c_ctline) {
353                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
354                                 goto next_header;
355                         }
356
357                         /* Parse the Content-Type field */
358                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
359                                 goto out;
360
361                         /*
362                         ** Set the Init function and the internal
363                         ** flag for this content type.
364                         */
365                         for (s2i = str2cts; s2i->si_key; s2i++)
366                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
367                                         break;
368                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
369                                 s2i++;
370                         ct->c_type = s2i->si_val;
371                         ct->c_ctinitfnx = s2i->si_init;
372
373                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
374                         /* Get Content-Transfer-Encoding field */
375                         char c;
376                         unsigned char *cp, *dp;
377                         struct str2init *s2i;
378
379                         /*
380                         ** Check if we've already seen the
381                         ** Content-Transfer-Encoding field
382                         */
383                         if (ct->c_celine) {
384                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
385                                 goto next_header;
386                         }
387
388                         /* get copy of this field */
389                         ct->c_celine = cp = getcpy(hp->value);
390
391                         while (isspace(*cp))
392                                 cp++;
393                         for (dp = cp; istoken(*dp); dp++)
394                                 continue;
395                         c = *dp;
396                         *dp = '\0';
397
398                         /*
399                         ** Find the internal flag and Init function
400                         ** for this transfer encoding.
401                         */
402                         for (s2i = str2ces; s2i->si_key; s2i++)
403                                 if (!mh_strcasecmp(cp, s2i->si_key))
404                                         break;
405                         if (!s2i->si_key && !uprf(cp, "X-"))
406                                 s2i++;
407                         *dp = c;
408                         ct->c_encoding = s2i->si_val;
409
410                         /* Call the Init function for this encoding */
411                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
412                                 goto out;
413
414                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
415                         /* Get Content-ID field */
416                         ct->c_id = add(hp->value, ct->c_id);
417
418                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
419                         /* Get Content-Description field */
420                         ct->c_descr = add(hp->value, ct->c_descr);
421
422                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
423                         /* Get Content-Disposition field */
424                         ct->c_dispo = add(hp->value, ct->c_dispo);
425                 }
426
427 next_header:
428                 hp = hp->next;  /* next header field */
429         }
430
431         /*
432         ** Check if we saw a Content-Type field.
433         ** If not, then assign a default value for
434         ** it, and the Init function.
435         */
436         if (!ct->c_ctline) {
437                 /*
438                 ** If we are inside a multipart/digest message,
439                 ** so default type is message/rfc822
440                 */
441                 if (toplevel < 0) {
442                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
443                                 goto out;
444                         ct->c_type = CT_MESSAGE;
445                         ct->c_ctinitfnx = InitMessage;
446                 } else {
447                         /*
448                         ** Else default type is text/plain
449                         */
450                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
451                                 goto out;
452                         ct->c_type = CT_TEXT;
453                         ct->c_ctinitfnx = InitText;
454                 }
455         }
456
457         /* Use default Transfer-Encoding, if necessary */
458         if (!ct->c_celine) {
459                 ct->c_encoding = CE_7BIT;
460                 Init7Bit(ct);
461         }
462
463         return ct;
464
465 out:
466         free_content(ct);
467         return NULL;
468 }
469
470
471 /*
472 ** small routine to add header field to list
473 */
474
475 int
476 add_header(CT ct, char *name, char *value)
477 {
478         HF hp;
479
480         /* allocate header field structure */
481         hp = mh_xmalloc(sizeof(*hp));
482
483         /* link data into header structure */
484         hp->name = name;
485         hp->value = value;
486         hp->next = NULL;
487
488         /* link header structure into the list */
489         if (ct->c_first_hf == NULL) {
490                 ct->c_first_hf = hp;  /* this is the first */
491                 ct->c_last_hf = hp;
492         } else {
493                 ct->c_last_hf->next = hp;  /* add it to the end */
494                 ct->c_last_hf = hp;
495         }
496
497         return 0;
498 }
499
500
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert `; name="value"' right in front
** of the first semicolon, if any, or append it to the end
** otherwise.  Note that name should not contain a trailing =.
** And quotes will be added around the value.  Typical usage:
** make sure that a Content-Disposition header contains
** filename="foo".
**
** name is assumed to be non-null.  If buf or value is NULL, or if
** buf already contains `name=', buf is returned untouched.
** Otherwise trailing white space is trimmed from buf, a newly
** allocated string (ending in a newline) is returned and buf
** itself is freed, so the caller must continue with the return
** value only.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *text = (char *)buf;
        char *needle, *insertion, *semi, *newbuf;
        size_t len;
        int present;

        if (!text || !value)
                return text;

        needle = concat(name, "=", NULL);
        present = (strstr(text, needle) != NULL);
        free(needle);
        if (present)
                return text;

        /*
        ** Trim trailing space, esp. newline.  Counting down a length
        ** keeps this safe for an empty buf, where there is no last
        ** character to point at.
        */
        len = strlen(text);
        while (len > 0 && isspace((unsigned char)text[len - 1]))
                text[--len] = '\0';

        insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

        /*
        ** Insert in front of the first semicolon, if any.
        ** If none, append to end.  buf is released below, so it
        ** may be split in place.
        */
        if ((semi = strchr(text, ';'))) {
                *semi = '\0';
                newbuf = concat(text, insertion, ";", semi + 1, "\n", NULL);
        } else {
                newbuf = concat(text, insertion, "\n", NULL);
        }

        free(insertion);
        free(buf);

        return newbuf;
}
558
/*
** Extract just the foo of name_suffix="foo", if any, from value.
** Note that, for example, a name_suffix of name will match
** filename="foo", and return foo.
**
** If value contains no name_suffix=" at all, or if the quoted
** string that follows it is never closed, value itself is returned.
** Otherwise the result is a newly allocated string.  The caller can
** tell the two cases apart by comparing the result with value.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *pattern = concat(name_suffix, "=\"", NULL);
        char *match = strstr(value, pattern);
        char *begin, *end, *extracted;
        size_t len;

        free(pattern);
        if (!match)
                return value;

        /* The match is known to contain a quote; start right behind it. */
        begin = strchr(match, '"') + 1;

        /*
        ** Look for the closing quote, but stop at the end of the
        ** string: an unterminated quoted-string is treated like a
        ** missing one instead of being read beyond its terminator.
        */
        end = strchr(begin, '"');
        if (!end)
                return value;

        len = end - begin;
        extracted = mh_xmalloc(len + 1);
        memcpy(extracted, begin, len);
        extracted[len] = '\0';

        return extracted;
}
591
592 /*
593 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
594 ** directives.  Fills in the information of the CTinfo structure.
595 */
596 int
597 get_ctinfo(unsigned char *cp, CT ct, int magic)
598 {
599         int i;
600         unsigned char *dp;
601         char **ap, **ep;
602         char c;
603         CI ci;
604
605         ci = &ct->c_ctinfo;
606         i = strlen(invo_name) + 2;
607
608         /* store copy of Content-Type line */
609         cp = ct->c_ctline = getcpy(cp);
610
611         while (isspace(*cp))  /* trim leading spaces */
612                 cp++;
613
614         /* change newlines to spaces */
615         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
616                 *dp++ = ' ';
617
618         /* trim trailing spaces */
619         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
620                 if (!isspace(*dp))
621                         break;
622         *++dp = '\0';
623
624         if (debugsw)
625                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
626
627         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
628                 return NOTOK;
629
630         for (dp = cp; istoken(*dp); dp++)
631                 continue;
632         c = *dp, *dp = '\0';
633         ci->ci_type = getcpy(cp);  /* store content type */
634         *dp = c, cp = dp;
635
636         if (!*ci->ci_type) {
637                 advise(NULL, "invalid %s: field in message %s (empty type)",
638                                 TYPE_FIELD, ct->c_file);
639                 return NOTOK;
640         }
641
642         /* down case the content type string */
643         for (dp = ci->ci_type; *dp; dp++)
644                 if (isalpha(*dp) && isupper(*dp))
645                         *dp = tolower(*dp);
646
647         while (isspace(*cp))
648                 cp++;
649
650         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
651                 return NOTOK;
652
653         if (*cp != '/') {
654                 if (!magic)
655                         ci->ci_subtype = getcpy("");
656                 goto magic_skip;
657         }
658
659         cp++;
660         while (isspace(*cp))
661                 cp++;
662
663         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
664                 return NOTOK;
665
666         for (dp = cp; istoken(*dp); dp++)
667                 continue;
668         c = *dp, *dp = '\0';
669         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
670         *dp = c, cp = dp;
671
672         if (!*ci->ci_subtype) {
673                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
674                 return NOTOK;
675         }
676
677         /* down case the content subtype string */
678         for (dp = ci->ci_subtype; *dp; dp++)
679                 if (isalpha(*dp) && isupper(*dp))
680                         *dp = tolower(*dp);
681
682 magic_skip:
683         while (isspace(*cp))
684                 cp++;
685
686         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
687                 return NOTOK;
688
689         /*
690         ** Parse attribute/value pairs given with Content-Type
691         */
692         ep = (ap = ci->ci_attrs) + NPARMS;
693         while (*cp == ';') {
694                 char *vp;
695                 unsigned char *up;
696
697                 if (ap >= ep) {
698                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
699                         return NOTOK;
700                 }
701
702                 cp++;
703                 while (isspace(*cp))
704                         cp++;
705
706                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
707                         return NOTOK;
708
709                 if (*cp == 0) {
710                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
711                         return OK;
712                 }
713
714                 /* down case the attribute name */
715                 for (dp = cp; istoken(*dp); dp++)
716                         if (isalpha(*dp) && isupper(*dp))
717                                 *dp = tolower(*dp);
718
719                 for (up = dp; isspace(*dp);)
720                         dp++;
721                 if (dp == cp || *dp != '=') {
722                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
723                         return NOTOK;
724                 }
725
726                 vp = (*ap = getcpy(cp)) + (up - cp);
727                 *vp = '\0';
728                 for (dp++; isspace(*dp);)
729                         dp++;
730
731                 /* now add the attribute value */
732                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
733
734                 if (*dp == '"') {
735                         for (cp = ++dp, dp = vp;;) {
736                                 switch (c = *cp++) {
737                                 case '\0':
738 bad_quote:
739                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
740                                         return NOTOK;
741
742                                 case '\\':
743                                         *dp++ = c;
744                                         if ((c = *cp++) == '\0')
745                                                 goto bad_quote;
746                                         /* else fall... */
747
748                                 default:
749                                         *dp++ = c;
750                                         continue;
751
752                                 case '"':
753                                         *dp = '\0';
754                                         break;
755                                 }
756                                 break;
757                         }
758                 } else {
759                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
760                                 continue;
761                         *dp = '\0';
762                 }
763                 if (!*vp) {
764                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
765                         return NOTOK;
766                 }
767                 ap++;
768
769                 while (isspace(*cp))
770                         cp++;
771
772                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
773                         return NOTOK;
774         }
775
776         /*
777         ** Get any <Content-Id> given in buffer
778         */
779         if (magic && *cp == '<') {
780                 if (ct->c_id) {
781                         free(ct->c_id);
782                         ct->c_id = NULL;
783                 }
784                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
785                         advise(NULL, "invalid ID in message %s", ct->c_file);
786                         return NOTOK;
787                 }
788                 c = *dp;
789                 *dp = '\0';
790                 if (*ct->c_id)
791                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
792                 else
793                         ct->c_id = NULL;
794                 *dp++ = c;
795                 cp = dp;
796
797                 while (isspace(*cp))
798                         cp++;
799         }
800
801         /*
802         ** Get any [Content-Description] given in buffer.
803         */
804         if (magic && *cp == '[') {
805                 ct->c_descr = ++cp;
806                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
807                         if (*dp == ']')
808                                 break;
809                 if (dp < cp) {
810                         advise(NULL, "invalid description in message %s",
811                                         ct->c_file);
812                         ct->c_descr = NULL;
813                         return NOTOK;
814                 }
815
816                 c = *dp;
817                 *dp = '\0';
818                 if (*ct->c_descr)
819                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
820                 else
821                         ct->c_descr = NULL;
822                 *dp++ = c;
823                 cp = dp;
824
825                 while (isspace(*cp))
826                         cp++;
827         }
828
829         /*
830         ** Get any {Content-Disposition} given in buffer.
831         */
832         if (magic && *cp == '{') {
833                 ct->c_dispo = ++cp;
834                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
835                         if (*dp == '}')
836                                 break;
837                 if (dp < cp) {
838                         advise(NULL, "invalid disposition in message %s",
839                                         ct->c_file);
840                         ct->c_dispo = NULL;
841                         return NOTOK;
842                 }
843
844                 c = *dp;
845                 *dp = '\0';
846                 if (*ct->c_dispo)
847                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
848                 else
849                         ct->c_dispo = NULL;
850                 *dp++ = c;
851                 cp = dp;
852
853                 while (isspace(*cp))
854                         cp++;
855         }
856
857         /*
858         ** Check if anything is left over
859         */
860         if (*cp) {
861                 if (magic) {
862                         ci->ci_magic = getcpy(cp);
863
864                         /*
865                         ** If there is a Content-Disposition header and
866                         ** it doesn't have a *filename=, extract it from
867                         ** the magic contents.  The mhbasename call skips
868                         ** any leading directory components.
869                         */
870                         if (ct->c_dispo)
871                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
872                         } else
873                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
874         }
875
876         return OK;
877 }
878
879
880 static int
881 get_comment(CT ct, unsigned char **ap, int istype)
882 {
883         int i;
884         char *bp;
885         unsigned char *cp;
886         char c, buffer[BUFSIZ], *dp;
887         CI ci;
888
889         ci = &ct->c_ctinfo;
890         cp = *ap;
891         bp = buffer;
892         cp++;
893
894         for (i = 0;;) {
895                 switch (c = *cp++) {
896                 case '\0':
897 invalid:
898                 advise(NULL, "invalid comment in message %s's %s: field",
899                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
900                 return NOTOK;
901
902                 case '\\':
903                         *bp++ = c;
904                         if ((c = *cp++) == '\0')
905                                 goto invalid;
906                         *bp++ = c;
907                         continue;
908
909                 case '(':
910                         i++;
911                         /* and fall... */
912                 default:
913                         *bp++ = c;
914                         continue;
915
916                 case ')':
917                         if (--i < 0)
918                                 break;
919                         *bp++ = c;
920                         continue;
921                 }
922                 break;
923         }
924         *bp = '\0';
925
926         if (istype) {
927                 if ((dp = ci->ci_comment)) {
928                         ci->ci_comment = concat(dp, " ", buffer, NULL);
929                         free(dp);
930                 } else {
931                         ci->ci_comment = getcpy(buffer);
932                 }
933         }
934
935         while (isspace(*cp))
936                 cp++;
937
938         *ap = cp;
939         return OK;
940 }
941
942
943 /*
944 ** CONTENTS
945 **
946 ** Handles content types audio, image, and video.
947 ** There's not much to do right here.
948 */
949
950 static int
951 InitGeneric(CT ct)
952 {
953         return OK;  /* not much to do here */
954 }
955
956
957 /*
958 ** TEXT
959 */
960
961 static int
962 InitText(CT ct)
963 {
964         char **ap, **ep;
965         struct k2v *kv;
966         struct text *t;
967         CI ci = &ct->c_ctinfo;
968
969         /* check for missing subtype */
970         if (!*ci->ci_subtype)
971                 ci->ci_subtype = add("plain", ci->ci_subtype);
972
973         /* match subtype */
974         for (kv = SubText; kv->kv_key; kv++)
975                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
976                         break;
977         ct->c_subtype = kv->kv_value;
978
979         /* allocate text character set structure */
980         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
981                 adios(NULL, "out of memory");
982         ct->c_ctparams = (void *) t;
983
984         /* scan for charset parameter */
985         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
986                 if (!mh_strcasecmp(*ap, "charset"))
987                         break;
988
989         /* check if content specified a character set */
990         if (*ap) {
991                 /* store its name */
992                 ct->c_charset = getcpy(norm_charmap(*ep));
993                 /* match character set or set to CHARSET_UNKNOWN */
994                 for (kv = Charset; kv->kv_key; kv++) {
995                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
996                                 break;
997                         }
998                 }
999                 t->tx_charset = kv->kv_value;
1000         } else {
1001                 t->tx_charset = CHARSET_UNSPECIFIED;
1002         }
1003
1004         return OK;
1005 }
1006
1007
1008 /*
1009 ** MULTIPART
1010 */
1011
1012 static int
1013 InitMultiPart(CT ct)
1014 {
1015         int inout;
1016         long last, pos;
1017         unsigned char *cp, *dp;
1018         char **ap, **ep;
1019         char *bp, buffer[BUFSIZ];
1020         struct multipart *m;
1021         struct k2v *kv;
1022         struct part *part, **next;
1023         CI ci = &ct->c_ctinfo;
1024         CT p;
1025         FILE *fp;
1026
1027         /*
1028         ** The encoding for multipart messages must be either
1029         ** 7bit, 8bit, or binary (per RFC2045).
1030         */
1031         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1032                 && ct->c_encoding != CE_BINARY) {
1033                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1034                 return NOTOK;
1035         }
1036
1037         /* match subtype */
1038         for (kv = SubMultiPart; kv->kv_key; kv++)
1039                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1040                         break;
1041         ct->c_subtype = kv->kv_value;
1042
1043         /*
1044         ** Check for "boundary" parameter, which is
1045         ** required for multipart messages.
1046         */
1047         bp = 0;
1048         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1049                 if (!mh_strcasecmp(*ap, "boundary")) {
1050                         bp = *ep;
1051                         break;
1052                 }
1053         }
1054
1055         /* complain if boundary parameter is missing */
1056         if (!*ap) {
1057                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1058                 return NOTOK;
1059         }
1060
1061         /* allocate primary structure for multipart info */
1062         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1063                 adios(NULL, "out of memory");
1064         ct->c_ctparams = (void *) m;
1065
1066         /* check if boundary parameter contains only whitespace characters */
1067         for (cp = bp; isspace(*cp); cp++)
1068                 continue;
1069         if (!*cp) {
1070                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1071                 return NOTOK;
1072         }
1073
1074         /* remove trailing whitespace from boundary parameter */
1075         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1076                 if (!isspace(*dp))
1077                         break;
1078         *++dp = '\0';
1079
1080         /* record boundary separators */
1081         m->mp_start = concat(bp, "\n", NULL);
1082         m->mp_stop = concat(bp, "--\n", NULL);
1083
1084         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1085                 advise(ct->c_file, "unable to open for reading");
1086                 return NOTOK;
1087         }
1088
1089         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1090         last = ct->c_end;
1091         next = &m->mp_parts;
1092         part = NULL;
1093         inout = 1;
1094
1095         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1096                 if (pos > last)
1097                         break;
1098
1099                 pos += strlen(buffer);
1100                 if (buffer[0] != '-' || buffer[1] != '-')
1101                         continue;
1102                 if (inout) {
1103                         if (strcmp(buffer + 2, m->mp_start)!=0)
1104                                 continue;
1105 next_part:
1106                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1107                                         == NULL)
1108                                 adios(NULL, "out of memory");
1109                         *next = part;
1110                         next = &part->mp_next;
1111
1112                         if (!(p = get_content(fp, ct->c_file,
1113                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1114                                 ct->c_fp = NULL;
1115                                 return NOTOK;
1116                         }
1117                         p->c_fp = NULL;
1118                         part->mp_part = p;
1119                         pos = p->c_begin;
1120                         fseek(fp, pos, SEEK_SET);
1121                         inout = 0;
1122                 } else {
1123                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1124                                 inout = 1;
1125 end_part:
1126                                 p = part->mp_part;
1127                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1128                                 if (p->c_end < p->c_begin)
1129                                         p->c_begin = p->c_end;
1130                                 if (inout)
1131                                         goto next_part;
1132                                 goto last_part;
1133                         } else {
1134                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1135                                         goto end_part;
1136                         }
1137                 }
1138         }
1139
1140         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1141         if (!inout && part) {
1142                 p = part->mp_part;
1143                 p->c_end = ct->c_end;
1144
1145                 if (p->c_begin >= p->c_end) {
1146                         for (next = &m->mp_parts; *next != part;
1147                                 next = &((*next)->mp_next))
1148                                 continue;
1149                         *next = NULL;
1150                         free_content(p);
1151                         free((char *) part);
1152                 }
1153         }
1154
1155 last_part:
1156         /* reverse the order of the parts for multipart/alternative */
1157         if (ct->c_subtype == MULTI_ALTERNATE)
1158                 reverse_parts(ct);
1159
1160         /*
1161         ** label all subparts with part number, and
1162         ** then initialize the content of the subpart.
1163         */
1164         {
1165                 int partnum;
1166                 char *pp;
1167                 char partnam[BUFSIZ];
1168
1169                 if (ct->c_partno) {
1170                         snprintf(partnam, sizeof(partnam), "%s.",
1171                                         ct->c_partno);
1172                         pp = partnam + strlen(partnam);
1173                 } else {
1174                         pp = partnam;
1175                 }
1176
1177                 for (part = m->mp_parts, partnum = 1; part;
1178                         part = part->mp_next, partnum++) {
1179                         p = part->mp_part;
1180
1181                         sprintf(pp, "%d", partnum);
1182                         p->c_partno = getcpy(partnam);
1183
1184                         /* initialize the content of the subparts */
1185                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1186                                 fclose(ct->c_fp);
1187                                 ct->c_fp = NULL;
1188                                 return NOTOK;
1189                         }
1190                 }
1191         }
1192
1193         fclose(ct->c_fp);
1194         ct->c_fp = NULL;
1195         return OK;
1196 }
1197
1198
1199 /*
1200 ** reverse the order of the parts of a multipart
1201 */
1202
1203 static void
1204 reverse_parts(CT ct)
1205 {
1206         int i;
1207         struct multipart *m;
1208         struct part **base, **bmp, **next, *part;
1209
1210         m = (struct multipart *) ct->c_ctparams;
1211
1212         /* if only one part, just return */
1213         if (!m->mp_parts || !m->mp_parts->mp_next)
1214                 return;
1215
1216         /* count number of parts */
1217         i = 0;
1218         for (part = m->mp_parts; part; part = part->mp_next)
1219                 i++;
1220
1221         /* allocate array of pointers to the parts */
1222         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1223                 adios(NULL, "out of memory");
1224         bmp = base;
1225
1226         /* point at all the parts */
1227         for (part = m->mp_parts; part; part = part->mp_next)
1228                 *bmp++ = part;
1229         *bmp = NULL;
1230
1231         /* reverse the order of the parts */
1232         next = &m->mp_parts;
1233         for (bmp--; bmp >= base; bmp--) {
1234                 part = *bmp;
1235                 *next = part;
1236                 next = &part->mp_next;
1237         }
1238         *next = NULL;
1239
1240         /* free array of pointers */
1241         free((char *) base);
1242 }
1243
1244
1245 /*
1246 ** MESSAGE
1247 */
1248
1249 static int
1250 InitMessage(CT ct)
1251 {
1252         struct k2v *kv;
1253         CI ci = &ct->c_ctinfo;
1254
1255         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1256                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1257                 return NOTOK;
1258         }
1259
1260         /* check for missing subtype */
1261         if (!*ci->ci_subtype)
1262                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1263
1264         /* match subtype */
1265         for (kv = SubMessage; kv->kv_key; kv++)
1266                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1267                         break;
1268         ct->c_subtype = kv->kv_value;
1269
1270         switch (ct->c_subtype) {
1271         case MESSAGE_RFC822:
1272                 break;
1273
1274         case MESSAGE_PARTIAL:
1275                 {
1276                 char **ap, **ep;
1277                 struct partial *p;
1278
1279                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1280                         adios(NULL, "out of memory");
1281                 ct->c_ctparams = (void *) p;
1282
1283                 /*
1284                 ** scan for parameters "id", "number",
1285                 ** and "total"
1286                 */
1287                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1288                         if (!mh_strcasecmp(*ap, "id")) {
1289                                 p->pm_partid = getcpy(*ep);
1290                                 continue;
1291                         }
1292                         if (!mh_strcasecmp(*ap, "number")) {
1293                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1294 invalid_param:
1295                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1296                                         return NOTOK;
1297                                 }
1298                                 continue;
1299                         }
1300                         if (!mh_strcasecmp(*ap, "total")) {
1301                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1302                                                 p->pm_maxno < 1)
1303                                         goto invalid_param;
1304                                 continue;
1305                         }
1306                 }
1307
1308                 if (!p->pm_partid || !p->pm_partno
1309                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1310                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1311                         return NOTOK;
1312                 }
1313                 }
1314                 break;
1315
1316         case MESSAGE_EXTERNAL:
1317                 {
1318                 CT p;
1319                 FILE *fp;
1320
1321                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1322                         advise(ct->c_file, "unable to open for reading");
1323                         return NOTOK;
1324                 }
1325
1326                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1327
1328                 if (!(p = get_content(fp, ct->c_file, 0))) {
1329                         ct->c_fp = NULL;
1330                         return NOTOK;
1331                 }
1332
1333                 p->c_fp = NULL;
1334                 p->c_end = p->c_begin;
1335
1336                 fclose(ct->c_fp);
1337                 ct->c_fp = NULL;
1338
1339                 switch (p->c_type) {
1340                 case CT_MULTIPART:
1341                         break;
1342
1343                 case CT_MESSAGE:
1344                         if (p->c_subtype != MESSAGE_RFC822)
1345                                 break;
1346                         /* else fall... */
1347                 default:
1348                         if (p->c_ctinitfnx)
1349                                 (*p->c_ctinitfnx) (p);
1350                         break;
1351                 }
1352                 }
1353                 break;
1354
1355         default:
1356                 break;
1357         }
1358
1359         return OK;
1360 }
1361
1362
1363 /*
1364 ** APPLICATION
1365 */
1366
1367 static int
1368 InitApplication(CT ct)
1369 {
1370         struct k2v *kv;
1371         CI ci = &ct->c_ctinfo;
1372
1373         /* match subtype */
1374         for (kv = SubApplication; kv->kv_key; kv++)
1375                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1376                         break;
1377         ct->c_subtype = kv->kv_value;
1378
1379         return OK;
1380 }
1381
1382
1383 /*
1384 ** TRANSFER ENCODINGS
1385 */
1386
1387 static int
1388 init_encoding(CT ct, OpenCEFunc openfnx)
1389 {
1390         CE ce;
1391
1392         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1393                 adios(NULL, "out of memory");
1394
1395         ct->c_cefile     = ce;
1396         ct->c_ceopenfnx  = openfnx;
1397         ct->c_ceclosefnx = close_encoding;
1398         ct->c_cesizefnx  = size_encoding;
1399
1400         return OK;
1401 }
1402
1403
1404 void
1405 close_encoding(CT ct)
1406 {
1407         CE ce;
1408
1409         if (!(ce = ct->c_cefile))
1410                 return;
1411
1412         if (ce->ce_fp) {
1413                 fclose(ce->ce_fp);
1414                 ce->ce_fp = NULL;
1415         }
1416 }
1417
1418
1419 static unsigned long
1420 size_encoding(CT ct)
1421 {
1422         int fd;
1423         unsigned long size;
1424         char *file;
1425         CE ce;
1426         struct stat st;
1427
1428         if (!(ce = ct->c_cefile))
1429                 return (ct->c_end - ct->c_begin);
1430
1431         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1432                 return (long) st.st_size;
1433
1434         if (ce->ce_file) {
1435                 if (stat(ce->ce_file, &st) != NOTOK)
1436                         return (long) st.st_size;
1437                 else
1438                         return 0L;
1439         }
1440
1441         if (ct->c_encoding == CE_EXTERNAL)
1442                 return (ct->c_end - ct->c_begin);
1443
1444         file = NULL;
1445         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1446                 return (ct->c_end - ct->c_begin);
1447
1448         if (fstat(fd, &st) != NOTOK)
1449                 size = (long) st.st_size;
1450         else
1451                 size = 0L;
1452
1453         (*ct->c_ceclosefnx) (ct);
1454         return size;
1455 }
1456
1457
1458 /*
1459 ** BASE64
1460 */
1461
/*
** Decoding table for base64: maps a 7-bit character code to the
** 6-bit value it stands for.  Characters that are not part of the
** base64 alphabet (including the padding character '=') map to 0xff.
** The table is only ever read, hence it is declared const.
*/
static const unsigned char b642nib[0x80] = {
        /* 0x00 - 0x27: not in the alphabet */
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x28 - 0x2f: '+' and '/' */
        0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
        /* 0x30 - 0x3f: '0' .. '9' */
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
        0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x40 - 0x5f: 'A' .. 'Z' */
        0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
        0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
        /* 0x60 - 0x7f: 'a' .. 'z' */
        0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
        0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1480
1481
1482 static int
1483 InitBase64(CT ct)
1484 {
1485         return init_encoding(ct, openBase64);
1486 }
1487
1488
1489 static int
1490 openBase64(CT ct, char **file)
1491 {
1492         int bitno, cc;
1493         int fd, len, skip, own_ct_fp = 0;
1494         unsigned long bits;
1495         unsigned char value, *b, *b1, *b2, *b3;
1496         unsigned char *cp, *ep;
1497         char buffer[BUFSIZ];
1498         /* sbeck -- handle suffixes */
1499         CI ci;
1500         CE ce;
1501
1502         b  = (unsigned char *) &bits;
1503         b1 = &b[endian > 0 ? 1 : 2];
1504         b2 = &b[endian > 0 ? 2 : 1];
1505         b3 = &b[endian > 0 ? 3 : 0];
1506
1507         ce = ct->c_cefile;
1508         if (ce->ce_fp) {
1509                 fseek(ce->ce_fp, 0L, SEEK_SET);
1510                 goto ready_to_go;
1511         }
1512
1513         if (ce->ce_file) {
1514                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1515                         content_error(ce->ce_file, ct,
1516                                         "unable to fopen for reading");
1517                         return NOTOK;
1518                 }
1519                 goto ready_to_go;
1520         }
1521
1522         if (*file == NULL) {
1523                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1524                 ce->ce_unlink = 1;
1525         } else {
1526                 ce->ce_file = getcpy(*file);
1527                 ce->ce_unlink = 0;
1528         }
1529
1530         /* sbeck@cise.ufl.edu -- handle suffixes */
1531         ci = &ct->c_ctinfo;
1532         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1533                         invo_name, ci->ci_type, ci->ci_subtype);
1534         cp = context_find(buffer);
1535         if (cp == NULL || *cp == '\0') {
1536                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1537                                 ci->ci_type);
1538                 cp = context_find(buffer);
1539         }
1540         if (cp != NULL && *cp != '\0') {
1541                 if (ce->ce_unlink) {
1542                         /*
1543                         ** Temporary file already exists, so we rename to
1544                         ** version with extension.
1545                         */
1546                         char *file_org = strdup(ce->ce_file);
1547                         ce->ce_file = add(cp, ce->ce_file);
1548                         if (rename(file_org, ce->ce_file)) {
1549                                 adios(ce->ce_file, "unable to rename %s to ",
1550                                                 file_org);
1551                         }
1552                         free(file_org);
1553
1554                 } else {
1555                         ce->ce_file = add(cp, ce->ce_file);
1556                 }
1557         }
1558
1559         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1560                 content_error(ce->ce_file, ct,
1561                                 "unable to fopen for reading/writing");
1562                 return NOTOK;
1563         }
1564
1565         if ((len = ct->c_end - ct->c_begin) < 0)
1566                 adios(NULL, "internal error(1)");
1567
1568         if (!ct->c_fp) {
1569                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1570                         content_error(ct->c_file, ct,
1571                                         "unable to open for reading");
1572                         return NOTOK;
1573                 }
1574                 own_ct_fp = 1;
1575         }
1576
1577         bitno = 18;
1578         bits = 0L;
1579         skip = 0;
1580
1581         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1582         while (len > 0) {
1583                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1584                 case NOTOK:
1585                         content_error(ct->c_file, ct, "error reading from");
1586                         goto clean_up;
1587
1588                 case OK:
1589                         content_error(NULL, ct, "premature eof");
1590                         goto clean_up;
1591
1592                 default:
1593                         if (cc > len)
1594                                 cc = len;
1595                         len -= cc;
1596
1597                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1598                                 switch (*cp) {
1599                                 default:
1600                                         if (isspace(*cp))
1601                                                 break;
1602                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1603                                                 if (debugsw) {
1604                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1605                                                 }
1606                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1607                                                 continue;
1608                                         }
1609
1610                                         bits |= value << bitno;
1611 test_end:
1612                                         if ((bitno -= 6) < 0) {
1613                                                 putc((char) *b1, ce->ce_fp);
1614                                                 if (skip < 2) {
1615                                                         putc((char) *b2, ce->ce_fp);
1616                                                         if (skip < 1) {
1617                                                                 putc((char) *b3, ce->ce_fp);
1618                                                         }
1619                                                 }
1620
1621                                                 if (ferror(ce->ce_fp)) {
1622                                                         content_error(ce->ce_file, ct,
1623                                                                                    "error writing to");
1624                                                         goto clean_up;
1625                                                 }
1626                                                 bitno = 18, bits = 0L, skip = 0;
1627                                         }
1628                                         break;
1629
1630                                 case '=':
1631                                         if (++skip > 3)
1632                                                 goto self_delimiting;
1633                                         goto test_end;
1634                                 }
1635                         }
1636                 }
1637         }
1638
1639         if (bitno != 18) {
1640                 if (debugsw)
1641                         fprintf(stderr, "premature ending (bitno %d)\n",
1642                                         bitno);
1643
1644                 content_error(NULL, ct, "invalid BASE64 encoding");
1645                 goto clean_up;
1646         }
1647
1648 self_delimiting:
1649         fseek(ct->c_fp, 0L, SEEK_SET);
1650
1651         if (fflush(ce->ce_fp)) {
1652                 content_error(ce->ce_file, ct, "error writing to");
1653                 goto clean_up;
1654         }
1655
1656         fseek(ce->ce_fp, 0L, SEEK_SET);
1657
1658 ready_to_go:
1659         *file = ce->ce_file;
1660         if (own_ct_fp) {
1661                 fclose(ct->c_fp);
1662                 ct->c_fp = NULL;
1663         }
1664         return fileno(ce->ce_fp);
1665
1666 clean_up:
1667         free_encoding(ct, 0);
1668         if (own_ct_fp) {
1669                 fclose(ct->c_fp);
1670                 ct->c_fp = NULL;
1671         }
1672         return NOTOK;
1673 }
1674
1675
1676 /*
1677 ** QUOTED PRINTABLE
1678 */
1679
/*
** Decoding table for hexadecimal digits: maps a 7-bit character code
** to the value of the hexadecimal digit it represents.  Both upper
** and lower case letters are accepted.  All other characters map to
** 0x00, i.e. they cannot be told apart from the digit '0' by means
** of this table.  The table is only ever read, hence it is declared
** const.
*/
static const char hex2nib[0x80] = {
        /* 0x00 - 0x2f: no digits */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x30 - 0x3f: '0' .. '9' */
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x40 - 0x5f: 'A' .. 'F' */
        0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        /* 0x60 - 0x7f: 'a' .. 'f' */
        0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1698
1699
1700 static int
1701 InitQuoted(CT ct)
1702 {
1703         return init_encoding(ct, openQuoted);
1704 }
1705
1706
1707 static int
1708 openQuoted(CT ct, char **file)
1709 {
1710         int cc, len, quoted, own_ct_fp = 0;
1711         unsigned char *cp, *ep;
1712         char buffer[BUFSIZ];
1713         unsigned char mask = 0;
1714         CE ce;
1715         /* sbeck -- handle suffixes */
1716         CI ci;
1717
1718         ce = ct->c_cefile;
1719         if (ce->ce_fp) {
1720                 fseek(ce->ce_fp, 0L, SEEK_SET);
1721                 goto ready_to_go;
1722         }
1723
1724         if (ce->ce_file) {
1725                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1726                         content_error(ce->ce_file, ct,
1727                                         "unable to fopen for reading");
1728                         return NOTOK;
1729                 }
1730                 goto ready_to_go;
1731         }
1732
1733         if (*file == NULL) {
1734                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1735                 ce->ce_unlink = 1;
1736         } else {
1737                 ce->ce_file = getcpy(*file);
1738                 ce->ce_unlink = 0;
1739         }
1740
1741         /* sbeck@cise.ufl.edu -- handle suffixes */
1742         ci = &ct->c_ctinfo;
1743         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1744                         invo_name, ci->ci_type, ci->ci_subtype);
1745         cp = context_find(buffer);
1746         if (cp == NULL || *cp == '\0') {
1747                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1748                                 ci->ci_type);
1749                 cp = context_find(buffer);
1750         }
1751         if (cp != NULL && *cp != '\0') {
1752                 if (ce->ce_unlink) {
1753                         /*
1754                         ** Temporary file already exists, so we rename to
1755                         ** version with extension.
1756                         */
1757                         char *file_org = strdup(ce->ce_file);
1758                         ce->ce_file = add(cp, ce->ce_file);
1759                         if (rename(file_org, ce->ce_file)) {
1760                                 adios(ce->ce_file, "unable to rename %s to ",
1761                                                 file_org);
1762                         }
1763                         free(file_org);
1764
1765                 } else {
1766                         ce->ce_file = add(cp, ce->ce_file);
1767                 }
1768         }
1769
1770         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1771                 content_error(ce->ce_file, ct,
1772                                 "unable to fopen for reading/writing");
1773                 return NOTOK;
1774         }
1775
1776         if ((len = ct->c_end - ct->c_begin) < 0)
1777                 adios(NULL, "internal error(2)");
1778
1779         if (!ct->c_fp) {
1780                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1781                         content_error(ct->c_file, ct,
1782                                         "unable to open for reading");
1783                         return NOTOK;
1784                 }
1785                 own_ct_fp = 1;
1786         }
1787
1788         quoted = 0;
1789
1790         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1791         while (len > 0) {
1792                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1793                         content_error(NULL, ct, "premature eof");
1794                         goto clean_up;
1795                 }
1796
1797                 if ((cc = strlen(buffer)) > len)
1798                         cc = len;
1799                 len -= cc;
1800
1801                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1802                         if (!isspace(*ep))
1803                                 break;
1804                 *++ep = '\n', ep++;
1805
1806                 for (; cp < ep; cp++) {
1807                         if (quoted > 0) {
1808                                 /* in an escape sequence */
1809                                 if (quoted == 1) {
1810                                         /* at byte 1 of an escape sequence */
1811                                         mask = hex2nib[*cp & 0x7f];
1812                                         /* next is byte 2 */
1813                                         quoted = 2;
1814                                 } else {
1815                                         /* at byte 2 of an escape sequence */
1816                                         mask <<= 4;
1817                                         mask |= hex2nib[*cp & 0x7f];
1818                                         putc(mask, ce->ce_fp);
1819                                         if (ferror(ce->ce_fp)) {
1820                                                 content_error(ce->ce_file, ct, "error writing to");
1821                                                 goto clean_up;
1822                                         }
1823                                         /*
1824                                         ** finished escape sequence; next may
1825                                         ** be literal or a new escape sequence
1826                                         */
1827                                         quoted = 0;
1828                                 }
1829                                 /* on to next byte */
1830                                 continue;
1831                         }
1832
1833                         /* not in an escape sequence */
1834                         if (*cp == '=') {
1835                                 /*
1836                                 ** starting an escape sequence,
1837                                 ** or invalid '='?
1838                                 */
1839                                 if (cp + 1 < ep && cp[1] == '\n') {
1840                                         /* "=\n" soft line break, eat the \n */
1841                                         cp++;
1842                                         continue;
1843                                 }
1844                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1845                                         /*
1846                                         ** We don't have 2 bytes left,
1847                                         ** so this is an invalid escape
1848                                         ** sequence; just show the raw bytes
1849                                         ** (below).
1850                                         */
1851                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1852                                         /*
1853                                         ** Next 2 bytes are hex digits,
1854                                         ** making this a valid escape
1855                                         ** sequence; let's decode it (above).
1856                                         */
1857                                         quoted = 1;
1858                                         continue;
1859                                 } else {
1860                                         /*
1861                                         ** One or both of the next 2 is
1862                                         ** out of range, making this an
1863                                         ** invalid escape sequence; just
1864                                         ** show the raw bytes (below).
1865                                         */
1866                                 }
1867                         }
1868
1869                         /* Just show the raw byte. */
1870                         putc(*cp, ce->ce_fp);
1871                         if (ferror(ce->ce_fp)) {
1872                                 content_error(ce->ce_file, ct,
1873                                                 "error writing to");
1874                                 goto clean_up;
1875                         }
1876                 }
1877         }
1878         if (quoted) {
1879                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1880                 goto clean_up;
1881         }
1882
1883         fseek(ct->c_fp, 0L, SEEK_SET);
1884
1885         if (fflush(ce->ce_fp)) {
1886                 content_error(ce->ce_file, ct, "error writing to");
1887                 goto clean_up;
1888         }
1889
1890         fseek(ce->ce_fp, 0L, SEEK_SET);
1891
1892 ready_to_go:
1893         *file = ce->ce_file;
1894         if (own_ct_fp) {
1895                 fclose(ct->c_fp);
1896                 ct->c_fp = NULL;
1897         }
1898         return fileno(ce->ce_fp);
1899
1900 clean_up:
1901         free_encoding(ct, 0);
1902         if (own_ct_fp) {
1903                 fclose(ct->c_fp);
1904                 ct->c_fp = NULL;
1905         }
1906         return NOTOK;
1907 }
1908
1909
1910 /*
1911 ** 7BIT
1912 */
1913
1914 static int
1915 Init7Bit(CT ct)
1916 {
1917         if (init_encoding(ct, open7Bit) == NOTOK)
1918                 return NOTOK;
1919
1920         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1921         return OK;
1922 }
1923
1924
1925 int
1926 open7Bit(CT ct, char **file)
1927 {
1928         int cc, fd, len, own_ct_fp = 0;
1929         char buffer[BUFSIZ];
1930         /* sbeck -- handle suffixes */
1931         char *cp;
1932         CI ci;
1933         CE ce;
1934
1935         ce = ct->c_cefile;
1936         if (ce->ce_fp) {
1937                 fseek(ce->ce_fp, 0L, SEEK_SET);
1938                 goto ready_to_go;
1939         }
1940
1941         if (ce->ce_file) {
1942                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1943                         content_error(ce->ce_file, ct,
1944                                         "unable to fopen for reading");
1945                         return NOTOK;
1946                 }
1947                 goto ready_to_go;
1948         }
1949
1950         if (*file == NULL) {
1951                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1952                 ce->ce_unlink = 1;
1953         } else {
1954                 ce->ce_file = getcpy(*file);
1955                 ce->ce_unlink = 0;
1956         }
1957
1958         /* sbeck@cise.ufl.edu -- handle suffixes */
1959         ci = &ct->c_ctinfo;
1960         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1961                         invo_name, ci->ci_type, ci->ci_subtype);
1962         cp = context_find(buffer);
1963         if (cp == NULL || *cp == '\0') {
1964                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1965                                 ci->ci_type);
1966                 cp = context_find(buffer);
1967         }
1968         if (cp != NULL && *cp != '\0') {
1969                 if (ce->ce_unlink) {
1970                         /*
1971                         ** Temporary file already exists, so we rename to
1972                         ** version with extension.
1973                         */
1974                         char *file_org = strdup(ce->ce_file);
1975                         ce->ce_file = add(cp, ce->ce_file);
1976                         if (rename(file_org, ce->ce_file)) {
1977                                 adios(ce->ce_file, "unable to rename %s to ",
1978                                                 file_org);
1979                         }
1980                         free(file_org);
1981
1982                 } else {
1983                         ce->ce_file = add(cp, ce->ce_file);
1984                 }
1985         }
1986
1987         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1988                 content_error(ce->ce_file, ct,
1989                                 "unable to fopen for reading/writing");
1990                 return NOTOK;
1991         }
1992
1993         if (ct->c_type == CT_MULTIPART) {
1994                 char **ap, **ep;
1995                 CI ci = &ct->c_ctinfo;
1996
1997                 len = 0;
1998                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1999                                 ci->ci_subtype);
2000                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2001                                 strlen(ci->ci_subtype);
2002                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2003                         putc(';', ce->ce_fp);
2004                         len++;
2005
2006                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2007                                         *ap, *ep);
2008
2009                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2010                                 fputs("\n\t", ce->ce_fp);
2011                                 len = 8;
2012                         } else {
2013                                 putc(' ', ce->ce_fp);
2014                                 len++;
2015                         }
2016                         fprintf(ce->ce_fp, "%s", buffer);
2017                         len += cc;
2018                 }
2019
2020                 if (ci->ci_comment) {
2021                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2022                                                 >= CPERLIN) {
2023                                 fputs("\n\t", ce->ce_fp);
2024                                 len = 8;
2025                         } else {
2026                                 putc(' ', ce->ce_fp);
2027                                 len++;
2028                         }
2029                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2030                         len += cc;
2031                 }
2032                 fprintf(ce->ce_fp, "\n");
2033                 if (ct->c_id)
2034                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2035                 if (ct->c_descr)
2036                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2037                 if (ct->c_dispo)
2038                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2039                 fprintf(ce->ce_fp, "\n");
2040         }
2041
2042         if ((len = ct->c_end - ct->c_begin) < 0)
2043                 adios(NULL, "internal error(3)");
2044
2045         if (!ct->c_fp) {
2046                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2047                         content_error(ct->c_file, ct,
2048                                         "unable to open for reading");
2049                         return NOTOK;
2050                 }
2051                 own_ct_fp = 1;
2052         }
2053
2054         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2055         while (len > 0)
2056                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2057                 case NOTOK:
2058                         content_error(ct->c_file, ct, "error reading from");
2059                         goto clean_up;
2060
2061                 case OK:
2062                         content_error(NULL, ct, "premature eof");
2063                         goto clean_up;
2064
2065                 default:
2066                         if (cc > len)
2067                                 cc = len;
2068                         len -= cc;
2069
2070                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2071                         if (ferror(ce->ce_fp)) {
2072                                 content_error(ce->ce_file, ct,
2073                                                 "error writing to");
2074                                 goto clean_up;
2075                         }
2076                 }
2077
2078         fseek(ct->c_fp, 0L, SEEK_SET);
2079
2080         if (fflush(ce->ce_fp)) {
2081                 content_error(ce->ce_file, ct, "error writing to");
2082                 goto clean_up;
2083         }
2084
2085         fseek(ce->ce_fp, 0L, SEEK_SET);
2086
2087 ready_to_go:
2088         *file = ce->ce_file;
2089         if (own_ct_fp) {
2090                 fclose(ct->c_fp);
2091                 ct->c_fp = NULL;
2092         }
2093         return fileno(ce->ce_fp);
2094
2095 clean_up:
2096         free_encoding(ct, 0);
2097         if (own_ct_fp) {
2098                 fclose(ct->c_fp);
2099                 ct->c_fp = NULL;
2100         }
2101         return NOTOK;
2102 }