Removed unnecessary #include of h/signals.h.
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <errno.h>
12 #include <h/tws.h>
13 #include <h/mime.h>
14 #include <h/mhparse.h>
15 #include <h/utils.h>
16
17 extern int debugsw;
18
19 extern int endian;  /* mhmisc.c */
20
21 extern pid_t xpid;  /* mhshowsbr.c  */
22
23 /*
24 ** Directory to place temp files.  This must
25 ** be set before these routines are called.
** Nothing in this part of the file assigns it; the caller is
** expected to do so.
26 */
27 char *tmp;
28
29 /*
30 ** Structures for TEXT messages
**
** SubText pairs each known text subtype name with its TEXT_*
** constant.  The entry with the NULL name ends the table and
** carries TEXT_UNKNOWN.
31 */
32 struct k2v SubText[] = {
33         { "plain", TEXT_PLAIN },
34         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
35         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
36         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
37 };
38
/*
** Charset pairs each known character set name with its CHARSET_*
** constant.  The entry with the NULL name ends the table and
** carries CHARSET_UNKNOWN.
*/
39 struct k2v Charset[] = {
40         { "us-ascii",   CHARSET_USASCII },
41         { "iso-8859-1", CHARSET_LATIN },
42         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
43 };
44
45 /*
46 ** Structures for MULTIPART messages
**
** SubMultiPart pairs each known multipart subtype name with its
** MULTI_* constant.  The entry with the NULL name ends the table
** and carries MULTI_UNKNOWN.
47 */
48 struct k2v SubMultiPart[] = {
49         { "mixed",       MULTI_MIXED },
50         { "alternative", MULTI_ALTERNATE },
51         { "digest",      MULTI_DIGEST },
52         { "parallel",    MULTI_PARALLEL },
53         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
54 };
55
56 /*
57 ** Structures for MESSAGE messages
**
** SubMessage pairs each known message subtype name with its
** MESSAGE_* constant.  The entry with the NULL name ends the table
** and carries MESSAGE_UNKNOWN.
58 */
59 struct k2v SubMessage[] = {
60         { "rfc822",        MESSAGE_RFC822 },
61         { "partial",       MESSAGE_PARTIAL },
62         { "external-body", MESSAGE_EXTERNAL },
63         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
64 };
65
66 /*
67 ** Structure for APPLICATION messages
**
** SubApplication pairs each known application subtype name with its
** APPLICATION_* constant.  The entry with the NULL name ends the
** table and carries APPLICATION_UNKNOWN.
68 */
69 struct k2v SubApplication[] = {
70         { "octet-stream", APPLICATION_OCTETS },
71         { "postscript",   APPLICATION_POSTSCRIPT },
72         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
73 };
74
75
76 /* mhmisc.c */
77 int part_ok(CT, int);
78 int type_ok(CT, int);
79 int make_intermediates(char *);
80 void content_error(char *, CT, char *, ...);
81
82 /* mhfree.c */
83 void free_content(CT);
84 void free_encoding(CT, int);
85
86 /*
87 ** static prototypes
88 */
89 static CT get_content(FILE *, char *, int);
90 static int get_comment(CT, unsigned char **, int);
91
92 static int InitGeneric(CT);
93 static int InitText(CT);
94 static int InitMultiPart(CT);
95 static void reverse_parts(CT);
96 static int InitMessage(CT);
97 static int InitApplication(CT);
98 static int init_encoding(CT, OpenCEFunc);
99 static unsigned long size_encoding(CT);
100 static int InitBase64(CT);
101 static int openBase64(CT, char **);
102 static int InitQuoted(CT);
103 static int openQuoted(CT, char **);
104 static int Init7Bit(CT);
105
/*
** Content types: name, CT_* code and the Init function that
** get_content() stores in ct->c_ctinitfnx.
**
** The table ends with TWO entries that have a NULL name.  The lookup
** in get_content() stops on the first of them (CT_EXTENSION) when no
** name matched, and steps on to the second (CT_UNKNOWN) when the
** uprf(type, "X-") test fails.  Neither has an Init function.
*/
106 struct str2init str2cts[] = {
107         { "application", CT_APPLICATION, InitApplication },
108         { "audio",       CT_AUDIO,       InitGeneric },
109         { "image",       CT_IMAGE,       InitGeneric },
110         { "message",     CT_MESSAGE,     InitMessage },
111         { "multipart",   CT_MULTIPART,   InitMultiPart },
112         { "text",        CT_TEXT,        InitText },
113         { "video",       CT_VIDEO,       InitGeneric },
114         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
115         { NULL,          CT_UNKNOWN,     NULL },
116 };
117
/*
** Content transfer encodings: name, CE_* code and the Init function
** that get_content() calls as soon as it has identified the encoding.
** "8bit", "7bit" and "binary" all share Init7Bit.
**
** As in str2cts, the table ends with two NULL-named entries:
** CE_EXTENSION is where an unmatched lookup stops, CE_UNKNOWN is
** selected instead when the uprf(name, "X-") test fails.
*/
118 struct str2init str2ces[] = {
119         { "base64",           CE_BASE64,    InitBase64 },
120         { "quoted-printable", CE_QUOTED,    InitQuoted },
121         { "8bit",             CE_8BIT,      Init7Bit },
122         { "7bit",             CE_7BIT,      Init7Bit },
123         { "binary",           CE_BINARY,    Init7Bit },
124         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
125         { NULL,               CE_UNKNOWN,    NULL },
126 };
127
128
/*
** Inspect a child status word.
**
** When the low seven bits of `status' equal SIGQUIT and the 0xff00
** byte is not all ones, both standard output streams are flushed and
** the process exits with code 1.  In every other case `status' is
** handed back unchanged.
*/
int
pidcheck(int status)
{
	int high_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_bits = status & 0x007f;

	if (!high_byte_all_ones && low_bits == SIGQUIT) {
		fflush(stdout);
		fflush(stderr);
		exit(1);
	}

	return status;
}
140
141
142 /*
143 ** Main entry point for parsing a MIME message or file.
144 ** It returns the Content structure for the top level
145 ** entity in the file.
146 */
147 CT
148 parse_mime(char *file)
149 {
150         int is_stdin;
151         char buffer[BUFSIZ];
152         FILE *fp;
153         CT ct;
154
155         /*
156         ** Check if file is actually standard input
157         */
158         if ((is_stdin = (strcmp(file, "-")==0))) {
159                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
160                 if (tfile == NULL) {
161                         advise("mhparse", "unable to create temporary file");
162                         return NULL;
163                 }
164                 file = getcpy(tfile);
165                 chmod(file, 0600);
166
167                 while (fgets(buffer, sizeof(buffer), stdin))
168                         fputs(buffer, fp);
169                 fflush(fp);
170
171                 if (ferror(stdin)) {
172                         unlink(file);
173                         advise("stdin", "error reading");
174                         return NULL;
175                 }
176                 if (ferror(fp)) {
177                         unlink(file);
178                         advise(file, "error writing");
179                         return NULL;
180                 }
181                 fseek(fp, 0L, SEEK_SET);
182         } else if ((fp = fopen(file, "r")) == NULL) {
183                 advise(file, "unable to read");
184                 return NULL;
185         }
186
187         if (!(ct = get_content(fp, file, 1))) {
188                 if (is_stdin)
189                         unlink(file);
190                 advise(NULL, "unable to decode %s", file);
191                 return NULL;
192         }
193
194         if (is_stdin)
195                 ct->c_unlink = 1;  /* temp file to remove */
196
197         ct->c_fp = NULL;
198
199         if (ct->c_end == 0L) {
200                 fseek(fp, 0L, SEEK_END);
201                 ct->c_end = ftell(fp);
202         }
203
204         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
205                 fclose(fp);
206                 free_content(ct);
207                 return NULL;
208         }
209
210         fclose(fp);
211         return ct;
212 }
213
214
215 /*
216 ** Main routine for reading/parsing the headers
217 ** of a message content.
218 **
219 ** toplevel =  1   # we are at the top level of the message
220 ** toplevel =  0   # we are inside message type or multipart type
221 **                 # other than multipart/digest
222 ** toplevel = -1   # we are inside multipart/digest
223 ** NB: on failure we will fclose(in)!
**     (The only cleanup done on failure is free_content(ct) with
**     ct->c_fp == in, so the close presumably happens there --
**     confirm in mhfree.c.)
**
** in        stream positioned at the first header line of the content
** file      name of the file behind `in'; a copy is kept in ct->c_file
**
** Works in two passes.  First all header fields are read with
** m_getfld() and appended to the header list of a freshly allocated
** content structure; ct->c_begin ends up as the offset computed when
** the headers end.  Then the list is walked and the MIME-Version,
** Content-Type, Content-Transfer-Encoding, Content-ID,
** Content-Description and Content-Disposition fields are interpreted.
**
** Defaults: without a Content-Type field the content is treated as
** message/rfc822 when toplevel < 0 and as text/plain otherwise;
** without a Content-Transfer-Encoding field it is CE_7BIT.
**
** Returns the content structure, or NULL when a field could not be
** parsed.  Format errors found by m_getfld() and allocation failure
** are reported through adios().
224 */
225
226 static CT
227 get_content(FILE *in, char *file, int toplevel)
228 {
229         int compnum, state;
230         char buf[BUFSIZ], name[NAMESZ];
231         char *np, *vp;
232         CT ct;
233         HF hp;
234
235         /* allocate the content structure */
236         if (!(ct = (CT) calloc(1, sizeof(*ct))))
237                 adios(NULL, "out of memory");
238
239         ct->c_fp = in;
240         ct->c_file = getcpy(file);
241         ct->c_begin = ftell(ct->c_fp) + 1;
242
243         /*
244         ** Parse the header fields for this
245         ** content into a linked list.
246         */
247         for (compnum = 1, state = FLD;;) {
248                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
249                 case FLD:
250                 case FLDPLUS:
251                 case FLDEOF:
252                         compnum++;
253
254                         /* get copies of the buffers */
255                         np = getcpy(name);
256                         vp = getcpy(buf);
257
258                         /* if necessary, get rest of field */
259                         while (state == FLDPLUS) {
260                                 state = m_getfld(state, name, buf,
261                                                 sizeof(buf), in);
262                                 vp = add(buf, vp);  /* add to previous value */
263                         }
264
265                         /* Now add the header data to the list */
                        /* (the list takes over np and vp, no copies are made) */
266                         add_header(ct, np, vp);
267
268                         /* continue, if this isn't the last header field */
269                         if (state != FLDEOF) {
270                                 ct->c_begin = ftell(in) + 1;
271                                 continue;
272                         }
273                         /* else fall... */
274
275                 case BODY:
276                 case BODYEOF:
                        /*
                        ** Back up from the current offset by the length
                        ** of what is in buf (presumably text already
                        ** consumed by m_getfld() -- confirm there).
                        */
277                         ct->c_begin = ftell(in) - strlen(buf);
278                         break;
279
280                 case FILEEOF:
281                         ct->c_begin = ftell(in);
282                         break;
283
284                 case LENERR:
285                 case FMTERR:
                        /* no break: relies on adios() not returning */
286                         adios(NULL, "message format error in component #%d",
287                                         compnum);
288
289                 default:
290                         adios(NULL, "getfld() returned %d", state);
291                 }
292
293                 /* break out of the loop */
294                 break;
295         }
296
297         /*
298         ** Read the content headers.  We will parse the
299         ** MIME related header fields into their various
300         ** structures and set internal flags related to
301         ** content type/subtype, etc.
302         */
303
304         hp = ct->c_first_hf;  /* start at first header field */
305         while (hp) {
306                 /* Get MIME-Version field */
307                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
308                         int ucmp;
309                         char c;
310                         unsigned char *cp, *dp;
311
                        /* a repeated field is reported and then ignored */
312                         if (ct->c_vrsn) {
313                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
314                                 goto next_header;
315                         }
316                         ct->c_vrsn = getcpy(hp->value);
317
318                         /* Now, cleanup this field */
                        /*
                        ** i.e. skip leading space, turn newlines into
                        ** blanks and cut off trailing space -- all done
                        ** in place on the copy.
                        */
319                         cp = ct->c_vrsn;
320
321                         while (isspace(*cp))
322                                 cp++;
323                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
324                                 *dp++ = ' ';
325                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
326                                 if (!isspace(*dp))
327                                         break;
328                         *++dp = '\0';
329                         if (debugsw)
330                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
331
332                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
333                                 goto out;
334
                        /*
                        ** Terminate the leading token just for the
                        ** comparison, then restore the character.
                        */
335                         for (dp = cp; istoken(*dp); dp++)
336                                 continue;
337                         c = *dp;
338                         *dp = '\0';
339                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
340                         *dp = c;
                        /* an unexpected version only draws a warning */
341                         if (!ucmp) {
342                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
343                         }
344
345                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
346                         /* Get Content-Type field */
347                         struct str2init *s2i;
348                         CI ci = &ct->c_ctinfo;
349
350                         /* Check if we've already seen a Content-Type header */
351                         if (ct->c_ctline) {
352                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
353                                 goto next_header;
354                         }
355
356                         /* Parse the Content-Type field */
357                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
358                                 goto out;
359
360                         /*
361                         ** Set the Init function and the internal
362                         ** flag for this content type.
                        **
                        ** If no name matched, s2i rests on the first
                        ** NULL-named entry (CT_EXTENSION); it is moved
                        ** on to the second one (CT_UNKNOWN) when the
                        ** uprf(..., "X-") test fails.
363                         */
364                         for (s2i = str2cts; s2i->si_key; s2i++)
365                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
366                                         break;
367                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
368                                 s2i++;
369                         ct->c_type = s2i->si_val;
370                         ct->c_ctinitfnx = s2i->si_init;
371
372                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
373                         /* Get Content-Transfer-Encoding field */
374                         char c;
375                         unsigned char *cp, *dp;
376                         struct str2init *s2i;
377
378                         /*
379                         ** Check if we've already seen the
380                         ** Content-Transfer-Encoding field
381                         */
382                         if (ct->c_celine) {
383                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
384                                 goto next_header;
385                         }
386
387                         /* get copy of this field */
388                         ct->c_celine = cp = getcpy(hp->value);
389
390                         while (isspace(*cp))
391                                 cp++;
392                         for (dp = cp; istoken(*dp); dp++)
393                                 continue;
394                         c = *dp;
395                         *dp = '\0';
396
397                         /*
398                         ** Find the internal flag and Init function
399                         ** for this transfer encoding.
                        ** (Same two-terminator lookup as for the
                        ** content type, here on str2ces.)
400                         */
401                         for (s2i = str2ces; s2i->si_key; s2i++)
402                                 if (!mh_strcasecmp(cp, s2i->si_key))
403                                         break;
404                         if (!s2i->si_key && !uprf(cp, "X-"))
405                                 s2i++;
406                         *dp = c;
407                         ct->c_encoding = s2i->si_val;
408
409                         /* Call the Init function for this encoding */
410                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
411                                 goto out;
412
413                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
414                         /* Get Content-ID field */
                        /*
                        ** add() receives the value stored so far, so a
                        ** repeated field presumably gets appended rather
                        ** than rejected; the same holds for the two
                        ** fields below.
                        */
415                         ct->c_id = add(hp->value, ct->c_id);
416
417                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
418                         /* Get Content-Description field */
419                         ct->c_descr = add(hp->value, ct->c_descr);
420
421                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
422                         /* Get Content-Disposition field */
423                         ct->c_dispo = add(hp->value, ct->c_dispo);
424                 }
425
426 next_header:
427                 hp = hp->next;  /* next header field */
428         }
429
430         /*
431         ** Check if we saw a Content-Type field.
432         ** If not, then assign a default value for
433         ** it, and the Init function.
434         */
435         if (!ct->c_ctline) {
436                 /*
437                 ** If we are inside a multipart/digest message,
438                 ** so default type is message/rfc822
439                 */
440                 if (toplevel < 0) {
441                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
442                                 goto out;
443                         ct->c_type = CT_MESSAGE;
444                         ct->c_ctinitfnx = InitMessage;
445                 } else {
446                         /*
447                         ** Else default type is text/plain
448                         */
449                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
450                                 goto out;
451                         ct->c_type = CT_TEXT;
452                         ct->c_ctinitfnx = InitText;
453                 }
454         }
455
456         /* Use default Transfer-Encoding, if necessary */
        /* (the result of Init7Bit() is not checked here) */
457         if (!ct->c_celine) {
458                 ct->c_encoding = CE_7BIT;
459                 Init7Bit(ct);
460         }
461
462         return ct;
463
464 out:
        /* parse error: give up the whole content structure */
465         free_content(ct);
466         return NULL;
467 }
468
469
470 /*
471 ** small routine to add header field to list
472 */
473
474 int
475 add_header(CT ct, char *name, char *value)
476 {
477         HF hp;
478
479         /* allocate header field structure */
480         hp = mh_xmalloc(sizeof(*hp));
481
482         /* link data into header structure */
483         hp->name = name;
484         hp->value = value;
485         hp->next = NULL;
486
487         /* link header structure into the list */
488         if (ct->c_first_hf == NULL) {
489                 ct->c_first_hf = hp;  /* this is the first */
490                 ct->c_last_hf = hp;
491         } else {
492                 ct->c_last_hf->next = hp;  /* add it to the end */
493                 ct->c_last_hf = hp;
494         }
495
496         return 0;
497 }
498
499
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, add `; name="value"': in front of the
** first semicolon, if there is one, else at the end.  Note that name
** should not contain a trailing =.  And quotes will be added around
** the value.  Typical usage:  make sure that a Content-Disposition
** header contains filename="foo".
**
** buf    heap allocated header text, may be NULL
** name   parameter name, must not be NULL
** value  parameter value, may be NULL
**
** Returns buf itself if it is NULL, if value is NULL or if name= is
** already present.  Otherwise trailing white space is trimmed from
** buf, a newly allocated string (ending in a newline) is returned
** and buf is freed -- the caller must continue with the return value.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
	char *text = (char *)buf;
	char *needle, *insertion, *semicolon, *result;
	size_t len;
	int present;

	/* Assume that name is non-null. */
	if (!text || !value)
		return text;

	needle = concat(name, "=", NULL);
	present = (strstr(text, needle) != NULL);
	free(needle);
	if (present)
		return text;

	/*
	** Trim trailing space, esp. newline.  Working with the length
	** keeps this well defined for an empty string.
	*/
	len = strlen(text);
	while (len > 0 && isspace((unsigned char)text[len - 1]))
		text[--len] = '\0';

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	/*
	** Insert at first semicolon, if any.
	** If none, append to end.
	*/
	if ((semicolon = strchr(text, ';'))) {
		/* text is freed below, so it may be split in place */
		*semicolon = '\0';
		result = concat(text, insertion, ";", semicolon + 1,
				"\n", NULL);
	} else {
		result = concat(text, insertion, "\n", NULL);
	}

	free(insertion);
	free(text);

	return result;
}
557
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** If the closing quote is missing, everything up to the end of value
** is taken.
**
** Returns either `value' itself (nothing found) or a newly allocated
** string.  A caller that wants to free the result has to compare it
** with `value' first.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
	char *pattern = concat(name_suffix, "=\"", NULL);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	free(pattern);
	if (!match)
		return value;

	/* the matched pattern contains the opening quote, so this succeeds */
	begin = strchr(match, '"') + 1;

	/* do not run past the end of the string if the quote is not closed */
	end = strchr(begin, '"');
	len = end ? (size_t)(end - begin) : strlen(begin);

	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
590
591 /*
592 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
593 ** directives.  Fills in the information of the CTinfo structure.
594 */
595 int
596 get_ctinfo(unsigned char *cp, CT ct, int magic)
597 {
598         int i;
599         unsigned char *dp;
600         char **ap, **ep;
601         char c;
602         CI ci;
603
604         ci = &ct->c_ctinfo;
605         i = strlen(invo_name) + 2;
606
607         /* store copy of Content-Type line */
608         cp = ct->c_ctline = getcpy(cp);
609
610         while (isspace(*cp))  /* trim leading spaces */
611                 cp++;
612
613         /* change newlines to spaces */
614         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
615                 *dp++ = ' ';
616
617         /* trim trailing spaces */
618         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
619                 if (!isspace(*dp))
620                         break;
621         *++dp = '\0';
622
623         if (debugsw)
624                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
625
626         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
627                 return NOTOK;
628
629         for (dp = cp; istoken(*dp); dp++)
630                 continue;
631         c = *dp, *dp = '\0';
632         ci->ci_type = getcpy(cp);  /* store content type */
633         *dp = c, cp = dp;
634
635         if (!*ci->ci_type) {
636                 advise(NULL, "invalid %s: field in message %s (empty type)",
637                                 TYPE_FIELD, ct->c_file);
638                 return NOTOK;
639         }
640
641         /* down case the content type string */
642         for (dp = ci->ci_type; *dp; dp++)
643                 if (isalpha(*dp) && isupper(*dp))
644                         *dp = tolower(*dp);
645
646         while (isspace(*cp))
647                 cp++;
648
649         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
650                 return NOTOK;
651
652         if (*cp != '/') {
653                 if (!magic)
654                         ci->ci_subtype = getcpy("");
655                 goto magic_skip;
656         }
657
658         cp++;
659         while (isspace(*cp))
660                 cp++;
661
662         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
663                 return NOTOK;
664
665         for (dp = cp; istoken(*dp); dp++)
666                 continue;
667         c = *dp, *dp = '\0';
668         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
669         *dp = c, cp = dp;
670
671         if (!*ci->ci_subtype) {
672                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
673                 return NOTOK;
674         }
675
676         /* down case the content subtype string */
677         for (dp = ci->ci_subtype; *dp; dp++)
678                 if (isalpha(*dp) && isupper(*dp))
679                         *dp = tolower(*dp);
680
681 magic_skip:
682         while (isspace(*cp))
683                 cp++;
684
685         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
686                 return NOTOK;
687
688         /*
689         ** Parse attribute/value pairs given with Content-Type
690         */
691         ep = (ap = ci->ci_attrs) + NPARMS;
692         while (*cp == ';') {
693                 char *vp;
694                 unsigned char *up;
695
696                 if (ap >= ep) {
697                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
698                         return NOTOK;
699                 }
700
701                 cp++;
702                 while (isspace(*cp))
703                         cp++;
704
705                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
706                         return NOTOK;
707
708                 if (*cp == 0) {
709                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
710                         return OK;
711                 }
712
713                 /* down case the attribute name */
714                 for (dp = cp; istoken(*dp); dp++)
715                         if (isalpha(*dp) && isupper(*dp))
716                                 *dp = tolower(*dp);
717
718                 for (up = dp; isspace(*dp);)
719                         dp++;
720                 if (dp == cp || *dp != '=') {
721                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
722                         return NOTOK;
723                 }
724
725                 vp = (*ap = getcpy(cp)) + (up - cp);
726                 *vp = '\0';
727                 for (dp++; isspace(*dp);)
728                         dp++;
729
730                 /* now add the attribute value */
731                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
732
733                 if (*dp == '"') {
734                         for (cp = ++dp, dp = vp;;) {
735                                 switch (c = *cp++) {
736                                 case '\0':
737 bad_quote:
738                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
739                                         return NOTOK;
740
741                                 case '\\':
742                                         *dp++ = c;
743                                         if ((c = *cp++) == '\0')
744                                                 goto bad_quote;
745                                         /* else fall... */
746
747                                 default:
748                                         *dp++ = c;
749                                         continue;
750
751                                 case '"':
752                                         *dp = '\0';
753                                         break;
754                                 }
755                                 break;
756                         }
757                 } else {
758                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
759                                 continue;
760                         *dp = '\0';
761                 }
762                 if (!*vp) {
763                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
764                         return NOTOK;
765                 }
766                 ap++;
767
768                 while (isspace(*cp))
769                         cp++;
770
771                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
772                         return NOTOK;
773         }
774
775         /*
776         ** Get any <Content-Id> given in buffer
777         */
778         if (magic && *cp == '<') {
779                 if (ct->c_id) {
780                         free(ct->c_id);
781                         ct->c_id = NULL;
782                 }
783                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784                         advise(NULL, "invalid ID in message %s", ct->c_file);
785                         return NOTOK;
786                 }
787                 c = *dp;
788                 *dp = '\0';
789                 if (*ct->c_id)
790                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
791                 else
792                         ct->c_id = NULL;
793                 *dp++ = c;
794                 cp = dp;
795
796                 while (isspace(*cp))
797                         cp++;
798         }
799
800         /*
801         ** Get any [Content-Description] given in buffer.
802         */
803         if (magic && *cp == '[') {
804                 ct->c_descr = ++cp;
805                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
806                         if (*dp == ']')
807                                 break;
808                 if (dp < cp) {
809                         advise(NULL, "invalid description in message %s",
810                                         ct->c_file);
811                         ct->c_descr = NULL;
812                         return NOTOK;
813                 }
814
815                 c = *dp;
816                 *dp = '\0';
817                 if (*ct->c_descr)
818                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
819                 else
820                         ct->c_descr = NULL;
821                 *dp++ = c;
822                 cp = dp;
823
824                 while (isspace(*cp))
825                         cp++;
826         }
827
828         /*
829         ** Get any {Content-Disposition} given in buffer.
830         */
831         if (magic && *cp == '{') {
832                 ct->c_dispo = ++cp;
833                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
834                         if (*dp == '}')
835                                 break;
836                 if (dp < cp) {
837                         advise(NULL, "invalid disposition in message %s",
838                                         ct->c_file);
839                         ct->c_dispo = NULL;
840                         return NOTOK;
841                 }
842
843                 c = *dp;
844                 *dp = '\0';
845                 if (*ct->c_dispo)
846                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
847                 else
848                         ct->c_dispo = NULL;
849                 *dp++ = c;
850                 cp = dp;
851
852                 while (isspace(*cp))
853                         cp++;
854         }
855
856         /*
857         ** Check if anything is left over
858         */
859         if (*cp) {
860                 if (magic) {
861                         ci->ci_magic = getcpy(cp);
862
863                         /*
864                         ** If there is a Content-Disposition header and
865                         ** it doesn't have a *filename=, extract it from
866                         ** the magic contents.  The mhbasename call skips
867                         ** any leading directory components.
868                         */
869                         if (ct->c_dispo)
870                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
871                         } else
872                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
873         }
874
875         return OK;
876 }
877
878
879 static int
880 get_comment(CT ct, unsigned char **ap, int istype)
881 {
882         int i;
883         char *bp;
884         unsigned char *cp;
885         char c, buffer[BUFSIZ], *dp;
886         CI ci;
887
888         ci = &ct->c_ctinfo;
889         cp = *ap;
890         bp = buffer;
891         cp++;
892
893         for (i = 0;;) {
894                 switch (c = *cp++) {
895                 case '\0':
896 invalid:
897                 advise(NULL, "invalid comment in message %s's %s: field",
898                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
899                 return NOTOK;
900
901                 case '\\':
902                         *bp++ = c;
903                         if ((c = *cp++) == '\0')
904                                 goto invalid;
905                         *bp++ = c;
906                         continue;
907
908                 case '(':
909                         i++;
910                         /* and fall... */
911                 default:
912                         *bp++ = c;
913                         continue;
914
915                 case ')':
916                         if (--i < 0)
917                                 break;
918                         *bp++ = c;
919                         continue;
920                 }
921                 break;
922         }
923         *bp = '\0';
924
925         if (istype) {
926                 if ((dp = ci->ci_comment)) {
927                         ci->ci_comment = concat(dp, " ", buffer, NULL);
928                         free(dp);
929                 } else {
930                         ci->ci_comment = getcpy(buffer);
931                 }
932         }
933
934         while (isspace(*cp))
935                 cp++;
936
937         *ap = cp;
938         return OK;
939 }
940
941
942 /*
943 ** CONTENTS
944 **
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
947 */
948
949 static int
950 InitGeneric(CT ct)
951 {
952         return OK;  /* not much to do here */
953 }
954
955
956 /*
957 ** TEXT
958 */
959
960 static int
961 InitText(CT ct)
962 {
963         char **ap, **ep;
964         struct k2v *kv;
965         struct text *t;
966         CI ci = &ct->c_ctinfo;
967
968         /* check for missing subtype */
969         if (!*ci->ci_subtype)
970                 ci->ci_subtype = add("plain", ci->ci_subtype);
971
972         /* match subtype */
973         for (kv = SubText; kv->kv_key; kv++)
974                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
975                         break;
976         ct->c_subtype = kv->kv_value;
977
978         /* allocate text character set structure */
979         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
980                 adios(NULL, "out of memory");
981         ct->c_ctparams = (void *) t;
982
983         /* scan for charset parameter */
984         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
985                 if (!mh_strcasecmp(*ap, "charset"))
986                         break;
987
988         /* check if content specified a character set */
989         if (*ap) {
990                 /* store its name */
991                 ct->c_charset = getcpy(norm_charmap(*ep));
992                 /* match character set or set to CHARSET_UNKNOWN */
993                 for (kv = Charset; kv->kv_key; kv++) {
994                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
995                                 break;
996                         }
997                 }
998                 t->tx_charset = kv->kv_value;
999         } else {
1000                 t->tx_charset = CHARSET_UNSPECIFIED;
1001         }
1002
1003         return OK;
1004 }
1005
1006
1007 /*
1008 ** MULTIPART
1009 */
1010
1011 static int
1012 InitMultiPart(CT ct)
1013 {
1014         int inout;
1015         long last, pos;
1016         unsigned char *cp, *dp;
1017         char **ap, **ep;
1018         char *bp, buffer[BUFSIZ];
1019         struct multipart *m;
1020         struct k2v *kv;
1021         struct part *part, **next;
1022         CI ci = &ct->c_ctinfo;
1023         CT p;
1024         FILE *fp;
1025
1026         /*
1027         ** The encoding for multipart messages must be either
1028         ** 7bit, 8bit, or binary (per RFC2045).
1029         */
1030         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1031                 && ct->c_encoding != CE_BINARY) {
1032                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1033                 return NOTOK;
1034         }
1035
1036         /* match subtype */
1037         for (kv = SubMultiPart; kv->kv_key; kv++)
1038                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1039                         break;
1040         ct->c_subtype = kv->kv_value;
1041
1042         /*
1043         ** Check for "boundary" parameter, which is
1044         ** required for multipart messages.
1045         */
1046         bp = 0;
1047         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1048                 if (!mh_strcasecmp(*ap, "boundary")) {
1049                         bp = *ep;
1050                         break;
1051                 }
1052         }
1053
1054         /* complain if boundary parameter is missing */
1055         if (!*ap) {
1056                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1057                 return NOTOK;
1058         }
1059
1060         /* allocate primary structure for multipart info */
1061         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1062                 adios(NULL, "out of memory");
1063         ct->c_ctparams = (void *) m;
1064
1065         /* check if boundary parameter contains only whitespace characters */
1066         for (cp = bp; isspace(*cp); cp++)
1067                 continue;
1068         if (!*cp) {
1069                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1070                 return NOTOK;
1071         }
1072
1073         /* remove trailing whitespace from boundary parameter */
1074         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1075                 if (!isspace(*dp))
1076                         break;
1077         *++dp = '\0';
1078
1079         /* record boundary separators */
1080         m->mp_start = concat(bp, "\n", NULL);
1081         m->mp_stop = concat(bp, "--\n", NULL);
1082
1083         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1084                 advise(ct->c_file, "unable to open for reading");
1085                 return NOTOK;
1086         }
1087
1088         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1089         last = ct->c_end;
1090         next = &m->mp_parts;
1091         part = NULL;
1092         inout = 1;
1093
1094         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1095                 if (pos > last)
1096                         break;
1097
1098                 pos += strlen(buffer);
1099                 if (buffer[0] != '-' || buffer[1] != '-')
1100                         continue;
1101                 if (inout) {
1102                         if (strcmp(buffer + 2, m->mp_start)!=0)
1103                                 continue;
1104 next_part:
1105                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1106                                         == NULL)
1107                                 adios(NULL, "out of memory");
1108                         *next = part;
1109                         next = &part->mp_next;
1110
1111                         if (!(p = get_content(fp, ct->c_file,
1112                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1113                                 ct->c_fp = NULL;
1114                                 return NOTOK;
1115                         }
1116                         p->c_fp = NULL;
1117                         part->mp_part = p;
1118                         pos = p->c_begin;
1119                         fseek(fp, pos, SEEK_SET);
1120                         inout = 0;
1121                 } else {
1122                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1123                                 inout = 1;
1124 end_part:
1125                                 p = part->mp_part;
1126                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1127                                 if (p->c_end < p->c_begin)
1128                                         p->c_begin = p->c_end;
1129                                 if (inout)
1130                                         goto next_part;
1131                                 goto last_part;
1132                         } else {
1133                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1134                                         goto end_part;
1135                         }
1136                 }
1137         }
1138
1139         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1140         if (!inout && part) {
1141                 p = part->mp_part;
1142                 p->c_end = ct->c_end;
1143
1144                 if (p->c_begin >= p->c_end) {
1145                         for (next = &m->mp_parts; *next != part;
1146                                 next = &((*next)->mp_next))
1147                                 continue;
1148                         *next = NULL;
1149                         free_content(p);
1150                         free((char *) part);
1151                 }
1152         }
1153
1154 last_part:
1155         /* reverse the order of the parts for multipart/alternative */
1156         if (ct->c_subtype == MULTI_ALTERNATE)
1157                 reverse_parts(ct);
1158
1159         /*
1160         ** label all subparts with part number, and
1161         ** then initialize the content of the subpart.
1162         */
1163         {
1164                 int partnum;
1165                 char *pp;
1166                 char partnam[BUFSIZ];
1167
1168                 if (ct->c_partno) {
1169                         snprintf(partnam, sizeof(partnam), "%s.",
1170                                         ct->c_partno);
1171                         pp = partnam + strlen(partnam);
1172                 } else {
1173                         pp = partnam;
1174                 }
1175
1176                 for (part = m->mp_parts, partnum = 1; part;
1177                         part = part->mp_next, partnum++) {
1178                         p = part->mp_part;
1179
1180                         sprintf(pp, "%d", partnum);
1181                         p->c_partno = getcpy(partnam);
1182
1183                         /* initialize the content of the subparts */
1184                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1185                                 fclose(ct->c_fp);
1186                                 ct->c_fp = NULL;
1187                                 return NOTOK;
1188                         }
1189                 }
1190         }
1191
1192         fclose(ct->c_fp);
1193         ct->c_fp = NULL;
1194         return OK;
1195 }
1196
1197
1198 /*
1199 ** reverse the order of the parts of a multipart
1200 */
1201
1202 static void
1203 reverse_parts(CT ct)
1204 {
1205         int i;
1206         struct multipart *m;
1207         struct part **base, **bmp, **next, *part;
1208
1209         m = (struct multipart *) ct->c_ctparams;
1210
1211         /* if only one part, just return */
1212         if (!m->mp_parts || !m->mp_parts->mp_next)
1213                 return;
1214
1215         /* count number of parts */
1216         i = 0;
1217         for (part = m->mp_parts; part; part = part->mp_next)
1218                 i++;
1219
1220         /* allocate array of pointers to the parts */
1221         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1222                 adios(NULL, "out of memory");
1223         bmp = base;
1224
1225         /* point at all the parts */
1226         for (part = m->mp_parts; part; part = part->mp_next)
1227                 *bmp++ = part;
1228         *bmp = NULL;
1229
1230         /* reverse the order of the parts */
1231         next = &m->mp_parts;
1232         for (bmp--; bmp >= base; bmp--) {
1233                 part = *bmp;
1234                 *next = part;
1235                 next = &part->mp_next;
1236         }
1237         *next = NULL;
1238
1239         /* free array of pointers */
1240         free((char *) base);
1241 }
1242
1243
1244 /*
1245 ** MESSAGE
1246 */
1247
1248 static int
1249 InitMessage(CT ct)
1250 {
1251         struct k2v *kv;
1252         CI ci = &ct->c_ctinfo;
1253
1254         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1255                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1256                 return NOTOK;
1257         }
1258
1259         /* check for missing subtype */
1260         if (!*ci->ci_subtype)
1261                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1262
1263         /* match subtype */
1264         for (kv = SubMessage; kv->kv_key; kv++)
1265                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1266                         break;
1267         ct->c_subtype = kv->kv_value;
1268
1269         switch (ct->c_subtype) {
1270         case MESSAGE_RFC822:
1271                 break;
1272
1273         case MESSAGE_PARTIAL:
1274                 {
1275                 char **ap, **ep;
1276                 struct partial *p;
1277
1278                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1279                         adios(NULL, "out of memory");
1280                 ct->c_ctparams = (void *) p;
1281
1282                 /*
1283                 ** scan for parameters "id", "number",
1284                 ** and "total"
1285                 */
1286                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1287                         if (!mh_strcasecmp(*ap, "id")) {
1288                                 p->pm_partid = getcpy(*ep);
1289                                 continue;
1290                         }
1291                         if (!mh_strcasecmp(*ap, "number")) {
1292                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1293 invalid_param:
1294                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1295                                         return NOTOK;
1296                                 }
1297                                 continue;
1298                         }
1299                         if (!mh_strcasecmp(*ap, "total")) {
1300                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1301                                                 p->pm_maxno < 1)
1302                                         goto invalid_param;
1303                                 continue;
1304                         }
1305                 }
1306
1307                 if (!p->pm_partid || !p->pm_partno
1308                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1309                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1310                         return NOTOK;
1311                 }
1312                 }
1313                 break;
1314
1315         case MESSAGE_EXTERNAL:
1316                 {
1317                 CT p;
1318                 FILE *fp;
1319
1320                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1321                         advise(ct->c_file, "unable to open for reading");
1322                         return NOTOK;
1323                 }
1324
1325                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1326
1327                 if (!(p = get_content(fp, ct->c_file, 0))) {
1328                         ct->c_fp = NULL;
1329                         return NOTOK;
1330                 }
1331
1332                 p->c_fp = NULL;
1333                 p->c_end = p->c_begin;
1334
1335                 fclose(ct->c_fp);
1336                 ct->c_fp = NULL;
1337
1338                 switch (p->c_type) {
1339                 case CT_MULTIPART:
1340                         break;
1341
1342                 case CT_MESSAGE:
1343                         if (p->c_subtype != MESSAGE_RFC822)
1344                                 break;
1345                         /* else fall... */
1346                 default:
1347                         if (p->c_ctinitfnx)
1348                                 (*p->c_ctinitfnx) (p);
1349                         break;
1350                 }
1351                 }
1352                 break;
1353
1354         default:
1355                 break;
1356         }
1357
1358         return OK;
1359 }
1360
1361
1362 /*
1363 ** APPLICATION
1364 */
1365
1366 static int
1367 InitApplication(CT ct)
1368 {
1369         struct k2v *kv;
1370         CI ci = &ct->c_ctinfo;
1371
1372         /* match subtype */
1373         for (kv = SubApplication; kv->kv_key; kv++)
1374                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1375                         break;
1376         ct->c_subtype = kv->kv_value;
1377
1378         return OK;
1379 }
1380
1381
1382 /*
1383 ** TRANSFER ENCODINGS
1384 */
1385
1386 static int
1387 init_encoding(CT ct, OpenCEFunc openfnx)
1388 {
1389         CE ce;
1390
1391         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1392                 adios(NULL, "out of memory");
1393
1394         ct->c_cefile     = ce;
1395         ct->c_ceopenfnx  = openfnx;
1396         ct->c_ceclosefnx = close_encoding;
1397         ct->c_cesizefnx  = size_encoding;
1398
1399         return OK;
1400 }
1401
1402
1403 void
1404 close_encoding(CT ct)
1405 {
1406         CE ce;
1407
1408         if (!(ce = ct->c_cefile))
1409                 return;
1410
1411         if (ce->ce_fp) {
1412                 fclose(ce->ce_fp);
1413                 ce->ce_fp = NULL;
1414         }
1415 }
1416
1417
1418 static unsigned long
1419 size_encoding(CT ct)
1420 {
1421         int fd;
1422         unsigned long size;
1423         char *file;
1424         CE ce;
1425         struct stat st;
1426
1427         if (!(ce = ct->c_cefile))
1428                 return (ct->c_end - ct->c_begin);
1429
1430         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1431                 return (long) st.st_size;
1432
1433         if (ce->ce_file) {
1434                 if (stat(ce->ce_file, &st) != NOTOK)
1435                         return (long) st.st_size;
1436                 else
1437                         return 0L;
1438         }
1439
1440         if (ct->c_encoding == CE_EXTERNAL)
1441                 return (ct->c_end - ct->c_begin);
1442
1443         file = NULL;
1444         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1445                 return (ct->c_end - ct->c_begin);
1446
1447         if (fstat(fd, &st) != NOTOK)
1448                 size = (long) st.st_size;
1449         else
1450                 size = 0L;
1451
1452         (*ct->c_ceclosefnx) (ct);
1453         return size;
1454 }
1455
1456
1457 /*
1458 ** BASE64
1459 */
1460
/*
** Base64 decoding table, indexed by 7-bit character code.  An entry
** is the 6-bit value of the character, or 0xff for characters that
** are not base64 digits (that includes the padding character '=').
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x1f: control characters */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' is 62, '/' is 63 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0'..'9' are 52..61 */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x5f: 'A'..'Z' are 0..25 */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x7f: 'a'..'z' are 26..51 */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1479
1480
1481 static int
1482 InitBase64(CT ct)
1483 {
1484         return init_encoding(ct, openBase64);
1485 }
1486
1487
1488 static int
1489 openBase64(CT ct, char **file)
1490 {
1491         int bitno, cc;
1492         int fd, len, skip, own_ct_fp = 0;
1493         unsigned long bits;
1494         unsigned char value, *b, *b1, *b2, *b3;
1495         unsigned char *cp, *ep;
1496         char buffer[BUFSIZ];
1497         /* sbeck -- handle suffixes */
1498         CI ci;
1499         CE ce;
1500
1501         b  = (unsigned char *) &bits;
1502         b1 = &b[endian > 0 ? 1 : 2];
1503         b2 = &b[endian > 0 ? 2 : 1];
1504         b3 = &b[endian > 0 ? 3 : 0];
1505
1506         ce = ct->c_cefile;
1507         if (ce->ce_fp) {
1508                 fseek(ce->ce_fp, 0L, SEEK_SET);
1509                 goto ready_to_go;
1510         }
1511
1512         if (ce->ce_file) {
1513                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1514                         content_error(ce->ce_file, ct,
1515                                         "unable to fopen for reading");
1516                         return NOTOK;
1517                 }
1518                 goto ready_to_go;
1519         }
1520
1521         if (*file == NULL) {
1522                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1523                 ce->ce_unlink = 1;
1524         } else {
1525                 ce->ce_file = getcpy(*file);
1526                 ce->ce_unlink = 0;
1527         }
1528
1529         /* sbeck@cise.ufl.edu -- handle suffixes */
1530         ci = &ct->c_ctinfo;
1531         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1532                         invo_name, ci->ci_type, ci->ci_subtype);
1533         cp = context_find(buffer);
1534         if (cp == NULL || *cp == '\0') {
1535                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1536                                 ci->ci_type);
1537                 cp = context_find(buffer);
1538         }
1539         if (cp != NULL && *cp != '\0') {
1540                 if (ce->ce_unlink) {
1541                         /*
1542                         ** Temporary file already exists, so we rename to
1543                         ** version with extension.
1544                         */
1545                         char *file_org = strdup(ce->ce_file);
1546                         ce->ce_file = add(cp, ce->ce_file);
1547                         if (rename(file_org, ce->ce_file)) {
1548                                 adios(ce->ce_file, "unable to rename %s to ",
1549                                                 file_org);
1550                         }
1551                         free(file_org);
1552
1553                 } else {
1554                         ce->ce_file = add(cp, ce->ce_file);
1555                 }
1556         }
1557
1558         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1559                 content_error(ce->ce_file, ct,
1560                                 "unable to fopen for reading/writing");
1561                 return NOTOK;
1562         }
1563
1564         if ((len = ct->c_end - ct->c_begin) < 0)
1565                 adios(NULL, "internal error(1)");
1566
1567         if (!ct->c_fp) {
1568                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1569                         content_error(ct->c_file, ct,
1570                                         "unable to open for reading");
1571                         return NOTOK;
1572                 }
1573                 own_ct_fp = 1;
1574         }
1575
1576         bitno = 18;
1577         bits = 0L;
1578         skip = 0;
1579
1580         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1581         while (len > 0) {
1582                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1583                 case NOTOK:
1584                         content_error(ct->c_file, ct, "error reading from");
1585                         goto clean_up;
1586
1587                 case OK:
1588                         content_error(NULL, ct, "premature eof");
1589                         goto clean_up;
1590
1591                 default:
1592                         if (cc > len)
1593                                 cc = len;
1594                         len -= cc;
1595
1596                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1597                                 switch (*cp) {
1598                                 default:
1599                                         if (isspace(*cp))
1600                                                 break;
1601                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1602                                                 if (debugsw) {
1603                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1604                                                 }
1605                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1606                                                 continue;
1607                                         }
1608
1609                                         bits |= value << bitno;
1610 test_end:
1611                                         if ((bitno -= 6) < 0) {
1612                                                 putc((char) *b1, ce->ce_fp);
1613                                                 if (skip < 2) {
1614                                                         putc((char) *b2, ce->ce_fp);
1615                                                         if (skip < 1) {
1616                                                                 putc((char) *b3, ce->ce_fp);
1617                                                         }
1618                                                 }
1619
1620                                                 if (ferror(ce->ce_fp)) {
1621                                                         content_error(ce->ce_file, ct,
1622                                                                                    "error writing to");
1623                                                         goto clean_up;
1624                                                 }
1625                                                 bitno = 18, bits = 0L, skip = 0;
1626                                         }
1627                                         break;
1628
1629                                 case '=':
1630                                         if (++skip > 3)
1631                                                 goto self_delimiting;
1632                                         goto test_end;
1633                                 }
1634                         }
1635                 }
1636         }
1637
1638         if (bitno != 18) {
1639                 if (debugsw)
1640                         fprintf(stderr, "premature ending (bitno %d)\n",
1641                                         bitno);
1642
1643                 content_error(NULL, ct, "invalid BASE64 encoding");
1644                 goto clean_up;
1645         }
1646
1647 self_delimiting:
1648         fseek(ct->c_fp, 0L, SEEK_SET);
1649
1650         if (fflush(ce->ce_fp)) {
1651                 content_error(ce->ce_file, ct, "error writing to");
1652                 goto clean_up;
1653         }
1654
1655         fseek(ce->ce_fp, 0L, SEEK_SET);
1656
1657 ready_to_go:
1658         *file = ce->ce_file;
1659         if (own_ct_fp) {
1660                 fclose(ct->c_fp);
1661                 ct->c_fp = NULL;
1662         }
1663         return fileno(ce->ce_fp);
1664
1665 clean_up:
1666         free_encoding(ct, 0);
1667         if (own_ct_fp) {
1668                 fclose(ct->c_fp);
1669                 ct->c_fp = NULL;
1670         }
1671         return NOTOK;
1672 }
1673
1674
1675 /*
1676 ** QUOTED PRINTABLE
1677 */
1678
/*
** Hexadecimal digit values, indexed by 7-bit character code.  '0'..'9',
** 'A'..'F' and 'a'..'f' map to 0..15; every other entry is 0, so the
** table alone cannot tell '0' from a character that is no hex digit.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x2f: no hex digits */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0'..'9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x5f: 'A'..'F' */
	0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x7f: 'a'..'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1697
1698
1699 static int
1700 InitQuoted(CT ct)
1701 {
1702         return init_encoding(ct, openQuoted);
1703 }
1704
1705
1706 static int
1707 openQuoted(CT ct, char **file)
1708 {
1709         int cc, len, quoted, own_ct_fp = 0;
1710         unsigned char *cp, *ep;
1711         char buffer[BUFSIZ];
1712         unsigned char mask = 0;
1713         CE ce;
1714         /* sbeck -- handle suffixes */
1715         CI ci;
1716
1717         ce = ct->c_cefile;
1718         if (ce->ce_fp) {
1719                 fseek(ce->ce_fp, 0L, SEEK_SET);
1720                 goto ready_to_go;
1721         }
1722
1723         if (ce->ce_file) {
1724                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1725                         content_error(ce->ce_file, ct,
1726                                         "unable to fopen for reading");
1727                         return NOTOK;
1728                 }
1729                 goto ready_to_go;
1730         }
1731
1732         if (*file == NULL) {
1733                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1734                 ce->ce_unlink = 1;
1735         } else {
1736                 ce->ce_file = getcpy(*file);
1737                 ce->ce_unlink = 0;
1738         }
1739
1740         /* sbeck@cise.ufl.edu -- handle suffixes */
1741         ci = &ct->c_ctinfo;
1742         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1743                         invo_name, ci->ci_type, ci->ci_subtype);
1744         cp = context_find(buffer);
1745         if (cp == NULL || *cp == '\0') {
1746                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1747                                 ci->ci_type);
1748                 cp = context_find(buffer);
1749         }
1750         if (cp != NULL && *cp != '\0') {
1751                 if (ce->ce_unlink) {
1752                         /*
1753                         ** Temporary file already exists, so we rename to
1754                         ** version with extension.
1755                         */
1756                         char *file_org = strdup(ce->ce_file);
1757                         ce->ce_file = add(cp, ce->ce_file);
1758                         if (rename(file_org, ce->ce_file)) {
1759                                 adios(ce->ce_file, "unable to rename %s to ",
1760                                                 file_org);
1761                         }
1762                         free(file_org);
1763
1764                 } else {
1765                         ce->ce_file = add(cp, ce->ce_file);
1766                 }
1767         }
1768
1769         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1770                 content_error(ce->ce_file, ct,
1771                                 "unable to fopen for reading/writing");
1772                 return NOTOK;
1773         }
1774
1775         if ((len = ct->c_end - ct->c_begin) < 0)
1776                 adios(NULL, "internal error(2)");
1777
1778         if (!ct->c_fp) {
1779                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1780                         content_error(ct->c_file, ct,
1781                                         "unable to open for reading");
1782                         return NOTOK;
1783                 }
1784                 own_ct_fp = 1;
1785         }
1786
1787         quoted = 0;
1788
1789         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1790         while (len > 0) {
1791                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1792                         content_error(NULL, ct, "premature eof");
1793                         goto clean_up;
1794                 }
1795
1796                 if ((cc = strlen(buffer)) > len)
1797                         cc = len;
1798                 len -= cc;
1799
1800                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1801                         if (!isspace(*ep))
1802                                 break;
1803                 *++ep = '\n', ep++;
1804
1805                 for (; cp < ep; cp++) {
1806                         if (quoted > 0) {
1807                                 /* in an escape sequence */
1808                                 if (quoted == 1) {
1809                                         /* at byte 1 of an escape sequence */
1810                                         mask = hex2nib[*cp & 0x7f];
1811                                         /* next is byte 2 */
1812                                         quoted = 2;
1813                                 } else {
1814                                         /* at byte 2 of an escape sequence */
1815                                         mask <<= 4;
1816                                         mask |= hex2nib[*cp & 0x7f];
1817                                         putc(mask, ce->ce_fp);
1818                                         if (ferror(ce->ce_fp)) {
1819                                                 content_error(ce->ce_file, ct, "error writing to");
1820                                                 goto clean_up;
1821                                         }
1822                                         /*
1823                                         ** finished escape sequence; next may
1824                                         ** be literal or a new escape sequence
1825                                         */
1826                                         quoted = 0;
1827                                 }
1828                                 /* on to next byte */
1829                                 continue;
1830                         }
1831
1832                         /* not in an escape sequence */
1833                         if (*cp == '=') {
1834                                 /*
1835                                 ** starting an escape sequence,
1836                                 ** or invalid '='?
1837                                 */
1838                                 if (cp + 1 < ep && cp[1] == '\n') {
1839                                         /* "=\n" soft line break, eat the \n */
1840                                         cp++;
1841                                         continue;
1842                                 }
1843                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1844                                         /*
1845                                         ** We don't have 2 bytes left,
1846                                         ** so this is an invalid escape
1847                                         ** sequence; just show the raw bytes
1848                                         ** (below).
1849                                         */
1850                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1851                                         /*
1852                                         ** Next 2 bytes are hex digits,
1853                                         ** making this a valid escape
1854                                         ** sequence; let's decode it (above).
1855                                         */
1856                                         quoted = 1;
1857                                         continue;
1858                                 } else {
1859                                         /*
1860                                         ** One or both of the next 2 is
1861                                         ** out of range, making this an
1862                                         ** invalid escape sequence; just
1863                                         ** show the raw bytes (below).
1864                                         */
1865                                 }
1866                         }
1867
1868                         /* Just show the raw byte. */
1869                         putc(*cp, ce->ce_fp);
1870                         if (ferror(ce->ce_fp)) {
1871                                 content_error(ce->ce_file, ct,
1872                                                 "error writing to");
1873                                 goto clean_up;
1874                         }
1875                 }
1876         }
1877         if (quoted) {
1878                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1879                 goto clean_up;
1880         }
1881
1882         fseek(ct->c_fp, 0L, SEEK_SET);
1883
1884         if (fflush(ce->ce_fp)) {
1885                 content_error(ce->ce_file, ct, "error writing to");
1886                 goto clean_up;
1887         }
1888
1889         fseek(ce->ce_fp, 0L, SEEK_SET);
1890
1891 ready_to_go:
1892         *file = ce->ce_file;
1893         if (own_ct_fp) {
1894                 fclose(ct->c_fp);
1895                 ct->c_fp = NULL;
1896         }
1897         return fileno(ce->ce_fp);
1898
1899 clean_up:
1900         free_encoding(ct, 0);
1901         if (own_ct_fp) {
1902                 fclose(ct->c_fp);
1903                 ct->c_fp = NULL;
1904         }
1905         return NOTOK;
1906 }
1907
1908
1909 /*
1910 ** 7BIT
1911 */
1912
1913 static int
1914 Init7Bit(CT ct)
1915 {
1916         if (init_encoding(ct, open7Bit) == NOTOK)
1917                 return NOTOK;
1918
1919         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1920         return OK;
1921 }
1922
1923
1924 int
1925 open7Bit(CT ct, char **file)
1926 {
1927         int cc, fd, len, own_ct_fp = 0;
1928         char buffer[BUFSIZ];
1929         /* sbeck -- handle suffixes */
1930         char *cp;
1931         CI ci;
1932         CE ce;
1933
1934         ce = ct->c_cefile;
1935         if (ce->ce_fp) {
1936                 fseek(ce->ce_fp, 0L, SEEK_SET);
1937                 goto ready_to_go;
1938         }
1939
1940         if (ce->ce_file) {
1941                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1942                         content_error(ce->ce_file, ct,
1943                                         "unable to fopen for reading");
1944                         return NOTOK;
1945                 }
1946                 goto ready_to_go;
1947         }
1948
1949         if (*file == NULL) {
1950                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1951                 ce->ce_unlink = 1;
1952         } else {
1953                 ce->ce_file = getcpy(*file);
1954                 ce->ce_unlink = 0;
1955         }
1956
1957         /* sbeck@cise.ufl.edu -- handle suffixes */
1958         ci = &ct->c_ctinfo;
1959         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1960                         invo_name, ci->ci_type, ci->ci_subtype);
1961         cp = context_find(buffer);
1962         if (cp == NULL || *cp == '\0') {
1963                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1964                                 ci->ci_type);
1965                 cp = context_find(buffer);
1966         }
1967         if (cp != NULL && *cp != '\0') {
1968                 if (ce->ce_unlink) {
1969                         /*
1970                         ** Temporary file already exists, so we rename to
1971                         ** version with extension.
1972                         */
1973                         char *file_org = strdup(ce->ce_file);
1974                         ce->ce_file = add(cp, ce->ce_file);
1975                         if (rename(file_org, ce->ce_file)) {
1976                                 adios(ce->ce_file, "unable to rename %s to ",
1977                                                 file_org);
1978                         }
1979                         free(file_org);
1980
1981                 } else {
1982                         ce->ce_file = add(cp, ce->ce_file);
1983                 }
1984         }
1985
1986         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1987                 content_error(ce->ce_file, ct,
1988                                 "unable to fopen for reading/writing");
1989                 return NOTOK;
1990         }
1991
1992         if (ct->c_type == CT_MULTIPART) {
1993                 char **ap, **ep;
1994                 CI ci = &ct->c_ctinfo;
1995
1996                 len = 0;
1997                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1998                                 ci->ci_subtype);
1999                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2000                                 strlen(ci->ci_subtype);
2001                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2002                         putc(';', ce->ce_fp);
2003                         len++;
2004
2005                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2006                                         *ap, *ep);
2007
2008                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2009                                 fputs("\n\t", ce->ce_fp);
2010                                 len = 8;
2011                         } else {
2012                                 putc(' ', ce->ce_fp);
2013                                 len++;
2014                         }
2015                         fprintf(ce->ce_fp, "%s", buffer);
2016                         len += cc;
2017                 }
2018
2019                 if (ci->ci_comment) {
2020                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2021                                                 >= CPERLIN) {
2022                                 fputs("\n\t", ce->ce_fp);
2023                                 len = 8;
2024                         } else {
2025                                 putc(' ', ce->ce_fp);
2026                                 len++;
2027                         }
2028                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2029                         len += cc;
2030                 }
2031                 fprintf(ce->ce_fp, "\n");
2032                 if (ct->c_id)
2033                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2034                 if (ct->c_descr)
2035                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2036                 if (ct->c_dispo)
2037                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2038                 fprintf(ce->ce_fp, "\n");
2039         }
2040
2041         if ((len = ct->c_end - ct->c_begin) < 0)
2042                 adios(NULL, "internal error(3)");
2043
2044         if (!ct->c_fp) {
2045                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2046                         content_error(ct->c_file, ct,
2047                                         "unable to open for reading");
2048                         return NOTOK;
2049                 }
2050                 own_ct_fp = 1;
2051         }
2052
2053         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2054         while (len > 0)
2055                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2056                 case NOTOK:
2057                         content_error(ct->c_file, ct, "error reading from");
2058                         goto clean_up;
2059
2060                 case OK:
2061                         content_error(NULL, ct, "premature eof");
2062                         goto clean_up;
2063
2064                 default:
2065                         if (cc > len)
2066                                 cc = len;
2067                         len -= cc;
2068
2069                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2070                         if (ferror(ce->ce_fp)) {
2071                                 content_error(ce->ce_file, ct,
2072                                                 "error writing to");
2073                                 goto clean_up;
2074                         }
2075                 }
2076
2077         fseek(ct->c_fp, 0L, SEEK_SET);
2078
2079         if (fflush(ce->ce_fp)) {
2080                 content_error(ce->ce_file, ct, "error writing to");
2081                 goto clean_up;
2082         }
2083
2084         fseek(ce->ce_fp, 0L, SEEK_SET);
2085
2086 ready_to_go:
2087         *file = ce->ce_file;
2088         if (own_ct_fp) {
2089                 fclose(ct->c_fp);
2090                 ct->c_fp = NULL;
2091         }
2092         return fileno(ce->ce_fp);
2093
2094 clean_up:
2095         free_encoding(ct, 0);
2096         if (own_ct_fp) {
2097                 fclose(ct->c_fp);
2098                 ct->c_fp = NULL;
2099         }
2100         return NOTOK;
2101 }