Move #include from h/mh.h to source files
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20
21 extern int debugsw;
22
23 extern int endian;  /* mhmisc.c */
24
25 extern pid_t xpid;  /* mhshowsbr.c  */
26
27 /*
28 ** Directory to place temp files.  This must
29 ** be set before these routines are called.
30 */
31 char *tmp;
32
33 /*
34 ** Structures for TEXT messages
35 */
/*
** Each entry pairs a text subtype name with its TEXT_* code.  The final
** entry has a NULL name and carries TEXT_UNKNOWN.
** NOTE(review): presumably lookups stop at the NULL name and use that
** entry's code as the fallback -- confirm against the lookup code.
*/
36 struct k2v SubText[] = {
37         { "plain", TEXT_PLAIN },
38         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
39         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
40         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
41 };
42
/*
** Each entry pairs a character set name with its CHARSET_* code.  The
** final entry has a NULL name and carries CHARSET_UNKNOWN.
** NOTE(review): presumably the NULL entry terminates lookups -- confirm
** against the lookup code.
*/
43 struct k2v Charset[] = {
44         { "us-ascii",   CHARSET_USASCII },
45         { "iso-8859-1", CHARSET_LATIN },
46         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
47 };
48
49 /*
50 ** Structures for MULTIPART messages
51 */
/*
** Each entry pairs a multipart subtype name with its MULTI_* code.  The
** final entry has a NULL name and carries MULTI_UNKNOWN.
** NOTE(review): presumably the NULL entry terminates lookups -- confirm
** against the lookup code.
*/
52 struct k2v SubMultiPart[] = {
53         { "mixed",       MULTI_MIXED },
54         { "alternative", MULTI_ALTERNATE },
55         { "digest",      MULTI_DIGEST },
56         { "parallel",    MULTI_PARALLEL },
57         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
58 };
59
60 /*
61 ** Structures for MESSAGE messages
62 */
/*
** Each entry pairs a message subtype name with its MESSAGE_* code.  The
** final entry has a NULL name and carries MESSAGE_UNKNOWN.
** NOTE(review): presumably the NULL entry terminates lookups -- confirm
** against the lookup code.
*/
63 struct k2v SubMessage[] = {
64         { "rfc822",        MESSAGE_RFC822 },
65         { "partial",       MESSAGE_PARTIAL },
66         { "external-body", MESSAGE_EXTERNAL },
67         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
68 };
69
70 /*
71 ** Structure for APPLICATION messages
72 */
/*
** Each entry pairs an application subtype name with its APPLICATION_*
** code.  The final entry has a NULL name and carries APPLICATION_UNKNOWN.
** NOTE(review): presumably the NULL entry terminates lookups -- confirm
** against the lookup code.
*/
73 struct k2v SubApplication[] = {
74         { "octet-stream", APPLICATION_OCTETS },
75         { "postscript",   APPLICATION_POSTSCRIPT },
76         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
77 };
78
79
80 /* mhmisc.c */
81 int part_ok(CT, int);
82 int type_ok(CT, int);
83 int make_intermediates(char *);
84 void content_error(char *, CT, char *, ...);
85
86 /* mhfree.c */
87 void free_content(CT);
88 void free_encoding(CT, int);
89
90 /*
91 ** static prototypes
92 */
93 static CT get_content(FILE *, char *, int);
94 static int get_comment(CT, unsigned char **, int);
95
96 static int InitGeneric(CT);
97 static int InitText(CT);
98 static int InitMultiPart(CT);
99 static void reverse_parts(CT);
100 static int InitMessage(CT);
101 static int InitApplication(CT);
102 static int init_encoding(CT, OpenCEFunc);
103 static unsigned long size_encoding(CT);
104 static int InitBase64(CT);
105 static int openBase64(CT, char **);
106 static int InitQuoted(CT);
107 static int openQuoted(CT, char **);
108 static int Init7Bit(CT);
109
/*
** Content type name -> CT_* code and Init function.
**
** get_content() scans this table until it reaches an entry whose si_key
** is NULL.  If nothing matched and uprf(type, "X-") is false, it steps
** one entry further, so the CT_EXTENSION sentinel must come directly
** before the CT_UNKNOWN sentinel.
*/
110 struct str2init str2cts[] = {
111         { "application", CT_APPLICATION, InitApplication },
112         { "audio",       CT_AUDIO,       InitGeneric },
113         { "image",       CT_IMAGE,       InitGeneric },
114         { "message",     CT_MESSAGE,     InitMessage },
115         { "multipart",   CT_MULTIPART,   InitMultiPart },
116         { "text",        CT_TEXT,        InitText },
117         { "video",       CT_VIDEO,       InitGeneric },
118         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
119         { NULL,          CT_UNKNOWN,     NULL },
120 };
121
/*
** Content-Transfer-Encoding name -> CE_* code and Init function.
**
** get_content() scans this table until it reaches an entry whose si_key
** is NULL.  If nothing matched and uprf(encoding, "X-") is false, it
** steps one entry further, so the CE_EXTENSION sentinel must come
** directly before the CE_UNKNOWN sentinel.  A NULL si_init means no
** Init function is called for that encoding.
*/
122 struct str2init str2ces[] = {
123         { "base64",           CE_BASE64,    InitBase64 },
124         { "quoted-printable", CE_QUOTED,    InitQuoted },
125         { "8bit",             CE_8BIT,      Init7Bit },
126         { "7bit",             CE_7BIT,      Init7Bit },
127         { "binary",           CE_BINARY,    Init7Bit },
128         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
129         { NULL,               CE_UNKNOWN,    NULL },
130 };
131
132
/*
** Hand a status value back to the caller unchanged, unless its low
** seven bits equal SIGQUIT while its high byte is not 0xff.  In that
** one case flush stdout and stderr and terminate with exit status 1.
*/
int
pidcheck(int status)
{
        int high_byte_full = ((status & 0xff00) == 0xff00);
        int low_bits = status & 0x007f;

        if (!high_byte_full && low_bits == SIGQUIT) {
                fflush(stdout);
                fflush(stderr);
                exit(1);
        }

        return status;
}
144
145
146 /*
147 ** Main entry point for parsing a MIME message or file.
148 ** It returns the Content structure for the top level
149 ** entity in the file.
150 */
151 CT
152 parse_mime(char *file)
153 {
154         int is_stdin;
155         char buffer[BUFSIZ];
156         FILE *fp;
157         CT ct;
158
159         /*
160         ** Check if file is actually standard input
161         */
162         if ((is_stdin = (strcmp(file, "-")==0))) {
163                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
164                 if (tfile == NULL) {
165                         advise("mhparse", "unable to create temporary file");
166                         return NULL;
167                 }
168                 file = getcpy(tfile);
169                 chmod(file, 0600);
170
171                 while (fgets(buffer, sizeof(buffer), stdin))
172                         fputs(buffer, fp);
173                 fflush(fp);
174
175                 if (ferror(stdin)) {
176                         unlink(file);
177                         advise("stdin", "error reading");
178                         return NULL;
179                 }
180                 if (ferror(fp)) {
181                         unlink(file);
182                         advise(file, "error writing");
183                         return NULL;
184                 }
185                 fseek(fp, 0L, SEEK_SET);
186         } else if ((fp = fopen(file, "r")) == NULL) {
187                 advise(file, "unable to read");
188                 return NULL;
189         }
190
191         if (!(ct = get_content(fp, file, 1))) {
192                 if (is_stdin)
193                         unlink(file);
194                 advise(NULL, "unable to decode %s", file);
195                 return NULL;
196         }
197
198         if (is_stdin)
199                 ct->c_unlink = 1;  /* temp file to remove */
200
201         ct->c_fp = NULL;
202
203         if (ct->c_end == 0L) {
204                 fseek(fp, 0L, SEEK_END);
205                 ct->c_end = ftell(fp);
206         }
207
208         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
209                 fclose(fp);
210                 free_content(ct);
211                 return NULL;
212         }
213
214         fclose(fp);
215         return ct;
216 }
217
218
219 /*
220 ** Main routine for reading/parsing the headers
221 ** of a message content.
222 **
223 ** toplevel =  1   # we are at the top level of the message
224 ** toplevel =  0   # we are inside message type or multipart type
225 **                 # other than multipart/digest
226 ** toplevel = -1   # we are inside multipart/digest
227 ** NB: on failure we will fclose(in)!
228 */
229
230 static CT
231 get_content(FILE *in, char *file, int toplevel)
232 {
233         int compnum, state;
234         char buf[BUFSIZ], name[NAMESZ];
235         char *np, *vp;
236         CT ct;
237         HF hp;
238
239         /* allocate the content structure */
240         if (!(ct = (CT) calloc(1, sizeof(*ct))))
241                 adios(NULL, "out of memory");
242
243         ct->c_fp = in;
244         ct->c_file = getcpy(file);
245         ct->c_begin = ftell(ct->c_fp) + 1;
246
247         /*
248         ** Parse the header fields for this
249         ** content into a linked list.
250         */
251         for (compnum = 1, state = FLD;;) {
252                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
253                 case FLD:
254                 case FLDPLUS:
255                 case FLDEOF:
256                         compnum++;
257
258                         /* get copies of the buffers */
259                         np = getcpy(name);
260                         vp = getcpy(buf);
261
262                         /* if necessary, get rest of field */
263                         while (state == FLDPLUS) {
264                                 state = m_getfld(state, name, buf,
265                                                 sizeof(buf), in);
266                                 vp = add(buf, vp);  /* add to previous value */
267                         }
268
269                         /* Now add the header data to the list */
270                         add_header(ct, np, vp);
271
272                         /* continue, if this isn't the last header field */
273                         if (state != FLDEOF) {
274                                 ct->c_begin = ftell(in) + 1;
275                                 continue;
276                         }
277                         /* else fall... */
278
279                 case BODY:
280                 case BODYEOF:
281                         ct->c_begin = ftell(in) - strlen(buf);
282                         break;
283
284                 case FILEEOF:
285                         ct->c_begin = ftell(in);
286                         break;
287
288                 case LENERR:
289                 case FMTERR:
290                         adios(NULL, "message format error in component #%d",
291                                         compnum);
292
293                 default:
294                         adios(NULL, "getfld() returned %d", state);
295                 }
296
297                 /* break out of the loop */
298                 break;
299         }
300
301         /*
302         ** Read the content headers.  We will parse the
303         ** MIME related header fields into their various
304         ** structures and set internal flags related to
305         ** content type/subtype, etc.
306         */
307
308         hp = ct->c_first_hf;  /* start at first header field */
309         while (hp) {
310                 /* Get MIME-Version field */
311                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
312                         int ucmp;
313                         char c;
314                         unsigned char *cp, *dp;
315
316                         if (ct->c_vrsn) {
317                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
318                                 goto next_header;
319                         }
320                         ct->c_vrsn = getcpy(hp->value);
321
322                         /* Now, cleanup this field */
323                         cp = ct->c_vrsn;
324
325                         while (isspace(*cp))
326                                 cp++;
327                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
328                                 *dp++ = ' ';
329                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
330                                 if (!isspace(*dp))
331                                         break;
332                         *++dp = '\0';
333                         if (debugsw)
334                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
335
336                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
337                                 goto out;
338
339                         for (dp = cp; istoken(*dp); dp++)
340                                 continue;
341                         c = *dp;
342                         *dp = '\0';
343                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
344                         *dp = c;
345                         if (!ucmp) {
346                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
347                         }
348
349                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
350                         /* Get Content-Type field */
351                         struct str2init *s2i;
352                         CI ci = &ct->c_ctinfo;
353
354                         /* Check if we've already seen a Content-Type header */
355                         if (ct->c_ctline) {
356                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
357                                 goto next_header;
358                         }
359
360                         /* Parse the Content-Type field */
361                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
362                                 goto out;
363
364                         /*
365                         ** Set the Init function and the internal
366                         ** flag for this content type.
367                         */
368                         for (s2i = str2cts; s2i->si_key; s2i++)
369                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
370                                         break;
371                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
372                                 s2i++;
373                         ct->c_type = s2i->si_val;
374                         ct->c_ctinitfnx = s2i->si_init;
375
376                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
377                         /* Get Content-Transfer-Encoding field */
378                         char c;
379                         unsigned char *cp, *dp;
380                         struct str2init *s2i;
381
382                         /*
383                         ** Check if we've already seen the
384                         ** Content-Transfer-Encoding field
385                         */
386                         if (ct->c_celine) {
387                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
388                                 goto next_header;
389                         }
390
391                         /* get copy of this field */
392                         ct->c_celine = cp = getcpy(hp->value);
393
394                         while (isspace(*cp))
395                                 cp++;
396                         for (dp = cp; istoken(*dp); dp++)
397                                 continue;
398                         c = *dp;
399                         *dp = '\0';
400
401                         /*
402                         ** Find the internal flag and Init function
403                         ** for this transfer encoding.
404                         */
405                         for (s2i = str2ces; s2i->si_key; s2i++)
406                                 if (!mh_strcasecmp(cp, s2i->si_key))
407                                         break;
408                         if (!s2i->si_key && !uprf(cp, "X-"))
409                                 s2i++;
410                         *dp = c;
411                         ct->c_encoding = s2i->si_val;
412
413                         /* Call the Init function for this encoding */
414                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
415                                 goto out;
416
417                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
418                         /* Get Content-ID field */
419                         ct->c_id = add(hp->value, ct->c_id);
420
421                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
422                         /* Get Content-Description field */
423                         ct->c_descr = add(hp->value, ct->c_descr);
424
425                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
426                         /* Get Content-Disposition field */
427                         ct->c_dispo = add(hp->value, ct->c_dispo);
428                 }
429
430 next_header:
431                 hp = hp->next;  /* next header field */
432         }
433
434         /*
435         ** Check if we saw a Content-Type field.
436         ** If not, then assign a default value for
437         ** it, and the Init function.
438         */
439         if (!ct->c_ctline) {
440                 /*
441                 ** If we are inside a multipart/digest message,
442                 ** so default type is message/rfc822
443                 */
444                 if (toplevel < 0) {
445                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
446                                 goto out;
447                         ct->c_type = CT_MESSAGE;
448                         ct->c_ctinitfnx = InitMessage;
449                 } else {
450                         /*
451                         ** Else default type is text/plain
452                         */
453                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
454                                 goto out;
455                         ct->c_type = CT_TEXT;
456                         ct->c_ctinitfnx = InitText;
457                 }
458         }
459
460         /* Use default Transfer-Encoding, if necessary */
461         if (!ct->c_celine) {
462                 ct->c_encoding = CE_7BIT;
463                 Init7Bit(ct);
464         }
465
466         return ct;
467
468 out:
469         free_content(ct);
470         return NULL;
471 }
472
473
474 /*
475 ** small routine to add header field to list
476 */
477
478 int
479 add_header(CT ct, char *name, char *value)
480 {
481         HF hp;
482
483         /* allocate header field structure */
484         hp = mh_xmalloc(sizeof(*hp));
485
486         /* link data into header structure */
487         hp->name = name;
488         hp->value = value;
489         hp->next = NULL;
490
491         /* link header structure into the list */
492         if (ct->c_first_hf == NULL) {
493                 ct->c_first_hf = hp;  /* this is the first */
494                 ct->c_last_hf = hp;
495         } else {
496                 ct->c_last_hf->next = hp;  /* add it to the end */
497                 ct->c_last_hf = hp;
498         }
499
500         return 0;
501 }
502
503
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** Returns buf itself when nothing had to be inserted (buf or value is
** NULL, or name= is already present).  Otherwise buf is freed and a
** newly allocated string, terminated by a newline, is returned; value
** is copied into it.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *newbuf = buf;

        /* Assume that name is non-null. */
        if (buf && value) {
                char *name_plus_equal = concat(name, "=", NULL);

                if (!strstr(buf, name_plus_equal)) {
                        char *insertion;
                        unsigned char *cp;
                        char *prefix, *suffix;
                        size_t len;

                        /*
                        ** Trim trailing space, esp. newline.  Working
                        ** with a length keeps this safe for an empty
                        ** or all-whitespace buf.
                        */
                        for (len = strlen(buf);
                                         len > 0 && isspace(buf[len - 1]);
                                         --len) {
                                buf[len - 1] = '\0';
                        }

                        insertion = concat("; ", name, "=", "\"", value, "\"",
                                        NULL);

                        /*
                        ** Insert at first semicolon, if any.
                        ** If none, append to end.
                        */
                        prefix = getcpy(buf);
                        if ((cp = strchr(prefix, ';'))) {
                                suffix = concat(cp, NULL);
                                *cp = '\0';
                                newbuf = concat(prefix, insertion, suffix,
                                                "\n", NULL);
                                free(suffix);
                        } else {
                                /* Append to end. */
                                newbuf = concat(buf, insertion, "\n", NULL);
                        }

                        free(prefix);
                        free(insertion);
                        free(buf);
                }

                free(name_plus_equal);
        }

        return newbuf;
}
561
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** When a quoted value is found, the result is a newly allocated copy
** of the text between the quotes.  Otherwise (no match, or the closing
** quote is missing) value itself is returned, so a caller that wants
** to free the result has to compare it against value first.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *extracted_name_value = value;
        char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
        char *name_suffix_equals = strstr(value, name_suffix_plus_quote);

        free(name_suffix_plus_quote);
        if (name_suffix_equals) {
                /*
                ** The match contains ="  so the opening quote is
                ** always found.
                */
                char *name_suffix_begin = strchr(name_suffix_equals, '"') + 1;
                /* May be NULL: never scan beyond the end of value. */
                char *name_suffix_end = strchr(name_suffix_begin, '"');

                if (name_suffix_end) {
                        size_t len = name_suffix_end - name_suffix_begin;

                        extracted_name_value = mh_xmalloc(len + 1);
                        memcpy(extracted_name_value, name_suffix_begin, len);
                        extracted_name_value[len] = '\0';
                }
        }

        return extracted_name_value;
}
594
595 /*
596 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
597 ** directives.  Fills in the information of the CTinfo structure.
598 */
599 int
600 get_ctinfo(unsigned char *cp, CT ct, int magic)
601 {
602         int i;
603         unsigned char *dp;
604         char **ap, **ep;
605         char c;
606         CI ci;
607
608         ci = &ct->c_ctinfo;
609         i = strlen(invo_name) + 2;
610
611         /* store copy of Content-Type line */
612         cp = ct->c_ctline = getcpy(cp);
613
614         while (isspace(*cp))  /* trim leading spaces */
615                 cp++;
616
617         /* change newlines to spaces */
618         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
619                 *dp++ = ' ';
620
621         /* trim trailing spaces */
622         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
623                 if (!isspace(*dp))
624                         break;
625         *++dp = '\0';
626
627         if (debugsw)
628                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
629
630         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
631                 return NOTOK;
632
633         for (dp = cp; istoken(*dp); dp++)
634                 continue;
635         c = *dp, *dp = '\0';
636         ci->ci_type = getcpy(cp);  /* store content type */
637         *dp = c, cp = dp;
638
639         if (!*ci->ci_type) {
640                 advise(NULL, "invalid %s: field in message %s (empty type)",
641                                 TYPE_FIELD, ct->c_file);
642                 return NOTOK;
643         }
644
645         /* down case the content type string */
646         for (dp = ci->ci_type; *dp; dp++)
647                 if (isalpha(*dp) && isupper(*dp))
648                         *dp = tolower(*dp);
649
650         while (isspace(*cp))
651                 cp++;
652
653         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
654                 return NOTOK;
655
656         if (*cp != '/') {
657                 if (!magic)
658                         ci->ci_subtype = getcpy("");
659                 goto magic_skip;
660         }
661
662         cp++;
663         while (isspace(*cp))
664                 cp++;
665
666         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
667                 return NOTOK;
668
669         for (dp = cp; istoken(*dp); dp++)
670                 continue;
671         c = *dp, *dp = '\0';
672         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
673         *dp = c, cp = dp;
674
675         if (!*ci->ci_subtype) {
676                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
677                 return NOTOK;
678         }
679
680         /* down case the content subtype string */
681         for (dp = ci->ci_subtype; *dp; dp++)
682                 if (isalpha(*dp) && isupper(*dp))
683                         *dp = tolower(*dp);
684
685 magic_skip:
686         while (isspace(*cp))
687                 cp++;
688
689         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
690                 return NOTOK;
691
692         /*
693         ** Parse attribute/value pairs given with Content-Type
694         */
695         ep = (ap = ci->ci_attrs) + NPARMS;
696         while (*cp == ';') {
697                 char *vp;
698                 unsigned char *up;
699
700                 if (ap >= ep) {
701                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
702                         return NOTOK;
703                 }
704
705                 cp++;
706                 while (isspace(*cp))
707                         cp++;
708
709                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
710                         return NOTOK;
711
712                 if (*cp == 0) {
713                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
714                         return OK;
715                 }
716
717                 /* down case the attribute name */
718                 for (dp = cp; istoken(*dp); dp++)
719                         if (isalpha(*dp) && isupper(*dp))
720                                 *dp = tolower(*dp);
721
722                 for (up = dp; isspace(*dp);)
723                         dp++;
724                 if (dp == cp || *dp != '=') {
725                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
726                         return NOTOK;
727                 }
728
729                 vp = (*ap = getcpy(cp)) + (up - cp);
730                 *vp = '\0';
731                 for (dp++; isspace(*dp);)
732                         dp++;
733
734                 /* now add the attribute value */
735                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
736
737                 if (*dp == '"') {
738                         for (cp = ++dp, dp = vp;;) {
739                                 switch (c = *cp++) {
740                                 case '\0':
741 bad_quote:
742                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
743                                         return NOTOK;
744
745                                 case '\\':
746                                         *dp++ = c;
747                                         if ((c = *cp++) == '\0')
748                                                 goto bad_quote;
749                                         /* else fall... */
750
751                                 default:
752                                         *dp++ = c;
753                                         continue;
754
755                                 case '"':
756                                         *dp = '\0';
757                                         break;
758                                 }
759                                 break;
760                         }
761                 } else {
762                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
763                                 continue;
764                         *dp = '\0';
765                 }
766                 if (!*vp) {
767                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
768                         return NOTOK;
769                 }
770                 ap++;
771
772                 while (isspace(*cp))
773                         cp++;
774
775                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
776                         return NOTOK;
777         }
778
779         /*
780         ** Get any <Content-Id> given in buffer
781         */
782         if (magic && *cp == '<') {
783                 if (ct->c_id) {
784                         free(ct->c_id);
785                         ct->c_id = NULL;
786                 }
787                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
788                         advise(NULL, "invalid ID in message %s", ct->c_file);
789                         return NOTOK;
790                 }
791                 c = *dp;
792                 *dp = '\0';
793                 if (*ct->c_id)
794                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
795                 else
796                         ct->c_id = NULL;
797                 *dp++ = c;
798                 cp = dp;
799
800                 while (isspace(*cp))
801                         cp++;
802         }
803
804         /*
805         ** Get any [Content-Description] given in buffer.
806         */
807         if (magic && *cp == '[') {
808                 ct->c_descr = ++cp;
809                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
810                         if (*dp == ']')
811                                 break;
812                 if (dp < cp) {
813                         advise(NULL, "invalid description in message %s",
814                                         ct->c_file);
815                         ct->c_descr = NULL;
816                         return NOTOK;
817                 }
818
819                 c = *dp;
820                 *dp = '\0';
821                 if (*ct->c_descr)
822                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
823                 else
824                         ct->c_descr = NULL;
825                 *dp++ = c;
826                 cp = dp;
827
828                 while (isspace(*cp))
829                         cp++;
830         }
831
832         /*
833         ** Get any {Content-Disposition} given in buffer.
834         */
835         if (magic && *cp == '{') {
836                 ct->c_dispo = ++cp;
837                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
838                         if (*dp == '}')
839                                 break;
840                 if (dp < cp) {
841                         advise(NULL, "invalid disposition in message %s",
842                                         ct->c_file);
843                         ct->c_dispo = NULL;
844                         return NOTOK;
845                 }
846
847                 c = *dp;
848                 *dp = '\0';
849                 if (*ct->c_dispo)
850                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
851                 else
852                         ct->c_dispo = NULL;
853                 *dp++ = c;
854                 cp = dp;
855
856                 while (isspace(*cp))
857                         cp++;
858         }
859
860         /*
861         ** Check if anything is left over
862         */
863         if (*cp) {
864                 if (magic) {
865                         ci->ci_magic = getcpy(cp);
866
867                         /*
868                         ** If there is a Content-Disposition header and
869                         ** it doesn't have a *filename=, extract it from
870                         ** the magic contents.  The mhbasename call skips
871                         ** any leading directory components.
872                         */
873                         if (ct->c_dispo)
874                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
875                         } else
876                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
877         }
878
879         return OK;
880 }
881
882
883 static int
884 get_comment(CT ct, unsigned char **ap, int istype)
885 {
886         int i;
887         char *bp;
888         unsigned char *cp;
889         char c, buffer[BUFSIZ], *dp;
890         CI ci;
891
892         ci = &ct->c_ctinfo;
893         cp = *ap;
894         bp = buffer;
895         cp++;
896
897         for (i = 0;;) {
898                 switch (c = *cp++) {
899                 case '\0':
900 invalid:
901                 advise(NULL, "invalid comment in message %s's %s: field",
902                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
903                 return NOTOK;
904
905                 case '\\':
906                         *bp++ = c;
907                         if ((c = *cp++) == '\0')
908                                 goto invalid;
909                         *bp++ = c;
910                         continue;
911
912                 case '(':
913                         i++;
914                         /* and fall... */
915                 default:
916                         *bp++ = c;
917                         continue;
918
919                 case ')':
920                         if (--i < 0)
921                                 break;
922                         *bp++ = c;
923                         continue;
924                 }
925                 break;
926         }
927         *bp = '\0';
928
929         if (istype) {
930                 if ((dp = ci->ci_comment)) {
931                         ci->ci_comment = concat(dp, " ", buffer, NULL);
932                         free(dp);
933                 } else {
934                         ci->ci_comment = getcpy(buffer);
935                 }
936         }
937
938         while (isspace(*cp))
939                 cp++;
940
941         *ap = cp;
942         return OK;
943 }
944
945
946 /*
947 ** CONTENTS
948 **
949 ** Handles content types audio, image, and video.
950 ** There's not much to do right here.
951 */
952
953 static int
954 InitGeneric(CT ct)
955 {
956         return OK;  /* not much to do here */
957 }
958
959
960 /*
961 ** TEXT
962 */
963
964 static int
965 InitText(CT ct)
966 {
967         char **ap, **ep;
968         struct k2v *kv;
969         struct text *t;
970         CI ci = &ct->c_ctinfo;
971
972         /* check for missing subtype */
973         if (!*ci->ci_subtype)
974                 ci->ci_subtype = add("plain", ci->ci_subtype);
975
976         /* match subtype */
977         for (kv = SubText; kv->kv_key; kv++)
978                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
979                         break;
980         ct->c_subtype = kv->kv_value;
981
982         /* allocate text character set structure */
983         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
984                 adios(NULL, "out of memory");
985         ct->c_ctparams = (void *) t;
986
987         /* scan for charset parameter */
988         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
989                 if (!mh_strcasecmp(*ap, "charset"))
990                         break;
991
992         /* check if content specified a character set */
993         if (*ap) {
994                 /* store its name */
995                 ct->c_charset = getcpy(norm_charmap(*ep));
996                 /* match character set or set to CHARSET_UNKNOWN */
997                 for (kv = Charset; kv->kv_key; kv++) {
998                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
999                                 break;
1000                         }
1001                 }
1002                 t->tx_charset = kv->kv_value;
1003         } else {
1004                 t->tx_charset = CHARSET_UNSPECIFIED;
1005         }
1006
1007         return OK;
1008 }
1009
1010
1011 /*
1012 ** MULTIPART
1013 */
1014
1015 static int
1016 InitMultiPart(CT ct)
1017 {
1018         int inout;
1019         long last, pos;
1020         unsigned char *cp, *dp;
1021         char **ap, **ep;
1022         char *bp, buffer[BUFSIZ];
1023         struct multipart *m;
1024         struct k2v *kv;
1025         struct part *part, **next;
1026         CI ci = &ct->c_ctinfo;
1027         CT p;
1028         FILE *fp;
1029
1030         /*
1031         ** The encoding for multipart messages must be either
1032         ** 7bit, 8bit, or binary (per RFC2045).
1033         */
1034         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1035                 && ct->c_encoding != CE_BINARY) {
1036                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1037                 return NOTOK;
1038         }
1039
1040         /* match subtype */
1041         for (kv = SubMultiPart; kv->kv_key; kv++)
1042                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1043                         break;
1044         ct->c_subtype = kv->kv_value;
1045
1046         /*
1047         ** Check for "boundary" parameter, which is
1048         ** required for multipart messages.
1049         */
1050         bp = 0;
1051         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1052                 if (!mh_strcasecmp(*ap, "boundary")) {
1053                         bp = *ep;
1054                         break;
1055                 }
1056         }
1057
1058         /* complain if boundary parameter is missing */
1059         if (!*ap) {
1060                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1061                 return NOTOK;
1062         }
1063
1064         /* allocate primary structure for multipart info */
1065         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1066                 adios(NULL, "out of memory");
1067         ct->c_ctparams = (void *) m;
1068
1069         /* check if boundary parameter contains only whitespace characters */
1070         for (cp = bp; isspace(*cp); cp++)
1071                 continue;
1072         if (!*cp) {
1073                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1074                 return NOTOK;
1075         }
1076
1077         /* remove trailing whitespace from boundary parameter */
1078         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1079                 if (!isspace(*dp))
1080                         break;
1081         *++dp = '\0';
1082
1083         /* record boundary separators */
1084         m->mp_start = concat(bp, "\n", NULL);
1085         m->mp_stop = concat(bp, "--\n", NULL);
1086
1087         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1088                 advise(ct->c_file, "unable to open for reading");
1089                 return NOTOK;
1090         }
1091
1092         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1093         last = ct->c_end;
1094         next = &m->mp_parts;
1095         part = NULL;
1096         inout = 1;
1097
1098         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1099                 if (pos > last)
1100                         break;
1101
1102                 pos += strlen(buffer);
1103                 if (buffer[0] != '-' || buffer[1] != '-')
1104                         continue;
1105                 if (inout) {
1106                         if (strcmp(buffer + 2, m->mp_start)!=0)
1107                                 continue;
1108 next_part:
1109                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1110                                         == NULL)
1111                                 adios(NULL, "out of memory");
1112                         *next = part;
1113                         next = &part->mp_next;
1114
1115                         if (!(p = get_content(fp, ct->c_file,
1116                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1117                                 ct->c_fp = NULL;
1118                                 return NOTOK;
1119                         }
1120                         p->c_fp = NULL;
1121                         part->mp_part = p;
1122                         pos = p->c_begin;
1123                         fseek(fp, pos, SEEK_SET);
1124                         inout = 0;
1125                 } else {
1126                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1127                                 inout = 1;
1128 end_part:
1129                                 p = part->mp_part;
1130                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1131                                 if (p->c_end < p->c_begin)
1132                                         p->c_begin = p->c_end;
1133                                 if (inout)
1134                                         goto next_part;
1135                                 goto last_part;
1136                         } else {
1137                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1138                                         goto end_part;
1139                         }
1140                 }
1141         }
1142
1143         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1144         if (!inout && part) {
1145                 p = part->mp_part;
1146                 p->c_end = ct->c_end;
1147
1148                 if (p->c_begin >= p->c_end) {
1149                         for (next = &m->mp_parts; *next != part;
1150                                 next = &((*next)->mp_next))
1151                                 continue;
1152                         *next = NULL;
1153                         free_content(p);
1154                         free((char *) part);
1155                 }
1156         }
1157
1158 last_part:
1159         /* reverse the order of the parts for multipart/alternative */
1160         if (ct->c_subtype == MULTI_ALTERNATE)
1161                 reverse_parts(ct);
1162
1163         /*
1164         ** label all subparts with part number, and
1165         ** then initialize the content of the subpart.
1166         */
1167         {
1168                 int partnum;
1169                 char *pp;
1170                 char partnam[BUFSIZ];
1171
1172                 if (ct->c_partno) {
1173                         snprintf(partnam, sizeof(partnam), "%s.",
1174                                         ct->c_partno);
1175                         pp = partnam + strlen(partnam);
1176                 } else {
1177                         pp = partnam;
1178                 }
1179
1180                 for (part = m->mp_parts, partnum = 1; part;
1181                         part = part->mp_next, partnum++) {
1182                         p = part->mp_part;
1183
1184                         sprintf(pp, "%d", partnum);
1185                         p->c_partno = getcpy(partnam);
1186
1187                         /* initialize the content of the subparts */
1188                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1189                                 fclose(ct->c_fp);
1190                                 ct->c_fp = NULL;
1191                                 return NOTOK;
1192                         }
1193                 }
1194         }
1195
1196         fclose(ct->c_fp);
1197         ct->c_fp = NULL;
1198         return OK;
1199 }
1200
1201
1202 /*
1203 ** reverse the order of the parts of a multipart
1204 */
1205
1206 static void
1207 reverse_parts(CT ct)
1208 {
1209         int i;
1210         struct multipart *m;
1211         struct part **base, **bmp, **next, *part;
1212
1213         m = (struct multipart *) ct->c_ctparams;
1214
1215         /* if only one part, just return */
1216         if (!m->mp_parts || !m->mp_parts->mp_next)
1217                 return;
1218
1219         /* count number of parts */
1220         i = 0;
1221         for (part = m->mp_parts; part; part = part->mp_next)
1222                 i++;
1223
1224         /* allocate array of pointers to the parts */
1225         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1226                 adios(NULL, "out of memory");
1227         bmp = base;
1228
1229         /* point at all the parts */
1230         for (part = m->mp_parts; part; part = part->mp_next)
1231                 *bmp++ = part;
1232         *bmp = NULL;
1233
1234         /* reverse the order of the parts */
1235         next = &m->mp_parts;
1236         for (bmp--; bmp >= base; bmp--) {
1237                 part = *bmp;
1238                 *next = part;
1239                 next = &part->mp_next;
1240         }
1241         *next = NULL;
1242
1243         /* free array of pointers */
1244         free((char *) base);
1245 }
1246
1247
1248 /*
1249 ** MESSAGE
1250 */
1251
1252 static int
1253 InitMessage(CT ct)
1254 {
1255         struct k2v *kv;
1256         CI ci = &ct->c_ctinfo;
1257
1258         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1259                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1260                 return NOTOK;
1261         }
1262
1263         /* check for missing subtype */
1264         if (!*ci->ci_subtype)
1265                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1266
1267         /* match subtype */
1268         for (kv = SubMessage; kv->kv_key; kv++)
1269                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1270                         break;
1271         ct->c_subtype = kv->kv_value;
1272
1273         switch (ct->c_subtype) {
1274         case MESSAGE_RFC822:
1275                 break;
1276
1277         case MESSAGE_PARTIAL:
1278                 {
1279                 char **ap, **ep;
1280                 struct partial *p;
1281
1282                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1283                         adios(NULL, "out of memory");
1284                 ct->c_ctparams = (void *) p;
1285
1286                 /*
1287                 ** scan for parameters "id", "number",
1288                 ** and "total"
1289                 */
1290                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1291                         if (!mh_strcasecmp(*ap, "id")) {
1292                                 p->pm_partid = getcpy(*ep);
1293                                 continue;
1294                         }
1295                         if (!mh_strcasecmp(*ap, "number")) {
1296                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1297 invalid_param:
1298                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1299                                         return NOTOK;
1300                                 }
1301                                 continue;
1302                         }
1303                         if (!mh_strcasecmp(*ap, "total")) {
1304                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1305                                                 p->pm_maxno < 1)
1306                                         goto invalid_param;
1307                                 continue;
1308                         }
1309                 }
1310
1311                 if (!p->pm_partid || !p->pm_partno
1312                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1313                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1314                         return NOTOK;
1315                 }
1316                 }
1317                 break;
1318
1319         case MESSAGE_EXTERNAL:
1320                 {
1321                 CT p;
1322                 FILE *fp;
1323
1324                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1325                         advise(ct->c_file, "unable to open for reading");
1326                         return NOTOK;
1327                 }
1328
1329                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1330
1331                 if (!(p = get_content(fp, ct->c_file, 0))) {
1332                         ct->c_fp = NULL;
1333                         return NOTOK;
1334                 }
1335
1336                 p->c_fp = NULL;
1337                 p->c_end = p->c_begin;
1338
1339                 fclose(ct->c_fp);
1340                 ct->c_fp = NULL;
1341
1342                 switch (p->c_type) {
1343                 case CT_MULTIPART:
1344                         break;
1345
1346                 case CT_MESSAGE:
1347                         if (p->c_subtype != MESSAGE_RFC822)
1348                                 break;
1349                         /* else fall... */
1350                 default:
1351                         if (p->c_ctinitfnx)
1352                                 (*p->c_ctinitfnx) (p);
1353                         break;
1354                 }
1355                 }
1356                 break;
1357
1358         default:
1359                 break;
1360         }
1361
1362         return OK;
1363 }
1364
1365
1366 /*
1367 ** APPLICATION
1368 */
1369
1370 static int
1371 InitApplication(CT ct)
1372 {
1373         struct k2v *kv;
1374         CI ci = &ct->c_ctinfo;
1375
1376         /* match subtype */
1377         for (kv = SubApplication; kv->kv_key; kv++)
1378                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1379                         break;
1380         ct->c_subtype = kv->kv_value;
1381
1382         return OK;
1383 }
1384
1385
1386 /*
1387 ** TRANSFER ENCODINGS
1388 */
1389
1390 static int
1391 init_encoding(CT ct, OpenCEFunc openfnx)
1392 {
1393         CE ce;
1394
1395         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1396                 adios(NULL, "out of memory");
1397
1398         ct->c_cefile     = ce;
1399         ct->c_ceopenfnx  = openfnx;
1400         ct->c_ceclosefnx = close_encoding;
1401         ct->c_cesizefnx  = size_encoding;
1402
1403         return OK;
1404 }
1405
1406
1407 void
1408 close_encoding(CT ct)
1409 {
1410         CE ce;
1411
1412         if (!(ce = ct->c_cefile))
1413                 return;
1414
1415         if (ce->ce_fp) {
1416                 fclose(ce->ce_fp);
1417                 ce->ce_fp = NULL;
1418         }
1419 }
1420
1421
1422 static unsigned long
1423 size_encoding(CT ct)
1424 {
1425         int fd;
1426         unsigned long size;
1427         char *file;
1428         CE ce;
1429         struct stat st;
1430
1431         if (!(ce = ct->c_cefile))
1432                 return (ct->c_end - ct->c_begin);
1433
1434         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1435                 return (long) st.st_size;
1436
1437         if (ce->ce_file) {
1438                 if (stat(ce->ce_file, &st) != NOTOK)
1439                         return (long) st.st_size;
1440                 else
1441                         return 0L;
1442         }
1443
1444         if (ct->c_encoding == CE_EXTERNAL)
1445                 return (ct->c_end - ct->c_begin);
1446
1447         file = NULL;
1448         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1449                 return (ct->c_end - ct->c_begin);
1450
1451         if (fstat(fd, &st) != NOTOK)
1452                 size = (long) st.st_size;
1453         else
1454                 size = 0L;
1455
1456         (*ct->c_ceclosefnx) (ct);
1457         return size;
1458 }
1459
1460
1461 /*
1462 ** BASE64
1463 */
1464
/*
** Lookup table from a 7-bit character code to its 6-bit base64 value.
** 'A'-'Z' give 0x00-0x19, 'a'-'z' 0x1a-0x33, '0'-'9' 0x34-0x3d,
** '+' 0x3e and '/' 0x3f.  Every other code holds 0xff, which is
** larger than any valid 6-bit value.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x0f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x1f */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x2f: '+' and '/' */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	/* 0x30 - 0x3f: '0' - '9' */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x40 - 0x4f: 'A' - 'O' */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	/* 0x50 - 0x5f: 'P' - 'Z' */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x60 - 0x6f: 'a' - 'o' */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	/* 0x70 - 0x7f: 'p' - 'z' */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1483
1484
1485 static int
1486 InitBase64(CT ct)
1487 {
1488         return init_encoding(ct, openBase64);
1489 }
1490
1491
1492 static int
1493 openBase64(CT ct, char **file)
1494 {
1495         int bitno, cc;
1496         int fd, len, skip, own_ct_fp = 0;
1497         unsigned long bits;
1498         unsigned char value, *b, *b1, *b2, *b3;
1499         unsigned char *cp, *ep;
1500         char buffer[BUFSIZ];
1501         /* sbeck -- handle suffixes */
1502         CI ci;
1503         CE ce;
1504
1505         b  = (unsigned char *) &bits;
1506         b1 = &b[endian > 0 ? 1 : 2];
1507         b2 = &b[endian > 0 ? 2 : 1];
1508         b3 = &b[endian > 0 ? 3 : 0];
1509
1510         ce = ct->c_cefile;
1511         if (ce->ce_fp) {
1512                 fseek(ce->ce_fp, 0L, SEEK_SET);
1513                 goto ready_to_go;
1514         }
1515
1516         if (ce->ce_file) {
1517                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1518                         content_error(ce->ce_file, ct,
1519                                         "unable to fopen for reading");
1520                         return NOTOK;
1521                 }
1522                 goto ready_to_go;
1523         }
1524
1525         if (*file == NULL) {
1526                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1527                 ce->ce_unlink = 1;
1528         } else {
1529                 ce->ce_file = getcpy(*file);
1530                 ce->ce_unlink = 0;
1531         }
1532
1533         /* sbeck@cise.ufl.edu -- handle suffixes */
1534         ci = &ct->c_ctinfo;
1535         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1536                         invo_name, ci->ci_type, ci->ci_subtype);
1537         cp = context_find(buffer);
1538         if (cp == NULL || *cp == '\0') {
1539                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1540                                 ci->ci_type);
1541                 cp = context_find(buffer);
1542         }
1543         if (cp != NULL && *cp != '\0') {
1544                 if (ce->ce_unlink) {
1545                         /*
1546                         ** Temporary file already exists, so we rename to
1547                         ** version with extension.
1548                         */
1549                         char *file_org = strdup(ce->ce_file);
1550                         ce->ce_file = add(cp, ce->ce_file);
1551                         if (rename(file_org, ce->ce_file)) {
1552                                 adios(ce->ce_file, "unable to rename %s to ",
1553                                                 file_org);
1554                         }
1555                         free(file_org);
1556
1557                 } else {
1558                         ce->ce_file = add(cp, ce->ce_file);
1559                 }
1560         }
1561
1562         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1563                 content_error(ce->ce_file, ct,
1564                                 "unable to fopen for reading/writing");
1565                 return NOTOK;
1566         }
1567
1568         if ((len = ct->c_end - ct->c_begin) < 0)
1569                 adios(NULL, "internal error(1)");
1570
1571         if (!ct->c_fp) {
1572                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1573                         content_error(ct->c_file, ct,
1574                                         "unable to open for reading");
1575                         return NOTOK;
1576                 }
1577                 own_ct_fp = 1;
1578         }
1579
1580         bitno = 18;
1581         bits = 0L;
1582         skip = 0;
1583
1584         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1585         while (len > 0) {
1586                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1587                 case NOTOK:
1588                         content_error(ct->c_file, ct, "error reading from");
1589                         goto clean_up;
1590
1591                 case OK:
1592                         content_error(NULL, ct, "premature eof");
1593                         goto clean_up;
1594
1595                 default:
1596                         if (cc > len)
1597                                 cc = len;
1598                         len -= cc;
1599
1600                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1601                                 switch (*cp) {
1602                                 default:
1603                                         if (isspace(*cp))
1604                                                 break;
1605                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1606                                                 if (debugsw) {
1607                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1608                                                 }
1609                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1610                                                 continue;
1611                                         }
1612
1613                                         bits |= value << bitno;
1614 test_end:
1615                                         if ((bitno -= 6) < 0) {
1616                                                 putc((char) *b1, ce->ce_fp);
1617                                                 if (skip < 2) {
1618                                                         putc((char) *b2, ce->ce_fp);
1619                                                         if (skip < 1) {
1620                                                                 putc((char) *b3, ce->ce_fp);
1621                                                         }
1622                                                 }
1623
1624                                                 if (ferror(ce->ce_fp)) {
1625                                                         content_error(ce->ce_file, ct,
1626                                                                                    "error writing to");
1627                                                         goto clean_up;
1628                                                 }
1629                                                 bitno = 18, bits = 0L, skip = 0;
1630                                         }
1631                                         break;
1632
1633                                 case '=':
1634                                         if (++skip > 3)
1635                                                 goto self_delimiting;
1636                                         goto test_end;
1637                                 }
1638                         }
1639                 }
1640         }
1641
1642         if (bitno != 18) {
1643                 if (debugsw)
1644                         fprintf(stderr, "premature ending (bitno %d)\n",
1645                                         bitno);
1646
1647                 content_error(NULL, ct, "invalid BASE64 encoding");
1648                 goto clean_up;
1649         }
1650
1651 self_delimiting:
1652         fseek(ct->c_fp, 0L, SEEK_SET);
1653
1654         if (fflush(ce->ce_fp)) {
1655                 content_error(ce->ce_file, ct, "error writing to");
1656                 goto clean_up;
1657         }
1658
1659         fseek(ce->ce_fp, 0L, SEEK_SET);
1660
1661 ready_to_go:
1662         *file = ce->ce_file;
1663         if (own_ct_fp) {
1664                 fclose(ct->c_fp);
1665                 ct->c_fp = NULL;
1666         }
1667         return fileno(ce->ce_fp);
1668
1669 clean_up:
1670         free_encoding(ct, 0);
1671         if (own_ct_fp) {
1672                 fclose(ct->c_fp);
1673                 ct->c_fp = NULL;
1674         }
1675         return NOTOK;
1676 }
1677
1678
1679 /*
1680 ** QUOTED PRINTABLE
1681 */
1682
/*
** Lookup table from a 7-bit character code to the value of the
** hexadecimal digit it represents: '0'-'9' give 0-9, 'A'-'F' and
** 'a'-'f' give 10-15.  Every other code holds 0, so the table alone
** cannot tell '0' from a character that is not a hex digit.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x10 - 0x1f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x20 - 0x2f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x30 - 0x3f: '0' - '9' */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x40 - 0x4f: 'A' - 'F' */
	0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x50 - 0x5f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x60 - 0x6f: 'a' - 'f' */
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* 0x70 - 0x7f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1701
1702
1703 static int
1704 InitQuoted(CT ct)
1705 {
1706         return init_encoding(ct, openQuoted);
1707 }
1708
1709
1710 static int
1711 openQuoted(CT ct, char **file)
1712 {
1713         int cc, len, quoted, own_ct_fp = 0;
1714         unsigned char *cp, *ep;
1715         char buffer[BUFSIZ];
1716         unsigned char mask = 0;
1717         CE ce;
1718         /* sbeck -- handle suffixes */
1719         CI ci;
1720
1721         ce = ct->c_cefile;
1722         if (ce->ce_fp) {
1723                 fseek(ce->ce_fp, 0L, SEEK_SET);
1724                 goto ready_to_go;
1725         }
1726
1727         if (ce->ce_file) {
1728                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1729                         content_error(ce->ce_file, ct,
1730                                         "unable to fopen for reading");
1731                         return NOTOK;
1732                 }
1733                 goto ready_to_go;
1734         }
1735
1736         if (*file == NULL) {
1737                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1738                 ce->ce_unlink = 1;
1739         } else {
1740                 ce->ce_file = getcpy(*file);
1741                 ce->ce_unlink = 0;
1742         }
1743
1744         /* sbeck@cise.ufl.edu -- handle suffixes */
1745         ci = &ct->c_ctinfo;
1746         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1747                         invo_name, ci->ci_type, ci->ci_subtype);
1748         cp = context_find(buffer);
1749         if (cp == NULL || *cp == '\0') {
1750                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1751                                 ci->ci_type);
1752                 cp = context_find(buffer);
1753         }
1754         if (cp != NULL && *cp != '\0') {
1755                 if (ce->ce_unlink) {
1756                         /*
1757                         ** Temporary file already exists, so we rename to
1758                         ** version with extension.
1759                         */
1760                         char *file_org = strdup(ce->ce_file);
1761                         ce->ce_file = add(cp, ce->ce_file);
1762                         if (rename(file_org, ce->ce_file)) {
1763                                 adios(ce->ce_file, "unable to rename %s to ",
1764                                                 file_org);
1765                         }
1766                         free(file_org);
1767
1768                 } else {
1769                         ce->ce_file = add(cp, ce->ce_file);
1770                 }
1771         }
1772
1773         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1774                 content_error(ce->ce_file, ct,
1775                                 "unable to fopen for reading/writing");
1776                 return NOTOK;
1777         }
1778
1779         if ((len = ct->c_end - ct->c_begin) < 0)
1780                 adios(NULL, "internal error(2)");
1781
1782         if (!ct->c_fp) {
1783                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1784                         content_error(ct->c_file, ct,
1785                                         "unable to open for reading");
1786                         return NOTOK;
1787                 }
1788                 own_ct_fp = 1;
1789         }
1790
1791         quoted = 0;
1792
1793         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1794         while (len > 0) {
1795                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1796                         content_error(NULL, ct, "premature eof");
1797                         goto clean_up;
1798                 }
1799
1800                 if ((cc = strlen(buffer)) > len)
1801                         cc = len;
1802                 len -= cc;
1803
1804                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1805                         if (!isspace(*ep))
1806                                 break;
1807                 *++ep = '\n', ep++;
1808
1809                 for (; cp < ep; cp++) {
1810                         if (quoted > 0) {
1811                                 /* in an escape sequence */
1812                                 if (quoted == 1) {
1813                                         /* at byte 1 of an escape sequence */
1814                                         mask = hex2nib[*cp & 0x7f];
1815                                         /* next is byte 2 */
1816                                         quoted = 2;
1817                                 } else {
1818                                         /* at byte 2 of an escape sequence */
1819                                         mask <<= 4;
1820                                         mask |= hex2nib[*cp & 0x7f];
1821                                         putc(mask, ce->ce_fp);
1822                                         if (ferror(ce->ce_fp)) {
1823                                                 content_error(ce->ce_file, ct, "error writing to");
1824                                                 goto clean_up;
1825                                         }
1826                                         /*
1827                                         ** finished escape sequence; next may
1828                                         ** be literal or a new escape sequence
1829                                         */
1830                                         quoted = 0;
1831                                 }
1832                                 /* on to next byte */
1833                                 continue;
1834                         }
1835
1836                         /* not in an escape sequence */
1837                         if (*cp == '=') {
1838                                 /*
1839                                 ** starting an escape sequence,
1840                                 ** or invalid '='?
1841                                 */
1842                                 if (cp + 1 < ep && cp[1] == '\n') {
1843                                         /* "=\n" soft line break, eat the \n */
1844                                         cp++;
1845                                         continue;
1846                                 }
1847                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1848                                         /*
1849                                         ** We don't have 2 bytes left,
1850                                         ** so this is an invalid escape
1851                                         ** sequence; just show the raw bytes
1852                                         ** (below).
1853                                         */
1854                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1855                                         /*
1856                                         ** Next 2 bytes are hex digits,
1857                                         ** making this a valid escape
1858                                         ** sequence; let's decode it (above).
1859                                         */
1860                                         quoted = 1;
1861                                         continue;
1862                                 } else {
1863                                         /*
1864                                         ** One or both of the next 2 is
1865                                         ** out of range, making this an
1866                                         ** invalid escape sequence; just
1867                                         ** show the raw bytes (below).
1868                                         */
1869                                 }
1870                         }
1871
1872                         /* Just show the raw byte. */
1873                         putc(*cp, ce->ce_fp);
1874                         if (ferror(ce->ce_fp)) {
1875                                 content_error(ce->ce_file, ct,
1876                                                 "error writing to");
1877                                 goto clean_up;
1878                         }
1879                 }
1880         }
1881         if (quoted) {
1882                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1883                 goto clean_up;
1884         }
1885
1886         fseek(ct->c_fp, 0L, SEEK_SET);
1887
1888         if (fflush(ce->ce_fp)) {
1889                 content_error(ce->ce_file, ct, "error writing to");
1890                 goto clean_up;
1891         }
1892
1893         fseek(ce->ce_fp, 0L, SEEK_SET);
1894
1895 ready_to_go:
1896         *file = ce->ce_file;
1897         if (own_ct_fp) {
1898                 fclose(ct->c_fp);
1899                 ct->c_fp = NULL;
1900         }
1901         return fileno(ce->ce_fp);
1902
1903 clean_up:
1904         free_encoding(ct, 0);
1905         if (own_ct_fp) {
1906                 fclose(ct->c_fp);
1907                 ct->c_fp = NULL;
1908         }
1909         return NOTOK;
1910 }
1911
1912
1913 /*
1914 ** 7BIT
1915 */
1916
1917 static int
1918 Init7Bit(CT ct)
1919 {
1920         if (init_encoding(ct, open7Bit) == NOTOK)
1921                 return NOTOK;
1922
1923         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1924         return OK;
1925 }
1926
1927
1928 int
1929 open7Bit(CT ct, char **file)
1930 {
1931         int cc, fd, len, own_ct_fp = 0;
1932         char buffer[BUFSIZ];
1933         /* sbeck -- handle suffixes */
1934         char *cp;
1935         CI ci;
1936         CE ce;
1937
1938         ce = ct->c_cefile;
1939         if (ce->ce_fp) {
1940                 fseek(ce->ce_fp, 0L, SEEK_SET);
1941                 goto ready_to_go;
1942         }
1943
1944         if (ce->ce_file) {
1945                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1946                         content_error(ce->ce_file, ct,
1947                                         "unable to fopen for reading");
1948                         return NOTOK;
1949                 }
1950                 goto ready_to_go;
1951         }
1952
1953         if (*file == NULL) {
1954                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1955                 ce->ce_unlink = 1;
1956         } else {
1957                 ce->ce_file = getcpy(*file);
1958                 ce->ce_unlink = 0;
1959         }
1960
1961         /* sbeck@cise.ufl.edu -- handle suffixes */
1962         ci = &ct->c_ctinfo;
1963         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1964                         invo_name, ci->ci_type, ci->ci_subtype);
1965         cp = context_find(buffer);
1966         if (cp == NULL || *cp == '\0') {
1967                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1968                                 ci->ci_type);
1969                 cp = context_find(buffer);
1970         }
1971         if (cp != NULL && *cp != '\0') {
1972                 if (ce->ce_unlink) {
1973                         /*
1974                         ** Temporary file already exists, so we rename to
1975                         ** version with extension.
1976                         */
1977                         char *file_org = strdup(ce->ce_file);
1978                         ce->ce_file = add(cp, ce->ce_file);
1979                         if (rename(file_org, ce->ce_file)) {
1980                                 adios(ce->ce_file, "unable to rename %s to ",
1981                                                 file_org);
1982                         }
1983                         free(file_org);
1984
1985                 } else {
1986                         ce->ce_file = add(cp, ce->ce_file);
1987                 }
1988         }
1989
1990         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1991                 content_error(ce->ce_file, ct,
1992                                 "unable to fopen for reading/writing");
1993                 return NOTOK;
1994         }
1995
1996         if (ct->c_type == CT_MULTIPART) {
1997                 char **ap, **ep;
1998                 CI ci = &ct->c_ctinfo;
1999
2000                 len = 0;
2001                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2002                                 ci->ci_subtype);
2003                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2004                                 strlen(ci->ci_subtype);
2005                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2006                         putc(';', ce->ce_fp);
2007                         len++;
2008
2009                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2010                                         *ap, *ep);
2011
2012                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2013                                 fputs("\n\t", ce->ce_fp);
2014                                 len = 8;
2015                         } else {
2016                                 putc(' ', ce->ce_fp);
2017                                 len++;
2018                         }
2019                         fprintf(ce->ce_fp, "%s", buffer);
2020                         len += cc;
2021                 }
2022
2023                 if (ci->ci_comment) {
2024                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2025                                                 >= CPERLIN) {
2026                                 fputs("\n\t", ce->ce_fp);
2027                                 len = 8;
2028                         } else {
2029                                 putc(' ', ce->ce_fp);
2030                                 len++;
2031                         }
2032                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2033                         len += cc;
2034                 }
2035                 fprintf(ce->ce_fp, "\n");
2036                 if (ct->c_id)
2037                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2038                 if (ct->c_descr)
2039                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2040                 if (ct->c_dispo)
2041                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2042                 fprintf(ce->ce_fp, "\n");
2043         }
2044
2045         if ((len = ct->c_end - ct->c_begin) < 0)
2046                 adios(NULL, "internal error(3)");
2047
2048         if (!ct->c_fp) {
2049                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2050                         content_error(ct->c_file, ct,
2051                                         "unable to open for reading");
2052                         return NOTOK;
2053                 }
2054                 own_ct_fp = 1;
2055         }
2056
2057         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2058         while (len > 0)
2059                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2060                 case NOTOK:
2061                         content_error(ct->c_file, ct, "error reading from");
2062                         goto clean_up;
2063
2064                 case OK:
2065                         content_error(NULL, ct, "premature eof");
2066                         goto clean_up;
2067
2068                 default:
2069                         if (cc > len)
2070                                 cc = len;
2071                         len -= cc;
2072
2073                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2074                         if (ferror(ce->ce_fp)) {
2075                                 content_error(ce->ce_file, ct,
2076                                                 "error writing to");
2077                                 goto clean_up;
2078                         }
2079                 }
2080
2081         fseek(ct->c_fp, 0L, SEEK_SET);
2082
2083         if (fflush(ce->ce_fp)) {
2084                 content_error(ce->ce_file, ct, "error writing to");
2085                 goto clean_up;
2086         }
2087
2088         fseek(ce->ce_fp, 0L, SEEK_SET);
2089
2090 ready_to_go:
2091         *file = ce->ce_file;
2092         if (own_ct_fp) {
2093                 fclose(ct->c_fp);
2094                 ct->c_fp = NULL;
2095         }
2096         return fileno(ce->ce_fp);
2097
2098 clean_up:
2099         free_encoding(ct, 0);
2100         if (own_ct_fp) {
2101                 fclose(ct->c_fp);
2102                 ct->c_fp = NULL;
2103         }
2104         return NOTOK;
2105 }