Fixed the error message in lockit() when it fails to create tmp file.
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <h/signals.h>
12 #include <errno.h>
13 #include <signal.h>
14 #include <h/tws.h>
15 #include <h/mime.h>
16 #include <h/mhparse.h>
17 #include <h/utils.h>
18
/*
** debugsw: when non-zero, get_content() and get_ctinfo() echo the
** cleaned-up MIME-Version / Content-Type field text to stderr.
*/
19 extern int debugsw;
20
21 extern int endian;  /* mhmisc.c */
22
23 extern pid_t xpid;  /* mhshowsbr.c  */
24
25 /*
26 ** Directory to place temp files.  This must
27 ** be set before these routines are called.
**
** NOTE(review): nothing in the visible part of this file reads `tmp';
** presumably the encoding/decoding routines further down do -- confirm.
28 */
29 char *tmp;
30
31 /*
32 ** Structures for TEXT messages
**
** SubText pairs each known text subtype name with its TEXT_* code.
** The NULL-key entry terminates the table and carries TEXT_UNKNOWN;
** presumably that is the code a lookup yields when no name matches
** (the lookup itself is not in this part of the file -- confirm).
33 */
34 struct k2v SubText[] = {
35         { "plain", TEXT_PLAIN },
36         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
37         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
38         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
39 };
40
/*
** Charset pairs a character set name with its CHARSET_* code.  The
** NULL-key entry terminates the table and carries CHARSET_UNKNOWN.
*/
41 struct k2v Charset[] = {
42         { "us-ascii",   CHARSET_USASCII },
43         { "iso-8859-1", CHARSET_LATIN },
44         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
45 };
46
47 /*
48 ** Structures for MULTIPART messages
**
** SubMultiPart pairs each known multipart subtype name with its
** MULTI_* code; the NULL-key entry terminates the table and carries
** MULTI_UNKNOWN.
49 */
50 struct k2v SubMultiPart[] = {
51         { "mixed",       MULTI_MIXED },
52         { "alternative", MULTI_ALTERNATE },
53         { "digest",      MULTI_DIGEST },
54         { "parallel",    MULTI_PARALLEL },
55         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
56 };
57
58 /*
59 ** Structures for MESSAGE messages
**
** SubMessage pairs each known message subtype name with its
** MESSAGE_* code; the NULL-key entry terminates the table and
** carries MESSAGE_UNKNOWN.
60 */
61 struct k2v SubMessage[] = {
62         { "rfc822",        MESSAGE_RFC822 },
63         { "partial",       MESSAGE_PARTIAL },
64         { "external-body", MESSAGE_EXTERNAL },
65         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
66 };
67
68 /*
69 ** Structure for APPLICATION messages
**
** SubApplication pairs each known application subtype name with its
** APPLICATION_* code; the NULL-key entry terminates the table and
** carries APPLICATION_UNKNOWN.
70 */
71 struct k2v SubApplication[] = {
72         { "octet-stream", APPLICATION_OCTETS },
73         { "postscript",   APPLICATION_POSTSCRIPT },
74         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
75 };
76
77
/*
** Forward declarations.  The first two groups name functions that the
** comments attribute to other source files; the third group lists the
** static helpers of this file.  The Init... functions appear in the
** str2cts / str2ces dispatch tables below.
*/
78 /* mhmisc.c */
79 int part_ok(CT, int);
80 int type_ok(CT, int);
81 int make_intermediates(char *);
82 void content_error(char *, CT, char *, ...);
83
84 /* mhfree.c */
85 void free_content(CT);
86 void free_encoding(CT, int);
87
88 /*
89 ** static prototypes
90 */
91 static CT get_content(FILE *, char *, int);
92 static int get_comment(CT, unsigned char **, int);
93
94 static int InitGeneric(CT);
95 static int InitText(CT);
96 static int InitMultiPart(CT);
97 static void reverse_parts(CT);
98 static int InitMessage(CT);
99 static int InitApplication(CT);
100 static int init_encoding(CT, OpenCEFunc);
101 static unsigned long size_encoding(CT);
102 static int InitBase64(CT);
103 static int openBase64(CT, char **);
104 static int InitQuoted(CT);
105 static int openQuoted(CT, char **);
106 static int Init7Bit(CT);
106 static int Init7Bit(CT);
107
/*
** Content-Type dispatch table: major type name, CT_* code, and the
** Init function that get_content() stores in ct->c_ctinitfnx.
**
** get_content() scans the entries with a case-insensitive compare and
** stops at the first NULL key.  On such a miss it stays on that entry
** (CT_EXTENSION) when uprf(type, "X-") is non-zero, and otherwise
** steps to the following entry (CT_UNKNOWN).  That is why the two
** NULL-key entries must stay last and in this order.
*/
108 struct str2init str2cts[] = {
109         { "application", CT_APPLICATION, InitApplication },
110         { "audio",       CT_AUDIO,       InitGeneric },
111         { "image",       CT_IMAGE,       InitGeneric },
112         { "message",     CT_MESSAGE,     InitMessage },
113         { "multipart",   CT_MULTIPART,   InitMultiPart },
114         { "text",        CT_TEXT,        InitText },
115         { "video",       CT_VIDEO,       InitGeneric },
116         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
117         { NULL,          CT_UNKNOWN,     NULL },
118 };
119
/*
** Content-Transfer-Encoding dispatch table: encoding name, CE_* code,
** and the Init function get_content() calls right after the lookup.
** 8bit, 7bit and binary all share Init7Bit.
**
** The lookup in get_content() follows the same scheme as for str2cts:
** the scan stops at the first NULL key (CE_EXTENSION) and steps on to
** CE_UNKNOWN unless uprf(name, "X-") is non-zero.  Both NULL-key
** entries have no Init function, so none is called for them.
*/
120 struct str2init str2ces[] = {
121         { "base64",           CE_BASE64,    InitBase64 },
122         { "quoted-printable", CE_QUOTED,    InitQuoted },
123         { "8bit",             CE_8BIT,      Init7Bit },
124         { "7bit",             CE_7BIT,      Init7Bit },
125         { "binary",           CE_BINARY,    Init7Bit },
126         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
127         { NULL,               CE_UNKNOWN,    NULL },
128 };
129
130
/*
** Inspect a wait()-style status word.
**
** The process is terminated with exit code 1 (after flushing stdout
** and stderr) when the low seven bits of `status' equal SIGQUIT and
** the second byte is not 0xff.  In every other case `status' is
** handed back unchanged.
*/
int
pidcheck(int status)
{
	int second_byte_all_ones = ((status & 0xff00) == 0xff00);
	int low_bits_are_sigquit = ((status & 0x007f) == SIGQUIT);

	if (!second_byte_all_ones && low_bits_are_sigquit) {
		fflush(stdout);
		fflush(stderr);
		exit(1);
	}

	return status;
}
142
143
144 /*
145 ** Main entry point for parsing a MIME message or file.
146 ** It returns the Content structure for the top level
147 ** entity in the file.
148 */
149 CT
150 parse_mime(char *file)
151 {
152         int is_stdin;
153         char buffer[BUFSIZ];
154         FILE *fp;
155         CT ct;
156
157         /*
158         ** Check if file is actually standard input
159         */
160         if ((is_stdin = (strcmp(file, "-")==0))) {
161                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
162                 if (tfile == NULL) {
163                         advise("mhparse", "unable to create temporary file");
164                         return NULL;
165                 }
166                 file = getcpy(tfile);
167                 chmod(file, 0600);
168
169                 while (fgets(buffer, sizeof(buffer), stdin))
170                         fputs(buffer, fp);
171                 fflush(fp);
172
173                 if (ferror(stdin)) {
174                         unlink(file);
175                         advise("stdin", "error reading");
176                         return NULL;
177                 }
178                 if (ferror(fp)) {
179                         unlink(file);
180                         advise(file, "error writing");
181                         return NULL;
182                 }
183                 fseek(fp, 0L, SEEK_SET);
184         } else if ((fp = fopen(file, "r")) == NULL) {
185                 advise(file, "unable to read");
186                 return NULL;
187         }
188
189         if (!(ct = get_content(fp, file, 1))) {
190                 if (is_stdin)
191                         unlink(file);
192                 advise(NULL, "unable to decode %s", file);
193                 return NULL;
194         }
195
196         if (is_stdin)
197                 ct->c_unlink = 1;  /* temp file to remove */
198
199         ct->c_fp = NULL;
200
201         if (ct->c_end == 0L) {
202                 fseek(fp, 0L, SEEK_END);
203                 ct->c_end = ftell(fp);
204         }
205
206         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
207                 fclose(fp);
208                 free_content(ct);
209                 return NULL;
210         }
211
212         fclose(fp);
213         return ct;
214 }
215
216
217 /*
218 ** Main routine for reading/parsing the headers
219 ** of a message content.
220 **
221 ** toplevel =  1   # we are at the top level of the message
222 ** toplevel =  0   # we are inside message type or multipart type
223 **                 # other than multipart/digest
224 ** toplevel = -1   # we are inside multipart/digest
225 ** NB: on failure we will fclose(in)!
**
** in       -- stream positioned at the first header of the content
** file     -- name of the file behind `in'; a copy is kept in c_file
** toplevel -- only its sign is examined here: a negative value makes
**             message/rfc822 the default type instead of text/plain
**
** Works in two passes: first all header fields are read into the
** content's header list, then that list is walked and the MIME
** fields (version, type, transfer encoding, id, description,
** disposition) are interpreted.
**
** Returns the new content, or NULL after free_content() on a parse
** error.  NOTE(review): the failure path only calls free_content()
** while ct->c_fp still equals `in'; the fclose promised above
** presumably happens inside free_content() -- confirm in mhfree.c.
226 */
227
228 static CT
229 get_content(FILE *in, char *file, int toplevel)
230 {
231         int compnum, state;
232         char buf[BUFSIZ], name[NAMESZ];
233         char *np, *vp;
234         CT ct;
235         HF hp;
236
237         /* allocate the content structure */
238         if (!(ct = (CT) calloc(1, sizeof(*ct))))
239                 adios(NULL, "out of memory");
240
241         ct->c_fp = in;
242         ct->c_file = getcpy(file);
243         ct->c_begin = ftell(ct->c_fp) + 1;
244
245         /*
246         ** Parse the header fields for this
247         ** content into a linked list.
        **
        ** Each pass handles one m_getfld() result.  FLD, FLDPLUS and
        ** FLDEOF deliver a header field (FLDPLUS: more of the value
        ** is still to come), the BODY and EOF states end the header
        ** section, and LENERR/FMTERR are reported through adios().
        ** c_begin is updated along the way from the stream position.
248         */
249         for (compnum = 1, state = FLD;;) {
250                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
251                 case FLD:
252                 case FLDPLUS:
253                 case FLDEOF:
254                         compnum++;
255
256                         /* get copies of the buffers */
257                         np = getcpy(name);
258                         vp = getcpy(buf);
259
260                         /* if necessary, get rest of field */
261                         while (state == FLDPLUS) {
262                                 state = m_getfld(state, name, buf,
263                                                 sizeof(buf), in);
264                                 vp = add(buf, vp);  /* add to previous value */
265                         }
266
267                         /* Now add the header data to the list */
                        /* (the list takes over np and vp; no copies are made) */
268                         add_header(ct, np, vp);
269
270                         /* continue, if this isn't the last header field */
271                         if (state != FLDEOF) {
272                                 ct->c_begin = ftell(in) + 1;
273                                 continue;
274                         }
275                         /* else fall... */
276
277                 case BODY:
278                 case BODYEOF:
                        /* buf holds what m_getfld() just read; step back over it */
279                         ct->c_begin = ftell(in) - strlen(buf);
280                         break;
281
282                 case FILEEOF:
283                         ct->c_begin = ftell(in);
284                         break;
285
286                 case LENERR:
287                 case FMTERR:
                        /*
                        ** No break follows: adios() evidently is not
                        ** expected to return.
                        */
288                         adios(NULL, "message format error in component #%d",
289                                         compnum);
290
291                 default:
292                         adios(NULL, "getfld() returned %d", state);
293                 }
294
295                 /* break out of the loop */
296                 break;
297         }
298
299         /*
300         ** Read the content headers.  We will parse the
301         ** MIME related header fields into their various
302         ** structures and set internal flags related to
303         ** content type/subtype, etc.
304         */
305
306         hp = ct->c_first_hf;  /* start at first header field */
307         while (hp) {
308                 /* Get MIME-Version field */
309                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
310                         int ucmp;
311                         char c;
312                         unsigned char *cp, *dp;
313
                        /* only the first occurrence is used; repeats are reported */
314                         if (ct->c_vrsn) {
315                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
316                                 goto next_header;
317                         }
318                         ct->c_vrsn = getcpy(hp->value);
319
320                         /* Now, cleanup this field */
                        /*
                        ** i.e. skip leading white space, turn newlines
                        ** into blanks and cut off trailing white space
                        */
321                         cp = ct->c_vrsn;
322
323                         while (isspace(*cp))
324                                 cp++;
325                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
326                                 *dp++ = ' ';
327                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
328                                 if (!isspace(*dp))
329                                         break;
330                         *++dp = '\0';
331                         if (debugsw)
332                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
333
334                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
335                                 goto out;
336
                        /*
                        ** Compare the leading token with VRSN_VALUE; the
                        ** token is NUL-terminated only for the duration of
                        ** the compare.  A mismatch is merely warned about.
                        */
337                         for (dp = cp; istoken(*dp); dp++)
338                                 continue;
339                         c = *dp;
340                         *dp = '\0';
341                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
342                         *dp = c;
343                         if (!ucmp) {
344                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
345                         }
346
347                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
348                         /* Get Content-Type field */
349                         struct str2init *s2i;
350                         CI ci = &ct->c_ctinfo;
351
352                         /* Check if we've already seen a Content-Type header */
353                         if (ct->c_ctline) {
354                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
355                                 goto next_header;
356                         }
357
358                         /* Parse the Content-Type field */
359                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
360                                 goto out;
361
362                         /*
363                         ** Set the Init function and the internal
364                         ** flag for this content type.
                        **
                        ** Without a table match the scan ends on the
                        ** first NULL-key entry (CT_EXTENSION); unless
                        ** uprf() reports an "X-" prefix we step on to
                        ** the entry after it (CT_UNKNOWN).
365                         */
366                         for (s2i = str2cts; s2i->si_key; s2i++)
367                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
368                                         break;
369                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
370                                 s2i++;
371                         ct->c_type = s2i->si_val;
372                         ct->c_ctinitfnx = s2i->si_init;
373
374                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
375                         /* Get Content-Transfer-Encoding field */
376                         char c;
377                         unsigned char *cp, *dp;
378                         struct str2init *s2i;
379
380                         /*
381                         ** Check if we've already seen the
382                         ** Content-Transfer-Encoding field
383                         */
384                         if (ct->c_celine) {
385                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
386                                 goto next_header;
387                         }
388
389                         /* get copy of this field */
390                         ct->c_celine = cp = getcpy(hp->value);
391
                        /* isolate the first token; the cut is undone after the lookup */
392                         while (isspace(*cp))
393                                 cp++;
394                         for (dp = cp; istoken(*dp); dp++)
395                                 continue;
396                         c = *dp;
397                         *dp = '\0';
398
399                         /*
400                         ** Find the internal flag and Init function
401                         ** for this transfer encoding.
                        ** (same fallback scheme as for str2cts above)
402                         */
403                         for (s2i = str2ces; s2i->si_key; s2i++)
404                                 if (!mh_strcasecmp(cp, s2i->si_key))
405                                         break;
406                         if (!s2i->si_key && !uprf(cp, "X-"))
407                                 s2i++;
408                         *dp = c;
409                         ct->c_encoding = s2i->si_val;
410
411                         /* Call the Init function for this encoding */
412                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
413                                 goto out;
414
415                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
416                         /* Get Content-ID field */
                        /*
                        ** Unlike the fields above, a repeated field is not
                        ** rejected: add() is given the previous value too
                        ** (presumably it appends -- confirm).
                        */
417                         ct->c_id = add(hp->value, ct->c_id);
418
419                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
420                         /* Get Content-Description field */
421                         ct->c_descr = add(hp->value, ct->c_descr);
422
423                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
424                         /* Get Content-Disposition field */
425                         ct->c_dispo = add(hp->value, ct->c_dispo);
426                 }
427
428 next_header:
429                 hp = hp->next;  /* next header field */
430         }
431
432         /*
433         ** Check if we saw a Content-Type field.
434         ** If not, then assign a default value for
435         ** it, and the Init function.
436         */
437         if (!ct->c_ctline) {
438                 /*
439                 ** If we are inside a multipart/digest message,
440                 ** so default type is message/rfc822
441                 */
442                 if (toplevel < 0) {
443                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
444                                 goto out;
445                         ct->c_type = CT_MESSAGE;
446                         ct->c_ctinitfnx = InitMessage;
447                 } else {
448                         /*
449                         ** Else default type is text/plain
450                         */
451                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
452                                 goto out;
453                         ct->c_type = CT_TEXT;
454                         ct->c_ctinitfnx = InitText;
455                 }
456         }
457
458         /* Use default Transfer-Encoding, if necessary */
        /* (the result of Init7Bit() is not checked here) */
459         if (!ct->c_celine) {
460                 ct->c_encoding = CE_7BIT;
461                 Init7Bit(ct);
462         }
463
464         return ct;
465
466 out:
        /* parse error: release everything gathered so far */
467         free_content(ct);
468         return NULL;
469 }
470
471
472 /*
473 ** small routine to add header field to list
474 */
475
476 int
477 add_header(CT ct, char *name, char *value)
478 {
479         HF hp;
480
481         /* allocate header field structure */
482         hp = mh_xmalloc(sizeof(*hp));
483
484         /* link data into header structure */
485         hp->name = name;
486         hp->value = value;
487         hp->next = NULL;
488
489         /* link header structure into the list */
490         if (ct->c_first_hf == NULL) {
491                 ct->c_first_hf = hp;  /* this is the first */
492                 ct->c_last_hf = hp;
493         } else {
494                 ct->c_last_hf->next = hp;  /* add it to the end */
495                 ct->c_last_hf = hp;
496         }
497
498         return 0;
499 }
500
501
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, insert both name and value, just after
** first semicolon, if any.  Note that name should not contain a
** trailing =.  And quotes will be added around the value.  Typical
** usage:  make sure that a Content-Disposition header contains
** filename="foo".  If it doesn't and value does, use value from
** that.
**
** buf   -- heap string to check; may be NULL.  When a parameter is
**          inserted, trailing white space is trimmed from buf, buf is
**          free()d, and a newly allocated, newline-terminated string
**          is returned in its place.
** name  -- parameter name, assumed non-NULL.
** value -- parameter value; NULL means "nothing to insert".
**
** Returns buf itself if nothing had to be done, else the new string.
** Note that the check is a plain substring search for `name=', so
** e.g. "xfilename=" also counts as an appearance of "filename".
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value)
{
	char *text = (char *) buf;
	char *newbuf = text;

	/* Assume that name is non-null. */
	if (buf && value) {
		char *name_plus_equal = concat(name, "=", NULL);

		if (!strstr(text, name_plus_equal)) {
			char *insertion;
			char *prefix, *semicolon;
			size_t len;

			/*
			** Trim trailing space, esp. newline.  Counting down
			** a length (instead of a pointer starting at
			** strlen - 1) keeps this well defined for an
			** empty or all-blank buf.
			*/
			for (len = strlen(text);
					len > 0 && isspace(buf[len - 1]); len--) {
				buf[len - 1] = '\0';
			}

			insertion = concat("; ", name, "=", "\"", value, "\"",
					NULL);

			/*
			** Insert at first semicolon, if any.
			** If none, append to end.
			*/
			prefix = getcpy(text);
			if ((semicolon = strchr(prefix, ';'))) {
				char *suffix = concat(semicolon, NULL);

				*semicolon = '\0';
				newbuf = concat(prefix, insertion, suffix,
						"\n", NULL);
				free(suffix);
			} else {
				/* Append to end. */
				newbuf = concat(text, insertion, "\n", NULL);
			}

			free(prefix);
			free(insertion);
			free(buf);
		}

		free(name_plus_equal);
	}

	return newbuf;
}
559
/*
** Extract just name_suffix="foo", if any, from value.  If there isn't
** one, return the entire value.  Note that, for example, a name_suffix
** of name will match filename="foo", and return foo.
**
** The result is either `value' itself (no match, or the opening quote
** is never closed) or a newly allocated copy of the quoted text.
** Callers that need to release it must therefore compare the result
** with `value' first.
*/
static char *
extract_name_value(char *name_suffix, char *value)
{
	char *pattern = concat(name_suffix, "=\"", NULL);
	char *match = strstr(value, pattern);
	char *begin, *end, *extracted;
	size_t len;

	free(pattern);
	if (!match)
		return value;

	/* The pattern ends in a quote, so this strchr() cannot fail. */
	begin = strchr(match, '"') + 1;

	/*
	** Find the closing quote.  Stop at the end of the string: an
	** unterminated quoted value is treated like "no match" instead
	** of scanning past the terminating NUL.
	*/
	end = strchr(begin, '"');
	if (!end)
		return value;

	len = (size_t) (end - begin);
	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
592
593 /*
594 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
595 ** directives.  Fills in the information of the CTinfo structure.
596 */
597 int
598 get_ctinfo(unsigned char *cp, CT ct, int magic)
599 {
600         int i;
601         unsigned char *dp;
602         char **ap, **ep;
603         char c;
604         CI ci;
605
606         ci = &ct->c_ctinfo;
607         i = strlen(invo_name) + 2;
608
609         /* store copy of Content-Type line */
610         cp = ct->c_ctline = getcpy(cp);
611
612         while (isspace(*cp))  /* trim leading spaces */
613                 cp++;
614
615         /* change newlines to spaces */
616         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
617                 *dp++ = ' ';
618
619         /* trim trailing spaces */
620         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
621                 if (!isspace(*dp))
622                         break;
623         *++dp = '\0';
624
625         if (debugsw)
626                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
627
628         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
629                 return NOTOK;
630
631         for (dp = cp; istoken(*dp); dp++)
632                 continue;
633         c = *dp, *dp = '\0';
634         ci->ci_type = getcpy(cp);  /* store content type */
635         *dp = c, cp = dp;
636
637         if (!*ci->ci_type) {
638                 advise(NULL, "invalid %s: field in message %s (empty type)",
639                                 TYPE_FIELD, ct->c_file);
640                 return NOTOK;
641         }
642
643         /* down case the content type string */
644         for (dp = ci->ci_type; *dp; dp++)
645                 if (isalpha(*dp) && isupper(*dp))
646                         *dp = tolower(*dp);
647
648         while (isspace(*cp))
649                 cp++;
650
651         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
652                 return NOTOK;
653
654         if (*cp != '/') {
655                 if (!magic)
656                         ci->ci_subtype = getcpy("");
657                 goto magic_skip;
658         }
659
660         cp++;
661         while (isspace(*cp))
662                 cp++;
663
664         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
665                 return NOTOK;
666
667         for (dp = cp; istoken(*dp); dp++)
668                 continue;
669         c = *dp, *dp = '\0';
670         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
671         *dp = c, cp = dp;
672
673         if (!*ci->ci_subtype) {
674                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
675                 return NOTOK;
676         }
677
678         /* down case the content subtype string */
679         for (dp = ci->ci_subtype; *dp; dp++)
680                 if (isalpha(*dp) && isupper(*dp))
681                         *dp = tolower(*dp);
682
683 magic_skip:
684         while (isspace(*cp))
685                 cp++;
686
687         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
688                 return NOTOK;
689
690         /*
691         ** Parse attribute/value pairs given with Content-Type
692         */
693         ep = (ap = ci->ci_attrs) + NPARMS;
694         while (*cp == ';') {
695                 char *vp;
696                 unsigned char *up;
697
698                 if (ap >= ep) {
699                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
700                         return NOTOK;
701                 }
702
703                 cp++;
704                 while (isspace(*cp))
705                         cp++;
706
707                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
708                         return NOTOK;
709
710                 if (*cp == 0) {
711                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
712                         return OK;
713                 }
714
715                 /* down case the attribute name */
716                 for (dp = cp; istoken(*dp); dp++)
717                         if (isalpha(*dp) && isupper(*dp))
718                                 *dp = tolower(*dp);
719
720                 for (up = dp; isspace(*dp);)
721                         dp++;
722                 if (dp == cp || *dp != '=') {
723                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
724                         return NOTOK;
725                 }
726
727                 vp = (*ap = getcpy(cp)) + (up - cp);
728                 *vp = '\0';
729                 for (dp++; isspace(*dp);)
730                         dp++;
731
732                 /* now add the attribute value */
733                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
734
735                 if (*dp == '"') {
736                         for (cp = ++dp, dp = vp;;) {
737                                 switch (c = *cp++) {
738                                 case '\0':
739 bad_quote:
740                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
741                                         return NOTOK;
742
743                                 case '\\':
744                                         *dp++ = c;
745                                         if ((c = *cp++) == '\0')
746                                                 goto bad_quote;
747                                         /* else fall... */
748
749                                 default:
750                                         *dp++ = c;
751                                         continue;
752
753                                 case '"':
754                                         *dp = '\0';
755                                         break;
756                                 }
757                                 break;
758                         }
759                 } else {
760                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
761                                 continue;
762                         *dp = '\0';
763                 }
764                 if (!*vp) {
765                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
766                         return NOTOK;
767                 }
768                 ap++;
769
770                 while (isspace(*cp))
771                         cp++;
772
773                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
774                         return NOTOK;
775         }
776
777         /*
778         ** Get any <Content-Id> given in buffer
779         */
780         if (magic && *cp == '<') {
781                 if (ct->c_id) {
782                         free(ct->c_id);
783                         ct->c_id = NULL;
784                 }
785                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
786                         advise(NULL, "invalid ID in message %s", ct->c_file);
787                         return NOTOK;
788                 }
789                 c = *dp;
790                 *dp = '\0';
791                 if (*ct->c_id)
792                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
793                 else
794                         ct->c_id = NULL;
795                 *dp++ = c;
796                 cp = dp;
797
798                 while (isspace(*cp))
799                         cp++;
800         }
801
802         /*
803         ** Get any [Content-Description] given in buffer.
804         */
805         if (magic && *cp == '[') {
806                 ct->c_descr = ++cp;
807                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
808                         if (*dp == ']')
809                                 break;
810                 if (dp < cp) {
811                         advise(NULL, "invalid description in message %s",
812                                         ct->c_file);
813                         ct->c_descr = NULL;
814                         return NOTOK;
815                 }
816
817                 c = *dp;
818                 *dp = '\0';
819                 if (*ct->c_descr)
820                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
821                 else
822                         ct->c_descr = NULL;
823                 *dp++ = c;
824                 cp = dp;
825
826                 while (isspace(*cp))
827                         cp++;
828         }
829
830         /*
831         ** Get any {Content-Disposition} given in buffer.
832         */
833         if (magic && *cp == '{') {
834                 ct->c_dispo = ++cp;
835                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
836                         if (*dp == '}')
837                                 break;
838                 if (dp < cp) {
839                         advise(NULL, "invalid disposition in message %s",
840                                         ct->c_file);
841                         ct->c_dispo = NULL;
842                         return NOTOK;
843                 }
844
845                 c = *dp;
846                 *dp = '\0';
847                 if (*ct->c_dispo)
848                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
849                 else
850                         ct->c_dispo = NULL;
851                 *dp++ = c;
852                 cp = dp;
853
854                 while (isspace(*cp))
855                         cp++;
856         }
857
858         /*
859         ** Check if anything is left over
860         */
861         if (*cp) {
862                 if (magic) {
863                         ci->ci_magic = getcpy(cp);
864
865                         /*
866                         ** If there is a Content-Disposition header and
867                         ** it doesn't have a *filename=, extract it from
868                         ** the magic contents.  The mhbasename call skips
869                         ** any leading directory components.
870                         */
871                         if (ct->c_dispo)
872                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
873                         } else
874                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
875         }
876
877         return OK;
878 }
879
880
881 static int
882 get_comment(CT ct, unsigned char **ap, int istype)
883 {
884         int i;
885         char *bp;
886         unsigned char *cp;
887         char c, buffer[BUFSIZ], *dp;
888         CI ci;
889
890         ci = &ct->c_ctinfo;
891         cp = *ap;
892         bp = buffer;
893         cp++;
894
895         for (i = 0;;) {
896                 switch (c = *cp++) {
897                 case '\0':
898 invalid:
899                 advise(NULL, "invalid comment in message %s's %s: field",
900                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
901                 return NOTOK;
902
903                 case '\\':
904                         *bp++ = c;
905                         if ((c = *cp++) == '\0')
906                                 goto invalid;
907                         *bp++ = c;
908                         continue;
909
910                 case '(':
911                         i++;
912                         /* and fall... */
913                 default:
914                         *bp++ = c;
915                         continue;
916
917                 case ')':
918                         if (--i < 0)
919                                 break;
920                         *bp++ = c;
921                         continue;
922                 }
923                 break;
924         }
925         *bp = '\0';
926
927         if (istype) {
928                 if ((dp = ci->ci_comment)) {
929                         ci->ci_comment = concat(dp, " ", buffer, NULL);
930                         free(dp);
931                 } else {
932                         ci->ci_comment = getcpy(buffer);
933                 }
934         }
935
936         while (isspace(*cp))
937                 cp++;
938
939         *ap = cp;
940         return OK;
941 }
942
943
944 /*
945 ** CONTENTS
946 **
947 ** Handles content types audio, image, and video.
948 ** There's not much to do right here.
949 */
950
951 static int
952 InitGeneric(CT ct)
953 {
954         return OK;  /* not much to do here */
955 }
956
957
958 /*
959 ** TEXT
960 */
961
962 static int
963 InitText(CT ct)
964 {
965         char **ap, **ep;
966         struct k2v *kv;
967         struct text *t;
968         CI ci = &ct->c_ctinfo;
969
970         /* check for missing subtype */
971         if (!*ci->ci_subtype)
972                 ci->ci_subtype = add("plain", ci->ci_subtype);
973
974         /* match subtype */
975         for (kv = SubText; kv->kv_key; kv++)
976                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
977                         break;
978         ct->c_subtype = kv->kv_value;
979
980         /* allocate text character set structure */
981         if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
982                 adios(NULL, "out of memory");
983         ct->c_ctparams = (void *) t;
984
985         /* scan for charset parameter */
986         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
987                 if (!mh_strcasecmp(*ap, "charset"))
988                         break;
989
990         /* check if content specified a character set */
991         if (*ap) {
992                 /* store its name */
993                 ct->c_charset = getcpy(norm_charmap(*ep));
994                 /* match character set or set to CHARSET_UNKNOWN */
995                 for (kv = Charset; kv->kv_key; kv++) {
996                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
997                                 break;
998                         }
999                 }
1000                 t->tx_charset = kv->kv_value;
1001         } else {
1002                 t->tx_charset = CHARSET_UNSPECIFIED;
1003         }
1004
1005         return OK;
1006 }
1007
1008
1009 /*
1010 ** MULTIPART
1011 */
1012
1013 static int
1014 InitMultiPart(CT ct)
1015 {
1016         int inout;
1017         long last, pos;
1018         unsigned char *cp, *dp;
1019         char **ap, **ep;
1020         char *bp, buffer[BUFSIZ];
1021         struct multipart *m;
1022         struct k2v *kv;
1023         struct part *part, **next;
1024         CI ci = &ct->c_ctinfo;
1025         CT p;
1026         FILE *fp;
1027
1028         /*
1029         ** The encoding for multipart messages must be either
1030         ** 7bit, 8bit, or binary (per RFC2045).
1031         */
1032         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1033                 && ct->c_encoding != CE_BINARY) {
1034                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1035                 return NOTOK;
1036         }
1037
1038         /* match subtype */
1039         for (kv = SubMultiPart; kv->kv_key; kv++)
1040                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1041                         break;
1042         ct->c_subtype = kv->kv_value;
1043
1044         /*
1045         ** Check for "boundary" parameter, which is
1046         ** required for multipart messages.
1047         */
1048         bp = 0;
1049         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1050                 if (!mh_strcasecmp(*ap, "boundary")) {
1051                         bp = *ep;
1052                         break;
1053                 }
1054         }
1055
1056         /* complain if boundary parameter is missing */
1057         if (!*ap) {
1058                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1059                 return NOTOK;
1060         }
1061
1062         /* allocate primary structure for multipart info */
1063         if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1064                 adios(NULL, "out of memory");
1065         ct->c_ctparams = (void *) m;
1066
1067         /* check if boundary parameter contains only whitespace characters */
1068         for (cp = bp; isspace(*cp); cp++)
1069                 continue;
1070         if (!*cp) {
1071                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1072                 return NOTOK;
1073         }
1074
1075         /* remove trailing whitespace from boundary parameter */
1076         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1077                 if (!isspace(*dp))
1078                         break;
1079         *++dp = '\0';
1080
1081         /* record boundary separators */
1082         m->mp_start = concat(bp, "\n", NULL);
1083         m->mp_stop = concat(bp, "--\n", NULL);
1084
1085         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1086                 advise(ct->c_file, "unable to open for reading");
1087                 return NOTOK;
1088         }
1089
1090         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1091         last = ct->c_end;
1092         next = &m->mp_parts;
1093         part = NULL;
1094         inout = 1;
1095
1096         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1097                 if (pos > last)
1098                         break;
1099
1100                 pos += strlen(buffer);
1101                 if (buffer[0] != '-' || buffer[1] != '-')
1102                         continue;
1103                 if (inout) {
1104                         if (strcmp(buffer + 2, m->mp_start)!=0)
1105                                 continue;
1106 next_part:
1107                         if ((part = (struct part *) calloc(1, sizeof(*part)))
1108                                         == NULL)
1109                                 adios(NULL, "out of memory");
1110                         *next = part;
1111                         next = &part->mp_next;
1112
1113                         if (!(p = get_content(fp, ct->c_file,
1114                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1115                                 ct->c_fp = NULL;
1116                                 return NOTOK;
1117                         }
1118                         p->c_fp = NULL;
1119                         part->mp_part = p;
1120                         pos = p->c_begin;
1121                         fseek(fp, pos, SEEK_SET);
1122                         inout = 0;
1123                 } else {
1124                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1125                                 inout = 1;
1126 end_part:
1127                                 p = part->mp_part;
1128                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1129                                 if (p->c_end < p->c_begin)
1130                                         p->c_begin = p->c_end;
1131                                 if (inout)
1132                                         goto next_part;
1133                                 goto last_part;
1134                         } else {
1135                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1136                                         goto end_part;
1137                         }
1138                 }
1139         }
1140
1141         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1142         if (!inout && part) {
1143                 p = part->mp_part;
1144                 p->c_end = ct->c_end;
1145
1146                 if (p->c_begin >= p->c_end) {
1147                         for (next = &m->mp_parts; *next != part;
1148                                 next = &((*next)->mp_next))
1149                                 continue;
1150                         *next = NULL;
1151                         free_content(p);
1152                         free((char *) part);
1153                 }
1154         }
1155
1156 last_part:
1157         /* reverse the order of the parts for multipart/alternative */
1158         if (ct->c_subtype == MULTI_ALTERNATE)
1159                 reverse_parts(ct);
1160
1161         /*
1162         ** label all subparts with part number, and
1163         ** then initialize the content of the subpart.
1164         */
1165         {
1166                 int partnum;
1167                 char *pp;
1168                 char partnam[BUFSIZ];
1169
1170                 if (ct->c_partno) {
1171                         snprintf(partnam, sizeof(partnam), "%s.",
1172                                         ct->c_partno);
1173                         pp = partnam + strlen(partnam);
1174                 } else {
1175                         pp = partnam;
1176                 }
1177
1178                 for (part = m->mp_parts, partnum = 1; part;
1179                         part = part->mp_next, partnum++) {
1180                         p = part->mp_part;
1181
1182                         sprintf(pp, "%d", partnum);
1183                         p->c_partno = getcpy(partnam);
1184
1185                         /* initialize the content of the subparts */
1186                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1187                                 fclose(ct->c_fp);
1188                                 ct->c_fp = NULL;
1189                                 return NOTOK;
1190                         }
1191                 }
1192         }
1193
1194         fclose(ct->c_fp);
1195         ct->c_fp = NULL;
1196         return OK;
1197 }
1198
1199
1200 /*
1201 ** reverse the order of the parts of a multipart
1202 */
1203
1204 static void
1205 reverse_parts(CT ct)
1206 {
1207         int i;
1208         struct multipart *m;
1209         struct part **base, **bmp, **next, *part;
1210
1211         m = (struct multipart *) ct->c_ctparams;
1212
1213         /* if only one part, just return */
1214         if (!m->mp_parts || !m->mp_parts->mp_next)
1215                 return;
1216
1217         /* count number of parts */
1218         i = 0;
1219         for (part = m->mp_parts; part; part = part->mp_next)
1220                 i++;
1221
1222         /* allocate array of pointers to the parts */
1223         if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1224                 adios(NULL, "out of memory");
1225         bmp = base;
1226
1227         /* point at all the parts */
1228         for (part = m->mp_parts; part; part = part->mp_next)
1229                 *bmp++ = part;
1230         *bmp = NULL;
1231
1232         /* reverse the order of the parts */
1233         next = &m->mp_parts;
1234         for (bmp--; bmp >= base; bmp--) {
1235                 part = *bmp;
1236                 *next = part;
1237                 next = &part->mp_next;
1238         }
1239         *next = NULL;
1240
1241         /* free array of pointers */
1242         free((char *) base);
1243 }
1244
1245
1246 /*
1247 ** MESSAGE
1248 */
1249
1250 static int
1251 InitMessage(CT ct)
1252 {
1253         struct k2v *kv;
1254         CI ci = &ct->c_ctinfo;
1255
1256         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1257                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1258                 return NOTOK;
1259         }
1260
1261         /* check for missing subtype */
1262         if (!*ci->ci_subtype)
1263                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1264
1265         /* match subtype */
1266         for (kv = SubMessage; kv->kv_key; kv++)
1267                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1268                         break;
1269         ct->c_subtype = kv->kv_value;
1270
1271         switch (ct->c_subtype) {
1272         case MESSAGE_RFC822:
1273                 break;
1274
1275         case MESSAGE_PARTIAL:
1276                 {
1277                 char **ap, **ep;
1278                 struct partial *p;
1279
1280                 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1281                         adios(NULL, "out of memory");
1282                 ct->c_ctparams = (void *) p;
1283
1284                 /*
1285                 ** scan for parameters "id", "number",
1286                 ** and "total"
1287                 */
1288                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1289                         if (!mh_strcasecmp(*ap, "id")) {
1290                                 p->pm_partid = getcpy(*ep);
1291                                 continue;
1292                         }
1293                         if (!mh_strcasecmp(*ap, "number")) {
1294                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1295 invalid_param:
1296                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1297                                         return NOTOK;
1298                                 }
1299                                 continue;
1300                         }
1301                         if (!mh_strcasecmp(*ap, "total")) {
1302                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1303                                                 p->pm_maxno < 1)
1304                                         goto invalid_param;
1305                                 continue;
1306                         }
1307                 }
1308
1309                 if (!p->pm_partid || !p->pm_partno
1310                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1311                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1312                         return NOTOK;
1313                 }
1314                 }
1315                 break;
1316
1317         case MESSAGE_EXTERNAL:
1318                 {
1319                 CT p;
1320                 FILE *fp;
1321
1322                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1323                         advise(ct->c_file, "unable to open for reading");
1324                         return NOTOK;
1325                 }
1326
1327                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1328
1329                 if (!(p = get_content(fp, ct->c_file, 0))) {
1330                         ct->c_fp = NULL;
1331                         return NOTOK;
1332                 }
1333
1334                 p->c_fp = NULL;
1335                 p->c_end = p->c_begin;
1336
1337                 fclose(ct->c_fp);
1338                 ct->c_fp = NULL;
1339
1340                 switch (p->c_type) {
1341                 case CT_MULTIPART:
1342                         break;
1343
1344                 case CT_MESSAGE:
1345                         if (p->c_subtype != MESSAGE_RFC822)
1346                                 break;
1347                         /* else fall... */
1348                 default:
1349                         if (p->c_ctinitfnx)
1350                                 (*p->c_ctinitfnx) (p);
1351                         break;
1352                 }
1353                 }
1354                 break;
1355
1356         default:
1357                 break;
1358         }
1359
1360         return OK;
1361 }
1362
1363
1364 /*
1365 ** APPLICATION
1366 */
1367
1368 static int
1369 InitApplication(CT ct)
1370 {
1371         struct k2v *kv;
1372         CI ci = &ct->c_ctinfo;
1373
1374         /* match subtype */
1375         for (kv = SubApplication; kv->kv_key; kv++)
1376                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1377                         break;
1378         ct->c_subtype = kv->kv_value;
1379
1380         return OK;
1381 }
1382
1383
1384 /*
1385 ** TRANSFER ENCODINGS
1386 */
1387
1388 static int
1389 init_encoding(CT ct, OpenCEFunc openfnx)
1390 {
1391         CE ce;
1392
1393         if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1394                 adios(NULL, "out of memory");
1395
1396         ct->c_cefile     = ce;
1397         ct->c_ceopenfnx  = openfnx;
1398         ct->c_ceclosefnx = close_encoding;
1399         ct->c_cesizefnx  = size_encoding;
1400
1401         return OK;
1402 }
1403
1404
1405 void
1406 close_encoding(CT ct)
1407 {
1408         CE ce;
1409
1410         if (!(ce = ct->c_cefile))
1411                 return;
1412
1413         if (ce->ce_fp) {
1414                 fclose(ce->ce_fp);
1415                 ce->ce_fp = NULL;
1416         }
1417 }
1418
1419
1420 static unsigned long
1421 size_encoding(CT ct)
1422 {
1423         int fd;
1424         unsigned long size;
1425         char *file;
1426         CE ce;
1427         struct stat st;
1428
1429         if (!(ce = ct->c_cefile))
1430                 return (ct->c_end - ct->c_begin);
1431
1432         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1433                 return (long) st.st_size;
1434
1435         if (ce->ce_file) {
1436                 if (stat(ce->ce_file, &st) != NOTOK)
1437                         return (long) st.st_size;
1438                 else
1439                         return 0L;
1440         }
1441
1442         if (ct->c_encoding == CE_EXTERNAL)
1443                 return (ct->c_end - ct->c_begin);
1444
1445         file = NULL;
1446         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1447                 return (ct->c_end - ct->c_begin);
1448
1449         if (fstat(fd, &st) != NOTOK)
1450                 size = (long) st.st_size;
1451         else
1452                 size = 0L;
1453
1454         (*ct->c_ceclosefnx) (ct);
1455         return size;
1456 }
1457
1458
1459 /*
1460 ** BASE64
1461 */
1462
/*
** Value of each 7-bit character code in the BASE64 alphabet.
** 'A'-'Z' give 0x00-0x19, 'a'-'z' 0x1a-0x33, '0'-'9' 0x34-0x3d,
** '+' 0x3e and '/' 0x3f.  Every other code is marked with 0xff.
** One row holds 16 codes; the row's first code is noted on the right.
*/
static unsigned char b642nib[0x80] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x00 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x10 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,  /* 0x20 */
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x30 */
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,  /* 0x40 */
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,  /* 0x50 */
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,  /* 0x60 */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff   /* 0x70 */
};
1481
1482
1483 static int
1484 InitBase64(CT ct)
1485 {
1486         return init_encoding(ct, openBase64);
1487 }
1488
1489
1490 static int
1491 openBase64(CT ct, char **file)
1492 {
1493         int bitno, cc;
1494         int fd, len, skip, own_ct_fp = 0;
1495         unsigned long bits;
1496         unsigned char value, *b, *b1, *b2, *b3;
1497         unsigned char *cp, *ep;
1498         char buffer[BUFSIZ];
1499         /* sbeck -- handle suffixes */
1500         CI ci;
1501         CE ce;
1502
1503         b  = (unsigned char *) &bits;
1504         b1 = &b[endian > 0 ? 1 : 2];
1505         b2 = &b[endian > 0 ? 2 : 1];
1506         b3 = &b[endian > 0 ? 3 : 0];
1507
1508         ce = ct->c_cefile;
1509         if (ce->ce_fp) {
1510                 fseek(ce->ce_fp, 0L, SEEK_SET);
1511                 goto ready_to_go;
1512         }
1513
1514         if (ce->ce_file) {
1515                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1516                         content_error(ce->ce_file, ct,
1517                                         "unable to fopen for reading");
1518                         return NOTOK;
1519                 }
1520                 goto ready_to_go;
1521         }
1522
1523         if (*file == NULL) {
1524                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1525                 ce->ce_unlink = 1;
1526         } else {
1527                 ce->ce_file = getcpy(*file);
1528                 ce->ce_unlink = 0;
1529         }
1530
1531         /* sbeck@cise.ufl.edu -- handle suffixes */
1532         ci = &ct->c_ctinfo;
1533         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1534                         invo_name, ci->ci_type, ci->ci_subtype);
1535         cp = context_find(buffer);
1536         if (cp == NULL || *cp == '\0') {
1537                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1538                                 ci->ci_type);
1539                 cp = context_find(buffer);
1540         }
1541         if (cp != NULL && *cp != '\0') {
1542                 if (ce->ce_unlink) {
1543                         /*
1544                         ** Temporary file already exists, so we rename to
1545                         ** version with extension.
1546                         */
1547                         char *file_org = strdup(ce->ce_file);
1548                         ce->ce_file = add(cp, ce->ce_file);
1549                         if (rename(file_org, ce->ce_file)) {
1550                                 adios(ce->ce_file, "unable to rename %s to ",
1551                                                 file_org);
1552                         }
1553                         free(file_org);
1554
1555                 } else {
1556                         ce->ce_file = add(cp, ce->ce_file);
1557                 }
1558         }
1559
1560         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1561                 content_error(ce->ce_file, ct,
1562                                 "unable to fopen for reading/writing");
1563                 return NOTOK;
1564         }
1565
1566         if ((len = ct->c_end - ct->c_begin) < 0)
1567                 adios(NULL, "internal error(1)");
1568
1569         if (!ct->c_fp) {
1570                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1571                         content_error(ct->c_file, ct,
1572                                         "unable to open for reading");
1573                         return NOTOK;
1574                 }
1575                 own_ct_fp = 1;
1576         }
1577
1578         bitno = 18;
1579         bits = 0L;
1580         skip = 0;
1581
1582         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1583         while (len > 0) {
1584                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1585                 case NOTOK:
1586                         content_error(ct->c_file, ct, "error reading from");
1587                         goto clean_up;
1588
1589                 case OK:
1590                         content_error(NULL, ct, "premature eof");
1591                         goto clean_up;
1592
1593                 default:
1594                         if (cc > len)
1595                                 cc = len;
1596                         len -= cc;
1597
1598                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1599                                 switch (*cp) {
1600                                 default:
1601                                         if (isspace(*cp))
1602                                                 break;
1603                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1604                                                 if (debugsw) {
1605                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1606                                                 }
1607                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1608                                                 continue;
1609                                         }
1610
1611                                         bits |= value << bitno;
1612 test_end:
1613                                         if ((bitno -= 6) < 0) {
1614                                                 putc((char) *b1, ce->ce_fp);
1615                                                 if (skip < 2) {
1616                                                         putc((char) *b2, ce->ce_fp);
1617                                                         if (skip < 1) {
1618                                                                 putc((char) *b3, ce->ce_fp);
1619                                                         }
1620                                                 }
1621
1622                                                 if (ferror(ce->ce_fp)) {
1623                                                         content_error(ce->ce_file, ct,
1624                                                                                    "error writing to");
1625                                                         goto clean_up;
1626                                                 }
1627                                                 bitno = 18, bits = 0L, skip = 0;
1628                                         }
1629                                         break;
1630
1631                                 case '=':
1632                                         if (++skip > 3)
1633                                                 goto self_delimiting;
1634                                         goto test_end;
1635                                 }
1636                         }
1637                 }
1638         }
1639
1640         if (bitno != 18) {
1641                 if (debugsw)
1642                         fprintf(stderr, "premature ending (bitno %d)\n",
1643                                         bitno);
1644
1645                 content_error(NULL, ct, "invalid BASE64 encoding");
1646                 goto clean_up;
1647         }
1648
1649 self_delimiting:
1650         fseek(ct->c_fp, 0L, SEEK_SET);
1651
1652         if (fflush(ce->ce_fp)) {
1653                 content_error(ce->ce_file, ct, "error writing to");
1654                 goto clean_up;
1655         }
1656
1657         fseek(ce->ce_fp, 0L, SEEK_SET);
1658
1659 ready_to_go:
1660         *file = ce->ce_file;
1661         if (own_ct_fp) {
1662                 fclose(ct->c_fp);
1663                 ct->c_fp = NULL;
1664         }
1665         return fileno(ce->ce_fp);
1666
1667 clean_up:
1668         free_encoding(ct, 0);
1669         if (own_ct_fp) {
1670                 fclose(ct->c_fp);
1671                 ct->c_fp = NULL;
1672         }
1673         return NOTOK;
1674 }
1675
1676
1677 /*
1678 ** QUOTED PRINTABLE
1679 */
1680
/*
** Value of each 7-bit character code as a hexadecimal digit.
** '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15.  All other
** codes give 0, the same as '0', so the table cannot be used to
** tell whether a character is a hexadecimal digit.
** One row holds 16 codes; the row's first code is noted on the right.
*/
static char hex2nib[0x80] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x00 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x10 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x20 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,  /* 0x30 */
	0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x40 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x50 */
	0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x60 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   /* 0x70 */
};
1699
1700
1701 static int
1702 InitQuoted(CT ct)
1703 {
1704         return init_encoding(ct, openQuoted);
1705 }
1706
1707
1708 static int
1709 openQuoted(CT ct, char **file)
1710 {
1711         int cc, len, quoted, own_ct_fp = 0;
1712         unsigned char *cp, *ep;
1713         char buffer[BUFSIZ];
1714         unsigned char mask = 0;
1715         CE ce;
1716         /* sbeck -- handle suffixes */
1717         CI ci;
1718
1719         ce = ct->c_cefile;
1720         if (ce->ce_fp) {
1721                 fseek(ce->ce_fp, 0L, SEEK_SET);
1722                 goto ready_to_go;
1723         }
1724
1725         if (ce->ce_file) {
1726                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1727                         content_error(ce->ce_file, ct,
1728                                         "unable to fopen for reading");
1729                         return NOTOK;
1730                 }
1731                 goto ready_to_go;
1732         }
1733
1734         if (*file == NULL) {
1735                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1736                 ce->ce_unlink = 1;
1737         } else {
1738                 ce->ce_file = getcpy(*file);
1739                 ce->ce_unlink = 0;
1740         }
1741
1742         /* sbeck@cise.ufl.edu -- handle suffixes */
1743         ci = &ct->c_ctinfo;
1744         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1745                         invo_name, ci->ci_type, ci->ci_subtype);
1746         cp = context_find(buffer);
1747         if (cp == NULL || *cp == '\0') {
1748                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1749                                 ci->ci_type);
1750                 cp = context_find(buffer);
1751         }
1752         if (cp != NULL && *cp != '\0') {
1753                 if (ce->ce_unlink) {
1754                         /*
1755                         ** Temporary file already exists, so we rename to
1756                         ** version with extension.
1757                         */
1758                         char *file_org = strdup(ce->ce_file);
1759                         ce->ce_file = add(cp, ce->ce_file);
1760                         if (rename(file_org, ce->ce_file)) {
1761                                 adios(ce->ce_file, "unable to rename %s to ",
1762                                                 file_org);
1763                         }
1764                         free(file_org);
1765
1766                 } else {
1767                         ce->ce_file = add(cp, ce->ce_file);
1768                 }
1769         }
1770
1771         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1772                 content_error(ce->ce_file, ct,
1773                                 "unable to fopen for reading/writing");
1774                 return NOTOK;
1775         }
1776
1777         if ((len = ct->c_end - ct->c_begin) < 0)
1778                 adios(NULL, "internal error(2)");
1779
1780         if (!ct->c_fp) {
1781                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1782                         content_error(ct->c_file, ct,
1783                                         "unable to open for reading");
1784                         return NOTOK;
1785                 }
1786                 own_ct_fp = 1;
1787         }
1788
1789         quoted = 0;
1790
1791         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1792         while (len > 0) {
1793                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1794                         content_error(NULL, ct, "premature eof");
1795                         goto clean_up;
1796                 }
1797
1798                 if ((cc = strlen(buffer)) > len)
1799                         cc = len;
1800                 len -= cc;
1801
1802                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1803                         if (!isspace(*ep))
1804                                 break;
1805                 *++ep = '\n', ep++;
1806
1807                 for (; cp < ep; cp++) {
1808                         if (quoted > 0) {
1809                                 /* in an escape sequence */
1810                                 if (quoted == 1) {
1811                                         /* at byte 1 of an escape sequence */
1812                                         mask = hex2nib[*cp & 0x7f];
1813                                         /* next is byte 2 */
1814                                         quoted = 2;
1815                                 } else {
1816                                         /* at byte 2 of an escape sequence */
1817                                         mask <<= 4;
1818                                         mask |= hex2nib[*cp & 0x7f];
1819                                         putc(mask, ce->ce_fp);
1820                                         if (ferror(ce->ce_fp)) {
1821                                                 content_error(ce->ce_file, ct, "error writing to");
1822                                                 goto clean_up;
1823                                         }
1824                                         /*
1825                                         ** finished escape sequence; next may
1826                                         ** be literal or a new escape sequence
1827                                         */
1828                                         quoted = 0;
1829                                 }
1830                                 /* on to next byte */
1831                                 continue;
1832                         }
1833
1834                         /* not in an escape sequence */
1835                         if (*cp == '=') {
1836                                 /*
1837                                 ** starting an escape sequence,
1838                                 ** or invalid '='?
1839                                 */
1840                                 if (cp + 1 < ep && cp[1] == '\n') {
1841                                         /* "=\n" soft line break, eat the \n */
1842                                         cp++;
1843                                         continue;
1844                                 }
1845                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1846                                         /*
1847                                         ** We don't have 2 bytes left,
1848                                         ** so this is an invalid escape
1849                                         ** sequence; just show the raw bytes
1850                                         ** (below).
1851                                         */
1852                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1853                                         /*
1854                                         ** Next 2 bytes are hex digits,
1855                                         ** making this a valid escape
1856                                         ** sequence; let's decode it (above).
1857                                         */
1858                                         quoted = 1;
1859                                         continue;
1860                                 } else {
1861                                         /*
1862                                         ** One or both of the next 2 is
1863                                         ** out of range, making this an
1864                                         ** invalid escape sequence; just
1865                                         ** show the raw bytes (below).
1866                                         */
1867                                 }
1868                         }
1869
1870                         /* Just show the raw byte. */
1871                         putc(*cp, ce->ce_fp);
1872                         if (ferror(ce->ce_fp)) {
1873                                 content_error(ce->ce_file, ct,
1874                                                 "error writing to");
1875                                 goto clean_up;
1876                         }
1877                 }
1878         }
1879         if (quoted) {
1880                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1881                 goto clean_up;
1882         }
1883
1884         fseek(ct->c_fp, 0L, SEEK_SET);
1885
1886         if (fflush(ce->ce_fp)) {
1887                 content_error(ce->ce_file, ct, "error writing to");
1888                 goto clean_up;
1889         }
1890
1891         fseek(ce->ce_fp, 0L, SEEK_SET);
1892
1893 ready_to_go:
1894         *file = ce->ce_file;
1895         if (own_ct_fp) {
1896                 fclose(ct->c_fp);
1897                 ct->c_fp = NULL;
1898         }
1899         return fileno(ce->ce_fp);
1900
1901 clean_up:
1902         free_encoding(ct, 0);
1903         if (own_ct_fp) {
1904                 fclose(ct->c_fp);
1905                 ct->c_fp = NULL;
1906         }
1907         return NOTOK;
1908 }
1909
1910
1911 /*
1912 ** 7BIT
1913 */
1914
1915 static int
1916 Init7Bit(CT ct)
1917 {
1918         if (init_encoding(ct, open7Bit) == NOTOK)
1919                 return NOTOK;
1920
1921         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1922         return OK;
1923 }
1924
1925
1926 int
1927 open7Bit(CT ct, char **file)
1928 {
1929         int cc, fd, len, own_ct_fp = 0;
1930         char buffer[BUFSIZ];
1931         /* sbeck -- handle suffixes */
1932         char *cp;
1933         CI ci;
1934         CE ce;
1935
1936         ce = ct->c_cefile;
1937         if (ce->ce_fp) {
1938                 fseek(ce->ce_fp, 0L, SEEK_SET);
1939                 goto ready_to_go;
1940         }
1941
1942         if (ce->ce_file) {
1943                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1944                         content_error(ce->ce_file, ct,
1945                                         "unable to fopen for reading");
1946                         return NOTOK;
1947                 }
1948                 goto ready_to_go;
1949         }
1950
1951         if (*file == NULL) {
1952                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1953                 ce->ce_unlink = 1;
1954         } else {
1955                 ce->ce_file = getcpy(*file);
1956                 ce->ce_unlink = 0;
1957         }
1958
1959         /* sbeck@cise.ufl.edu -- handle suffixes */
1960         ci = &ct->c_ctinfo;
1961         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1962                         invo_name, ci->ci_type, ci->ci_subtype);
1963         cp = context_find(buffer);
1964         if (cp == NULL || *cp == '\0') {
1965                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1966                                 ci->ci_type);
1967                 cp = context_find(buffer);
1968         }
1969         if (cp != NULL && *cp != '\0') {
1970                 if (ce->ce_unlink) {
1971                         /*
1972                         ** Temporary file already exists, so we rename to
1973                         ** version with extension.
1974                         */
1975                         char *file_org = strdup(ce->ce_file);
1976                         ce->ce_file = add(cp, ce->ce_file);
1977                         if (rename(file_org, ce->ce_file)) {
1978                                 adios(ce->ce_file, "unable to rename %s to ",
1979                                                 file_org);
1980                         }
1981                         free(file_org);
1982
1983                 } else {
1984                         ce->ce_file = add(cp, ce->ce_file);
1985                 }
1986         }
1987
1988         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1989                 content_error(ce->ce_file, ct,
1990                                 "unable to fopen for reading/writing");
1991                 return NOTOK;
1992         }
1993
1994         if (ct->c_type == CT_MULTIPART) {
1995                 char **ap, **ep;
1996                 CI ci = &ct->c_ctinfo;
1997
1998                 len = 0;
1999                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2000                                 ci->ci_subtype);
2001                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2002                                 strlen(ci->ci_subtype);
2003                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2004                         putc(';', ce->ce_fp);
2005                         len++;
2006
2007                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2008                                         *ap, *ep);
2009
2010                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2011                                 fputs("\n\t", ce->ce_fp);
2012                                 len = 8;
2013                         } else {
2014                                 putc(' ', ce->ce_fp);
2015                                 len++;
2016                         }
2017                         fprintf(ce->ce_fp, "%s", buffer);
2018                         len += cc;
2019                 }
2020
2021                 if (ci->ci_comment) {
2022                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2023                                                 >= CPERLIN) {
2024                                 fputs("\n\t", ce->ce_fp);
2025                                 len = 8;
2026                         } else {
2027                                 putc(' ', ce->ce_fp);
2028                                 len++;
2029                         }
2030                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2031                         len += cc;
2032                 }
2033                 fprintf(ce->ce_fp, "\n");
2034                 if (ct->c_id)
2035                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2036                 if (ct->c_descr)
2037                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2038                 if (ct->c_dispo)
2039                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2040                 fprintf(ce->ce_fp, "\n");
2041         }
2042
2043         if ((len = ct->c_end - ct->c_begin) < 0)
2044                 adios(NULL, "internal error(3)");
2045
2046         if (!ct->c_fp) {
2047                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2048                         content_error(ct->c_file, ct,
2049                                         "unable to open for reading");
2050                         return NOTOK;
2051                 }
2052                 own_ct_fp = 1;
2053         }
2054
2055         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2056         while (len > 0)
2057                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2058                 case NOTOK:
2059                         content_error(ct->c_file, ct, "error reading from");
2060                         goto clean_up;
2061
2062                 case OK:
2063                         content_error(NULL, ct, "premature eof");
2064                         goto clean_up;
2065
2066                 default:
2067                         if (cc > len)
2068                                 cc = len;
2069                         len -= cc;
2070
2071                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2072                         if (ferror(ce->ce_fp)) {
2073                                 content_error(ce->ce_file, ct,
2074                                                 "error writing to");
2075                                 goto clean_up;
2076                         }
2077                 }
2078
2079         fseek(ct->c_fp, 0L, SEEK_SET);
2080
2081         if (fflush(ce->ce_fp)) {
2082                 content_error(ce->ce_file, ct, "error writing to");
2083                 goto clean_up;
2084         }
2085
2086         fseek(ce->ce_fp, 0L, SEEK_SET);
2087
2088 ready_to_go:
2089         *file = ce->ce_file;
2090         if (own_ct_fp) {
2091                 fclose(ct->c_fp);
2092                 ct->c_fp = NULL;
2093         }
2094         return fileno(ce->ce_fp);
2095
2096 clean_up:
2097         free_encoding(ct, 0);
2098         if (own_ct_fp) {
2099                 fclose(ct->c_fp);
2100                 ct->c_fp = NULL;
2101         }
2102         return NOTOK;
2103 }