488557281a2553ea7b70e714ea3f06f3ea69fa08
[mmh] / uip / mhparse.c
1 /*
2 ** mhparse.c -- routines to parse the contents of MIME messages
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <errno.h>
13 #include <h/tws.h>
14 #include <h/mime.h>
15 #include <h/mhparse.h>
16 #include <h/utils.h>
17 #include <unistd.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <sysexits.h>
21
   /*
   ** Globals shared with other source files.  debugsw is tested in
   ** this file before echoing parsed header fields to stderr.
   */
22 extern int debugsw;
23
24 extern int endian;  /* mhmisc.c */
25
26 extern pid_t xpid;  /* mhshowsbr.c  */
27
28 /*
29 ** Directory to place temp files.  This must
30 ** be set before these routines are called.
31 */
32 char *tmp;
33
34 /*
35 ** Structures for TEXT messages
   **
   ** SubText pairs each known text subtype name with its TEXT_* code.
   ** The NULL-keyed row terminates the table and carries TEXT_UNKNOWN;
   ** presumably the lookup code (not in this part of the file) falls
   ** through to it for unrecognized subtypes, so it has to stay last.
36 */
37 struct k2v SubText[] = {
38         { "plain", TEXT_PLAIN },
39         { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341 */
40         { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896 */
41         { NULL, TEXT_UNKNOWN }  /* this one must be last! */
42 };
43
   /*
   ** Character set names paired with their CHARSET_* codes.  The
   ** NULL-keyed row terminates the table and carries CHARSET_UNKNOWN.
   */
44 struct k2v Charset[] = {
45         { "us-ascii",   CHARSET_USASCII },
46         { "iso-8859-1", CHARSET_LATIN },
47         { NULL,         CHARSET_UNKNOWN }  /* this one must be last! */
48 };
49
50 /*
51 ** Structures for MULTIPART messages
   **
   ** Multipart subtype names paired with their MULTI_* codes.  The
   ** NULL-keyed row terminates the table and carries MULTI_UNKNOWN.
52 */
53 struct k2v SubMultiPart[] = {
54         { "mixed",       MULTI_MIXED },
55         { "alternative", MULTI_ALTERNATE },
56         { "digest",      MULTI_DIGEST },
57         { "parallel",    MULTI_PARALLEL },
58         { NULL,          MULTI_UNKNOWN }  /* this one must be last! */
59 };
60
61 /*
62 ** Structures for MESSAGE messages
   **
   ** Message subtype names paired with their MESSAGE_* codes.  The
   ** NULL-keyed row terminates the table and carries MESSAGE_UNKNOWN.
63 */
64 struct k2v SubMessage[] = {
65         { "rfc822",        MESSAGE_RFC822 },
66         { "partial",       MESSAGE_PARTIAL },
67         { "external-body", MESSAGE_EXTERNAL },
68         { NULL,            MESSAGE_UNKNOWN }  /* this one must be last! */
69 };
70
71 /*
72 ** Structure for APPLICATION messages
   **
   ** Application subtype names paired with their APPLICATION_* codes.
   ** The NULL-keyed row terminates the table and carries
   ** APPLICATION_UNKNOWN.
73 */
74 struct k2v SubApplication[] = {
75         { "octet-stream", APPLICATION_OCTETS },
76         { "postscript",   APPLICATION_POSTSCRIPT },
77         { NULL,           APPLICATION_UNKNOWN }  /* this one must be last! */
78 };
79
80
   /*
   ** Hand-written prototypes for functions implemented in other source
   ** files; the comment above each group names the defining file.
   */
81 /* mhmisc.c */
82 int part_ok(CT, int);
83 int type_ok(CT, int);
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
86
87 /* mhfree.c */
88 void free_content(CT);  /* used here to discard a content on parse failure */
89 void free_encoding(CT, int);
90
91 /*
92 ** static prototypes
   **
   ** The Init* functions are referenced from the str2cts and str2ces
   ** tables below and are called through those tables' si_init member
   ** (directly, or after being stored in a content's c_ctinitfnx).
93 */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
96
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
    /*
    ** Content-Type dispatch table: major type name, CT_* code, and the
    ** Init function that get_content() stores in c_ctinitfnx.
    **
    ** get_content() scans until the first NULL key, which lands on the
    ** CT_EXTENSION row; when the type then fails its "X-" check it
    ** steps one row further to CT_UNKNOWN.  The two NULL-keyed rows
    ** must therefore stay last and in this order.
    */
111 struct str2init str2cts[] = {
112         { "application", CT_APPLICATION, InitApplication },
113         { "audio",       CT_AUDIO,       InitGeneric },
114         { "image",       CT_IMAGE,       InitGeneric },
115         { "message",     CT_MESSAGE,     InitMessage },
116         { "multipart",   CT_MULTIPART,   InitMultiPart },
117         { "text",        CT_TEXT,        InitText },
118         { "video",       CT_VIDEO,       InitGeneric },
119         { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
120         { NULL,          CT_UNKNOWN,     NULL },
121 };
122
    /*
    ** Content-Transfer-Encoding dispatch table: encoding name, CE_*
    ** code, and the Init function get_content() calls for it.  8bit,
    ** 7bit and binary all share Init7Bit.
    **
    ** As with str2cts, the scan stops on the first NULL key
    ** (CE_EXTENSION) and may step one row further to CE_UNKNOWN, so
    ** the two NULL-keyed rows must stay last and in this order.
    */
123 struct str2init str2ces[] = {
124         { "base64",           CE_BASE64,    InitBase64 },
125         { "quoted-printable", CE_QUOTED,    InitQuoted },
126         { "8bit",             CE_8BIT,      Init7Bit },
127         { "7bit",             CE_7BIT,      Init7Bit },
128         { "binary",           CE_BINARY,    Init7Bit },
129         { NULL,               CE_EXTENSION, NULL }, /* these two must be last! */
130         { NULL,               CE_UNKNOWN,    NULL },
131 };
132
133
/*
** Examine a child's wait status.
**
** The status is handed back unchanged unless its low seven bits equal
** SIGQUIT and its second byte is not 0xff.  In that one case stdout
** and stderr are flushed and the process exits with EX_SOFTWARE.
*/
int
pidcheck(int status)
{
        int second_byte_is_ff = ((status & 0xff00) == 0xff00);
        int low_bits_are_quit = ((status & 0x007f) == SIGQUIT);

        if (!second_byte_is_ff && low_bits_are_quit) {
                fflush(stdout);
                fflush(stderr);
                exit(EX_SOFTWARE);
        }

        return status;
}
145
146
147 /*
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
151 */
152 CT
153 parse_mime(char *file)
154 {
155         int is_stdin;
156         char buffer[BUFSIZ];
157         FILE *fp;
158         CT ct;
159
160         /*
161         ** Check if file is actually standard input
162         */
163         if ((is_stdin = (strcmp(file, "-")==0))) {
164                 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165                 if (tfile == NULL) {
166                         advise("mhparse", "unable to create temporary file");
167                         return NULL;
168                 }
169                 file = getcpy(tfile);
170                 chmod(file, 0600);
171
172                 while (fgets(buffer, sizeof(buffer), stdin))
173                         fputs(buffer, fp);
174                 fflush(fp);
175
176                 if (ferror(stdin)) {
177                         unlink(file);
178                         advise("stdin", "error reading");
179                         return NULL;
180                 }
181                 if (ferror(fp)) {
182                         unlink(file);
183                         advise(file, "error writing");
184                         return NULL;
185                 }
186                 fseek(fp, 0L, SEEK_SET);
187         } else if ((fp = fopen(file, "r")) == NULL) {
188                 advise(file, "unable to read");
189                 return NULL;
190         }
191
192         if (!(ct = get_content(fp, file, 1))) {
193                 if (is_stdin)
194                         unlink(file);
195                 advise(NULL, "unable to decode %s", file);
196                 return NULL;
197         }
198
199         if (is_stdin)
200                 ct->c_unlink = 1;  /* temp file to remove */
201
202         ct->c_fp = NULL;
203
204         if (ct->c_end == 0L) {
205                 fseek(fp, 0L, SEEK_END);
206                 ct->c_end = ftell(fp);
207         }
208
209         if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
210                 fclose(fp);
211                 free_content(ct);
212                 return NULL;
213         }
214
215         fclose(fp);
216         return ct;
217 }
218
219
220 /*
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
223 **
    ** A new content structure is allocated for the stream `in' (which
    ** belongs to the file named `file').  The header fields are read
    ** with m_getfld() and appended to the content's header list; that
    ** list is then walked to fill in the MIME related members:
    ** c_vrsn, c_ctline/c_ctinfo, c_type, c_ctinitfnx, c_celine,
    ** c_encoding, c_id, c_descr and c_dispo.  c_begin is left at the
    ** offset where the body starts.
    **
    ** A missing Content-Type defaults to text/plain (message/rfc822
    ** when toplevel < 0); a missing Content-Transfer-Encoding
    ** defaults to 7bit.
    **
    ** Returns the new content, or NULL after releasing it with
    ** free_content() when a header could not be parsed.
    **
224 ** toplevel =  1   # we are at the top level of the message
225 ** toplevel =  0   # we are inside message type or multipart type
226 **                 # other than multipart/digest
227 ** toplevel = -1   # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
229 */
230
231 static CT
232 get_content(FILE *in, char *file, int toplevel)
233 {
234         int compnum, state;
235         char buf[BUFSIZ], name[NAMESZ];
236         char *np, *vp;
237         CT ct;
238         HF hp;
239
240         /* allocate the content structure */
241         if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242                 adios(EX_OSERR, NULL, "out of memory");
243
244         ct->c_fp = in;
245         ct->c_file = getcpy(file);
            /* provisional; every non-fatal case below overwrites it */
246         ct->c_begin = ftell(ct->c_fp) + 1;
247
248         /*
249         ** Parse the header fields for this
250         ** content into a linked list.
251         */
252         for (compnum = 1, state = FLD;;) {
253                 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
254                 case FLD:
255                 case FLDPLUS:
256                         compnum++;
257
258                         /* get copies of the buffers */
259                         np = getcpy(name);
260                         vp = getcpy(buf);
261
262                         /* if necessary, get rest of field */
263                         while (state == FLDPLUS) {
264                                 state = m_getfld(state, name, buf,
265                                                 sizeof(buf), in);
266                                 vp = add(buf, vp);  /* add to previous value */
267                         }
268
269                         /* Now add the header data to the list */
270                         add_header(ct, np, vp);
271
272                         ct->c_begin = ftell(in) + 1;
273                         continue;
274
275                 case BODY:
                            /*
                            ** Back up by the length of buf; presumably
                            ** m_getfld() has already read the start of
                            ** the body into it.
                            */
276                         ct->c_begin = ftell(in) - strlen(buf);
277                         break;
278
279                 case FILEEOF:
280                         ct->c_begin = ftell(in);
281                         break;
282
283                 case LENERR:
284                 case FMTERR:
                            /*
                            ** No break follows: adios() is presumably
                            ** expected not to return -- TODO confirm.
                            */
285                         adios(EX_DATAERR, NULL, "message format error in component #%d",
286                                         compnum);
287
288                 default:
289                         adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
290                 }
291
292                 /* break out of the loop */
293                 break;
294         }
295
296         /*
297         ** Read the content headers.  We will parse the
298         ** MIME related header fields into their various
299         ** structures and set internal flags related to
300         ** content type/subtype, etc.
301         */
302
303         hp = ct->c_first_hf;  /* start at first header field */
304         while (hp) {
305                 /* Get MIME-Version field */
306                 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
307                         int ucmp;
308                         char c;
309                         unsigned char *cp, *dp;
310
                            /* only the first occurrence is used */
311                         if (ct->c_vrsn) {
312                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
313                                 goto next_header;
314                         }
315                         ct->c_vrsn = getcpy(hp->value);
316
317                         /* Now, cleanup this field */
318                         cp = ct->c_vrsn;
319
                            /*
                            ** skip leading space, turn newlines into
                            ** spaces, cut trailing space
                            */
320                         while (isspace(*cp))
321                                 cp++;
322                         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
323                                 *dp++ = ' ';
324                         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
325                                 if (!isspace(*dp))
326                                         break;
327                         *++dp = '\0';
328                         if (debugsw)
329                                 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
330
331                         if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
332                                 goto out;
333
                            /*
                            ** Terminate the leading token temporarily so
                            ** it can be compared with VRSN_VALUE, then
                            ** put the saved character back.
                            */
334                         for (dp = cp; istoken(*dp); dp++)
335                                 continue;
336                         c = *dp;
337                         *dp = '\0';
338                         ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
339                         *dp = c;
340                         if (!ucmp) {
341                                 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
342                         }
343
344                 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
345                         /* Get Content-Type field */
346                         struct str2init *s2i;
347                         CI ci = &ct->c_ctinfo;
348
349                         /* Check if we've already seen a Content-Type header */
350                         if (ct->c_ctline) {
351                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
352                                 goto next_header;
353                         }
354
355                         /* Parse the Content-Type field */
356                         if (get_ctinfo(hp->value, ct, 0) == NOTOK)
357                                 goto out;
358
359                         /*
360                         ** Set the Init function and the internal
361                         ** flag for this content type.
                            **
                            ** The scan stops either on a matching row or
                            ** on the first NULL-keyed row (CT_EXTENSION).
                            ** In the latter case one more step to the
                            ** CT_UNKNOWN row is taken when uprf() rejects
                            ** the "X-" check.
362                         */
363                         for (s2i = str2cts; s2i->si_key; s2i++)
364                                 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
365                                         break;
366                         if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
367                                 s2i++;
368                         ct->c_type = s2i->si_val;
369                         ct->c_ctinitfnx = s2i->si_init;
370
371                 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
372                         /* Get Content-Transfer-Encoding field */
373                         char c;
374                         unsigned char *cp, *dp;
375                         struct str2init *s2i;
376
377                         /*
378                         ** Check if we've already seen the
379                         ** Content-Transfer-Encoding field
380                         */
381                         if (ct->c_celine) {
382                                 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
383                                 goto next_header;
384                         }
385
386                         /* get copy of this field */
387                         ct->c_celine = cp = getcpy(hp->value);
388
                            /*
                            ** isolate the encoding token; the saved
                            ** character is restored after the lookup
                            */
389                         while (isspace(*cp))
390                                 cp++;
391                         for (dp = cp; istoken(*dp); dp++)
392                                 continue;
393                         c = *dp;
394                         *dp = '\0';
395
396                         /*
397                         ** Find the internal flag and Init function
398                         ** for this transfer encoding.
                            ** (Same sentinel handling as for str2cts.)
399                         */
400                         for (s2i = str2ces; s2i->si_key; s2i++)
401                                 if (!mh_strcasecmp(cp, s2i->si_key))
402                                         break;
403                         if (!s2i->si_key && !uprf(cp, "X-"))
404                                 s2i++;
405                         *dp = c;
406                         ct->c_encoding = s2i->si_val;
407
408                         /* Call the Init function for this encoding */
409                         if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
410                                 goto out;
411
                    /*
                    ** The three fields below are not checked for
                    ** duplicates; each occurrence is passed to add()
                    ** together with the value collected so far.
                    */
412                 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
413                         /* Get Content-ID field */
414                         ct->c_id = add(hp->value, ct->c_id);
415
416                 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
417                         /* Get Content-Description field */
418                         ct->c_descr = add(hp->value, ct->c_descr);
419
420                 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
421                         /* Get Content-Disposition field */
422                         ct->c_dispo = add(hp->value, ct->c_dispo);
423                 }
424
425 next_header:
426                 hp = hp->next;  /* next header field */
427         }
428
429         /*
430         ** Check if we saw a Content-Type field.
431         ** If not, then assign a default value for
432         ** it, and the Init function.
433         */
434         if (!ct->c_ctline) {
435                 /*
436                 ** If we are inside a multipart/digest message,
437                 ** the default type is message/rfc822
438                 */
439                 if (toplevel < 0) {
440                         if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
441                                 goto out;
442                         ct->c_type = CT_MESSAGE;
443                         ct->c_ctinitfnx = InitMessage;
444                 } else {
445                         /*
446                         ** Else default type is text/plain
447                         */
448                         if (get_ctinfo("text/plain", ct, 0) == NOTOK)
449                                 goto out;
450                         ct->c_type = CT_TEXT;
451                         ct->c_ctinitfnx = InitText;
452                 }
453         }
454
455         /* Use default Transfer-Encoding, if necessary */
456         if (!ct->c_celine) {
457                 ct->c_encoding = CE_7BIT;
458                 Init7Bit(ct);
459         }
460
461         return ct;
462
    /* common failure exit: release the partially built content */
463 out:
464         free_content(ct);
465         return NULL;
466 }
467
468
469 /*
470 ** small routine to add header field to list
471 */
472
473 int
474 add_header(CT ct, char *name, char *value)
475 {
476         HF hp;
477
478         /* allocate header field structure */
479         hp = mh_xmalloc(sizeof(*hp));
480
481         /* link data into header structure */
482         hp->name = name;
483         hp->value = value;
484         hp->next = NULL;
485
486         /* link header structure into the list */
487         if (ct->c_first_hf == NULL) {
488                 ct->c_first_hf = hp;  /* this is the first */
489                 ct->c_last_hf = hp;
490         } else {
491                 ct->c_last_hf->next = hp;  /* add it to the end */
492                 ct->c_last_hf = hp;
493         }
494
495         return 0;
496 }
497
498
/*
** Make sure that buf contains at least one appearance of name,
** followed by =.  If not, build `; name="value"' and insert it in
** front of the first semicolon of buf, or append it if buf has no
** semicolon; the result ends with a newline.  Note that name should
** not contain a trailing =.  Quotes are added around the value.
** Typical usage:  make sure that a Content-Disposition header
** contains filename="foo".
**
** buf must be heap-allocated or NULL.  When an insertion is made, buf
** is freed and a newly allocated string is returned; otherwise buf
** itself is returned unchanged.  Nothing is done if buf or value is
** NULL.  name is assumed to be non-null.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
        char *text = (char *) buf;
        char *newbuf = text;
        char *name_plus_equal;

        if (!buf || !value)
                return newbuf;

        name_plus_equal = concat(name, "=", NULL);

        if (!strstr(text, name_plus_equal)) {
                char *insertion;
                char *prefix, *suffix, *semi;
                size_t len;

                /*
                ** Trim trailing space, esp. newline.  Working with a
                ** length keeps an empty buf from indexing buf[-1].
                */
                len = strlen(text);
                while (len > 0 && isspace(buf[len - 1]))
                        buf[--len] = '\0';

                insertion = concat("; ", name, "=", "\"", value, "\"",
                                NULL);

                /*
                ** Insert at first semicolon, if any.
                ** If none, append to end.
                */
                prefix = getcpy(text);
                if ((semi = strchr(prefix, ';'))) {
                        /* suffix keeps the semicolon and what follows */
                        suffix = concat(semi, NULL);
                        *semi = '\0';
                        newbuf = concat(prefix, insertion, suffix,
                                        "\n", NULL);
                        free(suffix);
                } else {
                        /* Append to end. */
                        newbuf = concat(text, insertion, "\n", NULL);
                }

                free(prefix);
                free(insertion);
                free(buf);
        }

        free(name_plus_equal);

        return newbuf;
}
556
/*
** Extract just the foo of name_suffix="foo", if any, from value.
** Note that, for example, a name_suffix of name will also match
** filename="foo", and return foo.
**
** Returns a newly allocated copy of the quoted text when a match
** with a closing quote is found.  Otherwise (no match, or the
** closing quote is missing) `value' itself is returned.  The caller
** can tell the two cases apart by comparing the result with `value'
** and owns the result only when it differs.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
        char *pattern = concat(name_suffix, "=\"", NULL);
        char *match = strstr(value, pattern);
        char *begin, *end, *extracted;
        size_t len;

        free(pattern);
        if (!match)
                return value;

        /* The match contains a quote, so this strchr cannot fail. */
        begin = strchr(match, '"') + 1;

        /*
        ** Look for the closing quote without running past the end of
        ** the string; an unterminated value counts as "no match".
        */
        end = strchr(begin, '"');
        if (!end)
                return value;

        len = (size_t)(end - begin);
        extracted = mh_xmalloc(len + 1);
        memcpy(extracted, begin, len);
        extracted[len] = '\0';

        return extracted;
}
589
590 /*
591 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
592 ** directives.  Fills in the information of the CTinfo structure.
593 */
594 int
595 get_ctinfo(unsigned char *cp, CT ct, int magic)
596 {
597         int i;
598         unsigned char *dp;
599         char **ap, **ep;
600         char c;
601         CI ci;
602
603         ci = &ct->c_ctinfo;
604         i = strlen(invo_name) + 2;
605
606         /* store copy of Content-Type line */
607         cp = ct->c_ctline = getcpy(cp);
608
609         while (isspace(*cp))  /* trim leading spaces */
610                 cp++;
611
612         /* change newlines to spaces */
613         for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
614                 *dp++ = ' ';
615
616         /* trim trailing spaces */
617         for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
618                 if (!isspace(*dp))
619                         break;
620         *++dp = '\0';
621
622         if (debugsw)
623                 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
624
625         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
626                 return NOTOK;
627
628         for (dp = cp; istoken(*dp); dp++)
629                 continue;
630         c = *dp, *dp = '\0';
631         ci->ci_type = getcpy(cp);  /* store content type */
632         *dp = c, cp = dp;
633
634         if (!*ci->ci_type) {
635                 advise(NULL, "invalid %s: field in message %s (empty type)",
636                                 TYPE_FIELD, ct->c_file);
637                 return NOTOK;
638         }
639
640         /* down case the content type string */
641         for (dp = ci->ci_type; *dp; dp++)
642                 if (isalpha(*dp) && isupper(*dp))
643                         *dp = tolower(*dp);
644
645         while (isspace(*cp))
646                 cp++;
647
648         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
649                 return NOTOK;
650
651         if (*cp != '/') {
652                 if (!magic)
653                         ci->ci_subtype = getcpy("");
654                 goto magic_skip;
655         }
656
657         cp++;
658         while (isspace(*cp))
659                 cp++;
660
661         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
662                 return NOTOK;
663
664         for (dp = cp; istoken(*dp); dp++)
665                 continue;
666         c = *dp, *dp = '\0';
667         ci->ci_subtype = getcpy(cp);  /* store the content subtype */
668         *dp = c, cp = dp;
669
670         if (!*ci->ci_subtype) {
671                 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
672                 return NOTOK;
673         }
674
675         /* down case the content subtype string */
676         for (dp = ci->ci_subtype; *dp; dp++)
677                 if (isalpha(*dp) && isupper(*dp))
678                         *dp = tolower(*dp);
679
680 magic_skip:
681         while (isspace(*cp))
682                 cp++;
683
684         if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
685                 return NOTOK;
686
687         /*
688         ** Parse attribute/value pairs given with Content-Type
689         */
690         ep = (ap = ci->ci_attrs) + NPARMS;
691         while (*cp == ';') {
692                 char *vp;
693                 unsigned char *up;
694
695                 if (ap >= ep) {
696                         advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
697                         return NOTOK;
698                 }
699
700                 cp++;
701                 while (isspace(*cp))
702                         cp++;
703
704                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
705                         return NOTOK;
706
707                 if (*cp == 0) {
708                         advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
709                         return OK;
710                 }
711
712                 /* down case the attribute name */
713                 for (dp = cp; istoken(*dp); dp++)
714                         if (isalpha(*dp) && isupper(*dp))
715                                 *dp = tolower(*dp);
716
717                 for (up = dp; isspace(*dp);)
718                         dp++;
719                 if (dp == cp || *dp != '=') {
720                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
721                         return NOTOK;
722                 }
723
724                 vp = (*ap = getcpy(cp)) + (up - cp);
725                 *vp = '\0';
726                 for (dp++; isspace(*dp);)
727                         dp++;
728
729                 /* now add the attribute value */
730                 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
731
732                 if (*dp == '"') {
733                         for (cp = ++dp, dp = vp;;) {
734                                 switch (c = *cp++) {
735                                 case '\0':
736 bad_quote:
737                                         advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
738                                         return NOTOK;
739
740                                 case '\\':
741                                         *dp++ = c;
742                                         if ((c = *cp++) == '\0')
743                                                 goto bad_quote;
744                                         /* else fall... */
745
746                                 default:
747                                         *dp++ = c;
748                                         continue;
749
750                                 case '"':
751                                         *dp = '\0';
752                                         break;
753                                 }
754                                 break;
755                         }
756                 } else {
757                         for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
758                                 continue;
759                         *dp = '\0';
760                 }
761                 if (!*vp) {
762                         advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
763                         *ci->ci_values[ap - ci->ci_attrs] = '\0';
764                         *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
765                         continue;
766                 }
767                 ap++;
768
769                 while (isspace(*cp))
770                         cp++;
771
772                 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
773                         return NOTOK;
774         }
775
776         /*
777         ** Get any <Content-Id> given in buffer
778         */
779         if (magic && *cp == '<') {
780                 if (ct->c_id) {
781                         free(ct->c_id);
782                         ct->c_id = NULL;
783                 }
784                 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
785                         advise(NULL, "invalid ID in message %s", ct->c_file);
786                         return NOTOK;
787                 }
788                 c = *dp;
789                 *dp = '\0';
790                 if (*ct->c_id)
791                         ct->c_id = concat("<", ct->c_id, ">\n", NULL);
792                 else
793                         ct->c_id = NULL;
794                 *dp++ = c;
795                 cp = dp;
796
797                 while (isspace(*cp))
798                         cp++;
799         }
800
801         /*
802         ** Get any [Content-Description] given in buffer.
803         */
804         if (magic && *cp == '[') {
805                 ct->c_descr = ++cp;
806                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
807                         if (*dp == ']')
808                                 break;
809                 if (dp < cp) {
810                         advise(NULL, "invalid description in message %s",
811                                         ct->c_file);
812                         ct->c_descr = NULL;
813                         return NOTOK;
814                 }
815
816                 c = *dp;
817                 *dp = '\0';
818                 if (*ct->c_descr)
819                         ct->c_descr = concat(ct->c_descr, "\n", NULL);
820                 else
821                         ct->c_descr = NULL;
822                 *dp++ = c;
823                 cp = dp;
824
825                 while (isspace(*cp))
826                         cp++;
827         }
828
829         /*
830         ** Get any {Content-Disposition} given in buffer.
831         */
832         if (magic && *cp == '{') {
833                 ct->c_dispo = ++cp;
834                 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
835                         if (*dp == '}')
836                                 break;
837                 if (dp < cp) {
838                         advise(NULL, "invalid disposition in message %s",
839                                         ct->c_file);
840                         ct->c_dispo = NULL;
841                         return NOTOK;
842                 }
843
844                 c = *dp;
845                 *dp = '\0';
846                 if (*ct->c_dispo)
847                         ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
848                 else
849                         ct->c_dispo = NULL;
850                 *dp++ = c;
851                 cp = dp;
852
853                 while (isspace(*cp))
854                         cp++;
855         }
856
857         /*
858         ** Check if anything is left over
859         */
860         if (*cp) {
861                 if (magic) {
862                         ci->ci_magic = getcpy(cp);
863
864                         /*
865                         ** If there is a Content-Disposition header and
866                         ** it doesn't have a *filename=, extract it from
867                         ** the magic contents.  The mhbasename call skips
868                         ** any leading directory components.
869                         */
870                         if (ct->c_dispo)
871                                 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
872                         } else
873                                 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
874         }
875
876         return OK;
877 }
878
879
880 static int
881 get_comment(CT ct, unsigned char **ap, int istype)
882 {
883         int i;
884         char *bp;
885         unsigned char *cp;
886         char c, buffer[BUFSIZ], *dp;
887         CI ci;
888
889         ci = &ct->c_ctinfo;
890         cp = *ap;
891         bp = buffer;
892         cp++;
893
894         for (i = 0;;) {
895                 switch (c = *cp++) {
896                 case '\0':
897 invalid:
898                 advise(NULL, "invalid comment in message %s's %s: field",
899                                 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
900                 return NOTOK;
901
902                 case '\\':
903                         *bp++ = c;
904                         if ((c = *cp++) == '\0')
905                                 goto invalid;
906                         *bp++ = c;
907                         continue;
908
909                 case '(':
910                         i++;
911                         /* and fall... */
912                 default:
913                         *bp++ = c;
914                         continue;
915
916                 case ')':
917                         if (--i < 0)
918                                 break;
919                         *bp++ = c;
920                         continue;
921                 }
922                 break;
923         }
924         *bp = '\0';
925
926         if (istype) {
927                 if ((dp = ci->ci_comment)) {
928                         ci->ci_comment = concat(dp, " ", buffer, NULL);
929                         free(dp);
930                 } else {
931                         ci->ci_comment = getcpy(buffer);
932                 }
933         }
934
935         while (isspace(*cp))
936                 cp++;
937
938         *ap = cp;
939         return OK;
940 }
941
942
943 /*
944 ** CONTENTS
945 **
946 ** Handles content types audio, image, and video.
947 ** There's not much to do right here.
948 */
949
950 static int
951 InitGeneric(CT ct)
952 {
953         return OK;  /* not much to do here */
954 }
955
956
957 /*
958 ** TEXT
959 */
960
961 static int
962 InitText(CT ct)
963 {
964         char **ap, **ep;
965         struct k2v *kv;
966         struct text *t;
967         CI ci = &ct->c_ctinfo;
968
969         /* check for missing subtype */
970         if (!*ci->ci_subtype)
971                 ci->ci_subtype = add("plain", ci->ci_subtype);
972
973         /* match subtype */
974         for (kv = SubText; kv->kv_key; kv++)
975                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
976                         break;
977         ct->c_subtype = kv->kv_value;
978
979         /* allocate text character set structure */
980         if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
981                 adios(EX_OSERR, NULL, "out of memory");
982         ct->c_ctparams = (void *) t;
983
984         /* scan for charset parameter */
985         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
986                 if (!mh_strcasecmp(*ap, "charset"))
987                         break;
988
989         /* check if content specified a character set */
990         if (*ap) {
991                 /* store its name */
992                 ct->c_charset = getcpy(norm_charmap(*ep));
993                 /* match character set or set to CHARSET_UNKNOWN */
994                 for (kv = Charset; kv->kv_key; kv++) {
995                         if (!mh_strcasecmp(*ep, kv->kv_key)) {
996                                 break;
997                         }
998                 }
999                 t->tx_charset = kv->kv_value;
1000         } else {
1001                 t->tx_charset = CHARSET_UNSPECIFIED;
1002         }
1003
1004         return OK;
1005 }
1006
1007
1008 /*
1009 ** MULTIPART
1010 */
1011
1012 static int
1013 InitMultiPart(CT ct)
1014 {
1015         int inout;
1016         long last, pos;
1017         unsigned char *cp, *dp;
1018         char **ap, **ep;
1019         char *bp, buffer[BUFSIZ];
1020         struct multipart *m;
1021         struct k2v *kv;
1022         struct part *part, **next;
1023         CI ci = &ct->c_ctinfo;
1024         CT p;
1025         FILE *fp;
1026
1027         /*
1028         ** The encoding for multipart messages must be either
1029         ** 7bit, 8bit, or binary (per RFC2045).
1030         */
1031         if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1032                 && ct->c_encoding != CE_BINARY) {
1033                 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1034                 ct->c_encoding = CE_7BIT;
1035         }
1036
1037         /* match subtype */
1038         for (kv = SubMultiPart; kv->kv_key; kv++)
1039                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1040                         break;
1041         ct->c_subtype = kv->kv_value;
1042
1043         /*
1044         ** Check for "boundary" parameter, which is
1045         ** required for multipart messages.
1046         */
1047         bp = 0;
1048         for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1049                 if (!mh_strcasecmp(*ap, "boundary")) {
1050                         bp = *ep;
1051                         break;
1052                 }
1053         }
1054
1055         /* complain if boundary parameter is missing */
1056         if (!*ap) {
1057                 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1058                 return NOTOK;
1059         }
1060
1061         /* allocate primary structure for multipart info */
1062         if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1063                 adios(EX_OSERR, NULL, "out of memory");
1064         ct->c_ctparams = (void *) m;
1065
1066         /* check if boundary parameter contains only whitespace characters */
1067         for (cp = bp; isspace(*cp); cp++)
1068                 continue;
1069         if (!*cp) {
1070                 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1071                 return NOTOK;
1072         }
1073
1074         /* remove trailing whitespace from boundary parameter */
1075         for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1076                 if (!isspace(*dp))
1077                         break;
1078         *++dp = '\0';
1079
1080         /* record boundary separators */
1081         m->mp_start = concat(bp, "\n", NULL);
1082         m->mp_stop = concat(bp, "--\n", NULL);
1083
1084         if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1085                 advise(ct->c_file, "unable to open for reading");
1086                 return NOTOK;
1087         }
1088
1089         fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1090         last = ct->c_end;
1091         next = &m->mp_parts;
1092         part = NULL;
1093         inout = 1;
1094
1095         while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1096                 if (pos > last)
1097                         break;
1098
1099                 pos += strlen(buffer);
1100                 if (buffer[0] != '-' || buffer[1] != '-')
1101                         continue;
1102                 if (inout) {
1103                         if (strcmp(buffer + 2, m->mp_start)!=0)
1104                                 continue;
1105 next_part:
1106                         if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1107                                         == NULL)
1108                                 adios(EX_OSERR, NULL, "out of memory");
1109                         *next = part;
1110                         next = &part->mp_next;
1111
1112                         if (!(p = get_content(fp, ct->c_file,
1113                                 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1114                                 ct->c_fp = NULL;
1115                                 return NOTOK;
1116                         }
1117                         p->c_fp = NULL;
1118                         part->mp_part = p;
1119                         pos = p->c_begin;
1120                         fseek(fp, pos, SEEK_SET);
1121                         inout = 0;
1122                 } else {
1123                         if (strcmp(buffer + 2, m->mp_start) == 0) {
1124                                 inout = 1;
1125 end_part:
1126                                 p = part->mp_part;
1127                                 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1128                                 if (p->c_end < p->c_begin)
1129                                         p->c_begin = p->c_end;
1130                                 if (inout)
1131                                         goto next_part;
1132                                 goto last_part;
1133                         } else {
1134                                 if (strcmp(buffer + 2, m->mp_stop) == 0)
1135                                         goto end_part;
1136                         }
1137                 }
1138         }
1139
1140         advise(NULL, "bogus multipart content in message %s", ct->c_file);
1141         if (!inout && part) {
1142                 p = part->mp_part;
1143                 p->c_end = ct->c_end;
1144
1145                 if (p->c_begin >= p->c_end) {
1146                         for (next = &m->mp_parts; *next != part;
1147                                 next = &((*next)->mp_next))
1148                                 continue;
1149                         *next = NULL;
1150                         free_content(p);
1151                         free((char *) part);
1152                 }
1153         }
1154
1155 last_part:
1156         /* reverse the order of the parts for multipart/alternative */
1157         if (ct->c_subtype == MULTI_ALTERNATE)
1158                 reverse_parts(ct);
1159
1160         /*
1161         ** label all subparts with part number, and
1162         ** then initialize the content of the subpart.
1163         */
1164         {
1165                 int partnum;
1166                 char *pp;
1167                 char partnam[BUFSIZ];
1168
1169                 if (ct->c_partno) {
1170                         snprintf(partnam, sizeof(partnam), "%s.",
1171                                         ct->c_partno);
1172                         pp = partnam + strlen(partnam);
1173                 } else {
1174                         pp = partnam;
1175                 }
1176
1177                 for (part = m->mp_parts, partnum = 1; part;
1178                         part = part->mp_next, partnum++) {
1179                         p = part->mp_part;
1180
1181                         sprintf(pp, "%d", partnum);
1182                         p->c_partno = getcpy(partnam);
1183
1184                         /* initialize the content of the subparts */
1185                         if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1186                                 fclose(ct->c_fp);
1187                                 ct->c_fp = NULL;
1188                                 return NOTOK;
1189                         }
1190                 }
1191         }
1192
1193         fclose(ct->c_fp);
1194         ct->c_fp = NULL;
1195         return OK;
1196 }
1197
1198
1199 /*
1200 ** reverse the order of the parts of a multipart
1201 */
1202
1203 static void
1204 reverse_parts(CT ct)
1205 {
1206         int i;
1207         struct multipart *m;
1208         struct part **base, **bmp, **next, *part;
1209
1210         m = (struct multipart *) ct->c_ctparams;
1211
1212         /* if only one part, just return */
1213         if (!m->mp_parts || !m->mp_parts->mp_next)
1214                 return;
1215
1216         /* count number of parts */
1217         i = 0;
1218         for (part = m->mp_parts; part; part = part->mp_next)
1219                 i++;
1220
1221         /* allocate array of pointers to the parts */
1222         if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1223                 adios(EX_OSERR, NULL, "out of memory");
1224         bmp = base;
1225
1226         /* point at all the parts */
1227         for (part = m->mp_parts; part; part = part->mp_next)
1228                 *bmp++ = part;
1229         *bmp = NULL;
1230
1231         /* reverse the order of the parts */
1232         next = &m->mp_parts;
1233         for (bmp--; bmp >= base; bmp--) {
1234                 part = *bmp;
1235                 *next = part;
1236                 next = &part->mp_next;
1237         }
1238         *next = NULL;
1239
1240         /* free array of pointers */
1241         free((char *) base);
1242 }
1243
1244
1245 /*
1246 ** MESSAGE
1247 */
1248
1249 static int
1250 InitMessage(CT ct)
1251 {
1252         struct k2v *kv;
1253         CI ci = &ct->c_ctinfo;
1254
1255         if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1256                 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1257                 return NOTOK;
1258         }
1259
1260         /* check for missing subtype */
1261         if (!*ci->ci_subtype)
1262                 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1263
1264         /* match subtype */
1265         for (kv = SubMessage; kv->kv_key; kv++)
1266                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1267                         break;
1268         ct->c_subtype = kv->kv_value;
1269
1270         switch (ct->c_subtype) {
1271         case MESSAGE_RFC822:
1272                 break;
1273
1274         case MESSAGE_PARTIAL:
1275                 {
1276                 char **ap, **ep;
1277                 struct partial *p;
1278
1279                 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1280                 adios(EX_OSERR, NULL, "out of memory");
1281                 ct->c_ctparams = (void *) p;
1282
1283                 /*
1284                 ** scan for parameters "id", "number",
1285                 ** and "total"
1286                 */
1287                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1288                         if (!mh_strcasecmp(*ap, "id")) {
1289                                 p->pm_partid = getcpy(*ep);
1290                                 continue;
1291                         }
1292                         if (!mh_strcasecmp(*ap, "number")) {
1293                                 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1294 invalid_param:
1295                                         advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1296                                         return NOTOK;
1297                                 }
1298                                 continue;
1299                         }
1300                         if (!mh_strcasecmp(*ap, "total")) {
1301                                 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1302                                                 p->pm_maxno < 1)
1303                                         goto invalid_param;
1304                                 continue;
1305                         }
1306                 }
1307
1308                 if (!p->pm_partid || !p->pm_partno
1309                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1310                         advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1311                         return NOTOK;
1312                 }
1313                 }
1314                 break;
1315
1316         case MESSAGE_EXTERNAL:
1317                 {
1318                 CT p;
1319                 FILE *fp;
1320
1321                 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1322                         advise(ct->c_file, "unable to open for reading");
1323                         return NOTOK;
1324                 }
1325
1326                 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1327
1328                 if (!(p = get_content(fp, ct->c_file, 0))) {
1329                         ct->c_fp = NULL;
1330                         return NOTOK;
1331                 }
1332
1333                 p->c_fp = NULL;
1334                 p->c_end = p->c_begin;
1335
1336                 fclose(ct->c_fp);
1337                 ct->c_fp = NULL;
1338
1339                 switch (p->c_type) {
1340                 case CT_MULTIPART:
1341                         break;
1342
1343                 case CT_MESSAGE:
1344                         if (p->c_subtype != MESSAGE_RFC822)
1345                                 break;
1346                         /* else fall... */
1347                 default:
1348                         if (p->c_ctinitfnx)
1349                                 (*p->c_ctinitfnx) (p);
1350                         break;
1351                 }
1352                 }
1353                 break;
1354
1355         default:
1356                 break;
1357         }
1358
1359         return OK;
1360 }
1361
1362
1363 /*
1364 ** APPLICATION
1365 */
1366
1367 static int
1368 InitApplication(CT ct)
1369 {
1370         struct k2v *kv;
1371         CI ci = &ct->c_ctinfo;
1372
1373         /* match subtype */
1374         for (kv = SubApplication; kv->kv_key; kv++)
1375                 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1376                         break;
1377         ct->c_subtype = kv->kv_value;
1378
1379         return OK;
1380 }
1381
1382
1383 /*
1384 ** TRANSFER ENCODINGS
1385 */
1386
1387 static int
1388 init_encoding(CT ct, OpenCEFunc openfnx)
1389 {
1390         CE ce;
1391
1392         if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1393                 adios(EX_OSERR, NULL, "out of memory");
1394
1395         ct->c_cefile     = ce;
1396         ct->c_ceopenfnx  = openfnx;
1397         ct->c_ceclosefnx = close_encoding;
1398         ct->c_cesizefnx  = size_encoding;
1399
1400         return OK;
1401 }
1402
1403
1404 void
1405 close_encoding(CT ct)
1406 {
1407         CE ce;
1408
1409         if (!(ce = ct->c_cefile))
1410                 return;
1411
1412         if (ce->ce_fp) {
1413                 fclose(ce->ce_fp);
1414                 ce->ce_fp = NULL;
1415         }
1416 }
1417
1418
1419 static unsigned long
1420 size_encoding(CT ct)
1421 {
1422         int fd;
1423         unsigned long size;
1424         char *file;
1425         CE ce;
1426         struct stat st;
1427
1428         if (!(ce = ct->c_cefile))
1429                 return (ct->c_end - ct->c_begin);
1430
1431         if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1432                 return (long) st.st_size;
1433
1434         if (ce->ce_file) {
1435                 if (stat(ce->ce_file, &st) != NOTOK)
1436                         return (long) st.st_size;
1437                 else
1438                         return 0L;
1439         }
1440
1441         if (ct->c_encoding == CE_EXTERNAL)
1442                 return (ct->c_end - ct->c_begin);
1443
1444         file = NULL;
1445         if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1446                 return (ct->c_end - ct->c_begin);
1447
1448         if (fstat(fd, &st) != NOTOK)
1449                 size = (long) st.st_size;
1450         else
1451                 size = 0L;
1452
1453         (*ct->c_ceclosefnx) (ct);
1454         return size;
1455 }
1456
1457
1458 /*
1459 ** BASE64
1460 */
1461
/*
** Lookup table from a 7-bit character code to its 6-bit base64
** value (0..63).  Characters outside the base64 alphabet map to
** 0xff.
*/
static unsigned char b642nib[0x80] = {
	/* 0x00 - 0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x08 - 0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x10 - 0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x18 - 0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* 0x20 - 0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* '+' and '/' */ 0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63,
	/* '0' - '7'   */   52,   53,   54,   55,   56,   57,   58,   59,
	/* '8', '9'    */   60,   61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* '@', A - G  */ 0xff,    0,    1,    2,    3,    4,    5,    6,
	/* H - O       */    7,    8,    9,   10,   11,   12,   13,   14,
	/* P - W       */   15,   16,   17,   18,   19,   20,   21,   22,
	/* X - Z       */   23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* '`', a - g  */ 0xff,   26,   27,   28,   29,   30,   31,   32,
	/* h - o       */   33,   34,   35,   36,   37,   38,   39,   40,
	/* p - w       */   41,   42,   43,   44,   45,   46,   47,   48,
	/* x - z       */   49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff
};
1480
1481
1482 static int
1483 InitBase64(CT ct)
1484 {
1485         return init_encoding(ct, openBase64);
1486 }
1487
1488
1489 static int
1490 openBase64(CT ct, char **file)
1491 {
1492         int bitno, cc;
1493         int fd, len, skip, own_ct_fp = 0;
1494         unsigned long bits;
1495         unsigned char value, *b, *b1, *b2, *b3;
1496         unsigned char *cp, *ep;
1497         char buffer[BUFSIZ];
1498         /* sbeck -- handle suffixes */
1499         CI ci;
1500         CE ce;
1501
1502         b  = (unsigned char *) &bits;
1503         b1 = &b[endian > 0 ? 1 : 2];
1504         b2 = &b[endian > 0 ? 2 : 1];
1505         b3 = &b[endian > 0 ? 3 : 0];
1506
1507         ce = ct->c_cefile;
1508         if (ce->ce_fp) {
1509                 fseek(ce->ce_fp, 0L, SEEK_SET);
1510                 goto ready_to_go;
1511         }
1512
1513         if (ce->ce_file) {
1514                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1515                         content_error(ce->ce_file, ct,
1516                                         "unable to fopen for reading");
1517                         return NOTOK;
1518                 }
1519                 goto ready_to_go;
1520         }
1521
1522         if (*file == NULL) {
1523                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1524                 ce->ce_unlink = 1;
1525         } else {
1526                 ce->ce_file = getcpy(*file);
1527                 ce->ce_unlink = 0;
1528         }
1529
1530         /* sbeck@cise.ufl.edu -- handle suffixes */
1531         ci = &ct->c_ctinfo;
1532         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1533                         invo_name, ci->ci_type, ci->ci_subtype);
1534         cp = context_find(buffer);
1535         if (cp == NULL || *cp == '\0') {
1536                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1537                                 ci->ci_type);
1538                 cp = context_find(buffer);
1539         }
1540         if (cp != NULL && *cp != '\0') {
1541                 if (ce->ce_unlink) {
1542                         /*
1543                         ** Temporary file already exists, so we rename to
1544                         ** version with extension.
1545                         */
1546                         char *file_org = strdup(ce->ce_file);
1547                         ce->ce_file = add(cp, ce->ce_file);
1548                         if (rename(file_org, ce->ce_file)) {
1549                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1550                                                 file_org);
1551                         }
1552                         free(file_org);
1553
1554                 } else {
1555                         ce->ce_file = add(cp, ce->ce_file);
1556                 }
1557         }
1558
1559         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1560                 content_error(ce->ce_file, ct,
1561                                 "unable to fopen for reading/writing");
1562                 return NOTOK;
1563         }
1564
1565         if ((len = ct->c_end - ct->c_begin) < 0)
1566                 adios(EX_SOFTWARE, NULL, "internal error(1)");
1567
1568         if (!ct->c_fp) {
1569                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1570                         content_error(ct->c_file, ct,
1571                                         "unable to open for reading");
1572                         return NOTOK;
1573                 }
1574                 own_ct_fp = 1;
1575         }
1576
1577         bitno = 18;
1578         bits = 0L;
1579         skip = 0;
1580
1581         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1582         while (len > 0) {
1583                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1584                 case NOTOK:
1585                         content_error(ct->c_file, ct, "error reading from");
1586                         goto clean_up;
1587
1588                 case OK:
1589                         content_error(NULL, ct, "premature eof");
1590                         goto clean_up;
1591
1592                 default:
1593                         if (cc > len)
1594                                 cc = len;
1595                         len -= cc;
1596
1597                         for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1598                                 switch (*cp) {
1599                                 default:
1600                                         if (isspace(*cp))
1601                                                 break;
1602                                         if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1603                                                 if (debugsw) {
1604                                                         fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1605                                                 }
1606                                                 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1607                                                 continue;
1608                                         }
1609
1610                                         bits |= value << bitno;
1611 test_end:
1612                                         if ((bitno -= 6) < 0) {
1613                                                 putc((char) *b1, ce->ce_fp);
1614                                                 if (skip < 2) {
1615                                                         putc((char) *b2, ce->ce_fp);
1616                                                         if (skip < 1) {
1617                                                                 putc((char) *b3, ce->ce_fp);
1618                                                         }
1619                                                 }
1620
1621                                                 if (ferror(ce->ce_fp)) {
1622                                                         content_error(ce->ce_file, ct,
1623                                                                                    "error writing to");
1624                                                         goto clean_up;
1625                                                 }
1626                                                 bitno = 18, bits = 0L, skip = 0;
1627                                         }
1628                                         break;
1629
1630                                 case '=':
1631                                         if (++skip > 3)
1632                                                 goto self_delimiting;
1633                                         goto test_end;
1634                                 }
1635                         }
1636                 }
1637         }
1638
1639         if (bitno != 18) {
1640                 if (debugsw)
1641                         fprintf(stderr, "premature ending (bitno %d)\n",
1642                                         bitno);
1643
1644                 content_error(NULL, ct, "invalid BASE64 encoding");
1645                 goto clean_up;
1646         }
1647
1648 self_delimiting:
1649         fseek(ct->c_fp, 0L, SEEK_SET);
1650
1651         if (fflush(ce->ce_fp)) {
1652                 content_error(ce->ce_file, ct, "error writing to");
1653                 goto clean_up;
1654         }
1655
1656         fseek(ce->ce_fp, 0L, SEEK_SET);
1657
1658 ready_to_go:
1659         *file = ce->ce_file;
1660         if (own_ct_fp) {
1661                 fclose(ct->c_fp);
1662                 ct->c_fp = NULL;
1663         }
1664         return fileno(ce->ce_fp);
1665
1666 clean_up:
1667         free_encoding(ct, 0);
1668         if (own_ct_fp) {
1669                 fclose(ct->c_fp);
1670                 ct->c_fp = NULL;
1671         }
1672         return NOTOK;
1673 }
1674
1675
1676 /*
1677 ** QUOTED PRINTABLE
1678 */
1679
/*
** Lookup table from a 7-bit character code to the value of the
** hexadecimal digit it denotes ('0'-'9', 'A'-'F', 'a'-'f').
** All other characters map to 0.
*/
static char hex2nib[0x80] = {
	/* 0x00 - 0x07 */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x08 - 0x0f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x10 - 0x17 */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x18 - 0x1f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x20 - 0x27 */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x28 - 0x2f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* '0' - '7'   */  0,  1,  2,  3,  4,  5,  6,  7,
	/* '8', '9'    */  8,  9,  0,  0,  0,  0,  0,  0,
	/* '@', A - G  */  0, 10, 11, 12, 13, 14, 15,  0,
	/* 0x48 - 0x4f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x50 - 0x57 */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x58 - 0x5f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* '`', a - g  */  0, 10, 11, 12, 13, 14, 15,  0,
	/* 0x68 - 0x6f */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x70 - 0x77 */  0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x78 - 0x7f */  0,  0,  0,  0,  0,  0,  0,  0
};
1698
1699
1700 static int
1701 InitQuoted(CT ct)
1702 {
1703         return init_encoding(ct, openQuoted);
1704 }
1705
1706
1707 static int
1708 openQuoted(CT ct, char **file)
1709 {
1710         int cc, len, quoted, own_ct_fp = 0;
1711         unsigned char *cp, *ep;
1712         char buffer[BUFSIZ];
1713         unsigned char mask = 0;
1714         CE ce;
1715         /* sbeck -- handle suffixes */
1716         CI ci;
1717
1718         ce = ct->c_cefile;
1719         if (ce->ce_fp) {
1720                 fseek(ce->ce_fp, 0L, SEEK_SET);
1721                 goto ready_to_go;
1722         }
1723
1724         if (ce->ce_file) {
1725                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1726                         content_error(ce->ce_file, ct,
1727                                         "unable to fopen for reading");
1728                         return NOTOK;
1729                 }
1730                 goto ready_to_go;
1731         }
1732
1733         if (*file == NULL) {
1734                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1735                 ce->ce_unlink = 1;
1736         } else {
1737                 ce->ce_file = getcpy(*file);
1738                 ce->ce_unlink = 0;
1739         }
1740
1741         /* sbeck@cise.ufl.edu -- handle suffixes */
1742         ci = &ct->c_ctinfo;
1743         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1744                         invo_name, ci->ci_type, ci->ci_subtype);
1745         cp = context_find(buffer);
1746         if (cp == NULL || *cp == '\0') {
1747                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1748                                 ci->ci_type);
1749                 cp = context_find(buffer);
1750         }
1751         if (cp != NULL && *cp != '\0') {
1752                 if (ce->ce_unlink) {
1753                         /*
1754                         ** Temporary file already exists, so we rename to
1755                         ** version with extension.
1756                         */
1757                         char *file_org = strdup(ce->ce_file);
1758                         ce->ce_file = add(cp, ce->ce_file);
1759                         if (rename(file_org, ce->ce_file)) {
1760                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1761                                                 file_org);
1762                         }
1763                         free(file_org);
1764
1765                 } else {
1766                         ce->ce_file = add(cp, ce->ce_file);
1767                 }
1768         }
1769
1770         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1771                 content_error(ce->ce_file, ct,
1772                                 "unable to fopen for reading/writing");
1773                 return NOTOK;
1774         }
1775
1776         if ((len = ct->c_end - ct->c_begin) < 0)
1777                 adios(EX_SOFTWARE, NULL, "internal error(2)");
1778
1779         if (!ct->c_fp) {
1780                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1781                         content_error(ct->c_file, ct,
1782                                         "unable to open for reading");
1783                         return NOTOK;
1784                 }
1785                 own_ct_fp = 1;
1786         }
1787
1788         quoted = 0;
1789
1790         fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1791         while (len > 0) {
1792                 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1793                         content_error(NULL, ct, "premature eof");
1794                         goto clean_up;
1795                 }
1796
1797                 if ((cc = strlen(buffer)) > len)
1798                         cc = len;
1799                 len -= cc;
1800
1801                 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1802                         if (!isspace(*ep))
1803                                 break;
1804                 *++ep = '\n', ep++;
1805
1806                 for (; cp < ep; cp++) {
1807                         if (quoted > 0) {
1808                                 /* in an escape sequence */
1809                                 if (quoted == 1) {
1810                                         /* at byte 1 of an escape sequence */
1811                                         mask = hex2nib[*cp & 0x7f];
1812                                         /* next is byte 2 */
1813                                         quoted = 2;
1814                                 } else {
1815                                         /* at byte 2 of an escape sequence */
1816                                         mask <<= 4;
1817                                         mask |= hex2nib[*cp & 0x7f];
1818                                         putc(mask, ce->ce_fp);
1819                                         if (ferror(ce->ce_fp)) {
1820                                                 content_error(ce->ce_file, ct, "error writing to");
1821                                                 goto clean_up;
1822                                         }
1823                                         /*
1824                                         ** finished escape sequence; next may
1825                                         ** be literal or a new escape sequence
1826                                         */
1827                                         quoted = 0;
1828                                 }
1829                                 /* on to next byte */
1830                                 continue;
1831                         }
1832
1833                         /* not in an escape sequence */
1834                         if (*cp == '=') {
1835                                 /*
1836                                 ** starting an escape sequence,
1837                                 ** or invalid '='?
1838                                 */
1839                                 if (cp + 1 < ep && cp[1] == '\n') {
1840                                         /* "=\n" soft line break, eat the \n */
1841                                         cp++;
1842                                         continue;
1843                                 }
1844                                 if (cp + 1 >= ep || cp + 2 >= ep) {
1845                                         /*
1846                                         ** We don't have 2 bytes left,
1847                                         ** so this is an invalid escape
1848                                         ** sequence; just show the raw bytes
1849                                         ** (below).
1850                                         */
1851                                 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1852                                         /*
1853                                         ** Next 2 bytes are hex digits,
1854                                         ** making this a valid escape
1855                                         ** sequence; let's decode it (above).
1856                                         */
1857                                         quoted = 1;
1858                                         continue;
1859                                 } else {
1860                                         /*
1861                                         ** One or both of the next 2 is
1862                                         ** out of range, making this an
1863                                         ** invalid escape sequence; just
1864                                         ** show the raw bytes (below).
1865                                         */
1866                                 }
1867                         }
1868
1869                         /* Just show the raw byte. */
1870                         putc(*cp, ce->ce_fp);
1871                         if (ferror(ce->ce_fp)) {
1872                                 content_error(ce->ce_file, ct,
1873                                                 "error writing to");
1874                                 goto clean_up;
1875                         }
1876                 }
1877         }
1878         if (quoted) {
1879                 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1880                 goto clean_up;
1881         }
1882
1883         fseek(ct->c_fp, 0L, SEEK_SET);
1884
1885         if (fflush(ce->ce_fp)) {
1886                 content_error(ce->ce_file, ct, "error writing to");
1887                 goto clean_up;
1888         }
1889
1890         fseek(ce->ce_fp, 0L, SEEK_SET);
1891
1892 ready_to_go:
1893         *file = ce->ce_file;
1894         if (own_ct_fp) {
1895                 fclose(ct->c_fp);
1896                 ct->c_fp = NULL;
1897         }
1898         return fileno(ce->ce_fp);
1899
1900 clean_up:
1901         free_encoding(ct, 0);
1902         if (own_ct_fp) {
1903                 fclose(ct->c_fp);
1904                 ct->c_fp = NULL;
1905         }
1906         return NOTOK;
1907 }
1908
1909
1910 /*
1911 ** 7BIT
1912 */
1913
1914 static int
1915 Init7Bit(CT ct)
1916 {
1917         if (init_encoding(ct, open7Bit) == NOTOK)
1918                 return NOTOK;
1919
1920         ct->c_cesizefnx = NULL;  /* no need to decode for real size */
1921         return OK;
1922 }
1923
1924
1925 int
1926 open7Bit(CT ct, char **file)
1927 {
1928         int cc, fd, len, own_ct_fp = 0;
1929         char buffer[BUFSIZ];
1930         /* sbeck -- handle suffixes */
1931         char *cp;
1932         CI ci;
1933         CE ce;
1934
1935         ce = ct->c_cefile;
1936         if (ce->ce_fp) {
1937                 fseek(ce->ce_fp, 0L, SEEK_SET);
1938                 goto ready_to_go;
1939         }
1940
1941         if (ce->ce_file) {
1942                 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1943                         content_error(ce->ce_file, ct,
1944                                         "unable to fopen for reading");
1945                         return NOTOK;
1946                 }
1947                 goto ready_to_go;
1948         }
1949
1950         if (*file == NULL) {
1951                 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1952                 ce->ce_unlink = 1;
1953         } else {
1954                 ce->ce_file = getcpy(*file);
1955                 ce->ce_unlink = 0;
1956         }
1957
1958         /* sbeck@cise.ufl.edu -- handle suffixes */
1959         ci = &ct->c_ctinfo;
1960         snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1961                         invo_name, ci->ci_type, ci->ci_subtype);
1962         cp = context_find(buffer);
1963         if (cp == NULL || *cp == '\0') {
1964                 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1965                                 ci->ci_type);
1966                 cp = context_find(buffer);
1967         }
1968         if (cp != NULL && *cp != '\0') {
1969                 if (ce->ce_unlink) {
1970                         /*
1971                         ** Temporary file already exists, so we rename to
1972                         ** version with extension.
1973                         */
1974                         char *file_org = strdup(ce->ce_file);
1975                         ce->ce_file = add(cp, ce->ce_file);
1976                         if (rename(file_org, ce->ce_file)) {
1977                                 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1978                                                 file_org);
1979                         }
1980                         free(file_org);
1981
1982                 } else {
1983                         ce->ce_file = add(cp, ce->ce_file);
1984                 }
1985         }
1986
1987         if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1988                 content_error(ce->ce_file, ct,
1989                                 "unable to fopen for reading/writing");
1990                 return NOTOK;
1991         }
1992
1993         if (ct->c_type == CT_MULTIPART) {
1994                 char **ap, **ep;
1995                 CI ci = &ct->c_ctinfo;
1996
1997                 len = 0;
1998                 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1999                                 ci->ci_subtype);
2000                 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2001                                 strlen(ci->ci_subtype);
2002                 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2003                         putc(';', ce->ce_fp);
2004                         len++;
2005
2006                         snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2007                                         *ap, *ep);
2008
2009                         if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2010                                 fputs("\n\t", ce->ce_fp);
2011                                 len = 8;
2012                         } else {
2013                                 putc(' ', ce->ce_fp);
2014                                 len++;
2015                         }
2016                         fprintf(ce->ce_fp, "%s", buffer);
2017                         len += cc;
2018                 }
2019
2020                 if (ci->ci_comment) {
2021                         if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2022                                                 >= CPERLIN) {
2023                                 fputs("\n\t", ce->ce_fp);
2024                                 len = 8;
2025                         } else {
2026                                 putc(' ', ce->ce_fp);
2027                                 len++;
2028                         }
2029                         fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2030                         len += cc;
2031                 }
2032                 fprintf(ce->ce_fp, "\n");
2033                 if (ct->c_id)
2034                         fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2035                 if (ct->c_descr)
2036                         fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2037                 if (ct->c_dispo)
2038                         fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2039                 fprintf(ce->ce_fp, "\n");
2040         }
2041
2042         if ((len = ct->c_end - ct->c_begin) < 0)
2043                 adios(EX_SOFTWARE, NULL, "internal error(3)");
2044
2045         if (!ct->c_fp) {
2046                 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2047                         content_error(ct->c_file, ct,
2048                                         "unable to open for reading");
2049                         return NOTOK;
2050                 }
2051                 own_ct_fp = 1;
2052         }
2053
2054         lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2055         while (len > 0)
2056                 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2057                 case NOTOK:
2058                         content_error(ct->c_file, ct, "error reading from");
2059                         goto clean_up;
2060
2061                 case OK:
2062                         content_error(NULL, ct, "premature eof");
2063                         goto clean_up;
2064
2065                 default:
2066                         if (cc > len)
2067                                 cc = len;
2068                         len -= cc;
2069
2070                         fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2071                         if (ferror(ce->ce_fp)) {
2072                                 content_error(ce->ce_file, ct,
2073                                                 "error writing to");
2074                                 goto clean_up;
2075                         }
2076                 }
2077
2078         fseek(ct->c_fp, 0L, SEEK_SET);
2079
2080         if (fflush(ce->ce_fp)) {
2081                 content_error(ce->ce_file, ct, "error writing to");
2082                 goto clean_up;
2083         }
2084
2085         fseek(ce->ce_fp, 0L, SEEK_SET);
2086
2087 ready_to_go:
2088         *file = ce->ce_file;
2089         if (own_ct_fp) {
2090                 fclose(ct->c_fp);
2091                 ct->c_fp = NULL;
2092         }
2093         return fileno(ce->ce_fp);
2094
2095 clean_up:
2096         free_encoding(ct, 0);
2097         if (own_ct_fp) {
2098                 fclose(ct->c_fp);
2099                 ct->c_fp = NULL;
2100         }
2101         return NOTOK;
2102 }