2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
259 /* add the header data to the list */
260 add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
262 ct->c_begin = ftell(in) + 1;
266 ct->c_begin = ftell(in) - strlen(f.value);
270 ct->c_begin = ftell(in);
274 advise(NULL, "message format error in component #%d", compnum);
279 adios(EX_IOERR, "m_getfld2", "io error");
282 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
288 ** Read the content headers. We will parse the
289 ** MIME related header fields into their various
290 ** structures and set internal flags related to
291 ** content type/subtype, etc.
294 hp = ct->c_first_hf; /* start at first header field */
296 /* Get MIME-Version field */
297 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
300 unsigned char *cp, *dp;
303 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
306 ct->c_vrsn = mh_xstrdup(hp->value);
308 /* Now, cleanup this field */
313 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
315 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
320 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
322 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
325 for (dp = cp; istoken(*dp); dp++)
329 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
332 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
335 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
336 /* Get Content-Type field */
337 struct str2init *s2i;
338 CI ci = &ct->c_ctinfo;
340 /* Check if we've already seen a Content-Type header */
342 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
346 /* Parse the Content-Type field */
347 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
351 ** Set the Init function and the internal
352 ** flag for this content type.
354 for (s2i = str2cts; s2i->si_key; s2i++)
355 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
357 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
359 ct->c_type = s2i->si_val;
360 ct->c_ctinitfnx = s2i->si_init;
362 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
363 /* Get Content-Transfer-Encoding field */
365 unsigned char *cp, *dp;
366 struct str2init *s2i;
369 ** Check if we've already seen the
370 ** Content-Transfer-Encoding field
373 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
377 /* get copy of this field */
378 ct->c_celine = cp = mh_xstrdup(hp->value);
382 for (dp = cp; istoken(*dp); dp++)
388 ** Find the internal flag and Init function
389 ** for this transfer encoding.
391 for (s2i = str2ces; s2i->si_key; s2i++)
392 if (!mh_strcasecmp(cp, s2i->si_key))
394 if (!s2i->si_key && !uprf(cp, "X-"))
397 ct->c_encoding = s2i->si_val;
399 /* Call the Init function for this encoding */
400 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
403 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
404 /* Get Content-ID field */
405 ct->c_id = add(hp->value, ct->c_id);
407 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
408 /* Get Content-Description field */
409 ct->c_descr = add(hp->value, ct->c_descr);
411 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
412 /* Get Content-Disposition field */
413 ct->c_dispo = add(hp->value, ct->c_dispo);
417 hp = hp->next; /* next header field */
421 ** Check if we saw a Content-Type field.
422 ** If not, then assign a default value for
423 ** it, and the Init function.
427 ** If we are inside a multipart/digest message,
428 ** the default type is message/rfc822
431 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
433 ct->c_type = CT_MESSAGE;
434 ct->c_ctinitfnx = InitMessage;
437 ** Otherwise the default type is text/plain
439 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
441 ct->c_type = CT_TEXT;
442 ct->c_ctinitfnx = InitText;
446 /* Use default Transfer-Encoding, if necessary */
448 ct->c_encoding = CE_7BIT;
461 ** small routine to add header field to list
465 add_header(CT ct, char *name, char *value)
469 /* allocate header field structure */
470 hp = mh_xcalloc(1, sizeof(*hp));
472 /* link data into header structure */
477 /* link header structure into the list */
478 if (ct->c_first_hf == NULL) {
479 ct->c_first_hf = hp; /* this is the first */
482 ct->c_last_hf->next = hp; /* add it to the end */
491 ** Make sure that buf contains at least one appearance of name,
492 ** followed by =. If not, insert both name and value, just after
493 ** first semicolon, if any. Note that name should not contain a
494 ** trailing =. And quotes will be added around the value. Typical
495 ** usage: make sure that a Content-Disposition header contains
496 ** filename="foo". If it doesn't and value does, use value from
500 incl_name_value(unsigned char *buf, char *name, char *value) {
503 /* Assume that name is non-null. */
505 char *name_plus_equal = concat(name, "=", NULL);
507 if (!strstr(buf, name_plus_equal)) {
510 char *prefix, *suffix;
512 /* Trim trailing space, esp. newline. */
513 for (cp = &buf[strlen(buf) - 1];
514 cp >= buf && isspace(*cp); --cp) {
518 insertion = concat("; ", name, "=", "\"", value, "\"",
522 ** Insert at first semicolon, if any.
523 ** If none, append to end.
525 prefix = mh_xstrdup(buf);
526 if ((cp = strchr(prefix, ';'))) {
527 suffix = concat(cp, NULL);
529 newbuf = concat(prefix, insertion, suffix,
534 newbuf = concat(buf, insertion, "\n", NULL);
538 mh_free0(&insertion);
542 mh_free0(&name_plus_equal);
549 ** Extract just name_suffix="foo", if any, from value. If there isn't
550 ** one, return the entire value. Note that, for example, a name_suffix
551 ** of name will match filename="foo", and return foo.
554 extract_name_value(char *name_suffix, char *value) {
555 char *extracted_name_value = value;
556 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
557 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
560 mh_free0(&name_suffix_plus_quote);
561 if (name_suffix_equals) {
562 char *name_suffix_begin;
565 for (cp = name_suffix_equals; *cp != '"'; ++cp)
567 name_suffix_begin = ++cp;
568 /* Find second \". */
569 for (; *cp != '"'; ++cp)
572 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
573 memcpy(extracted_name_value, name_suffix_begin,
574 cp - name_suffix_begin);
575 extracted_name_value[cp - name_suffix_begin] = '\0';
578 return extracted_name_value;
582 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
583 ** directives. Fills in the information of the CTinfo structure.
586 get_ctinfo(unsigned char *cp, CT ct, int magic)
595 i = strlen(invo_name) + 2;
597 /* store copy of Content-Type line */
598 cp = ct->c_ctline = mh_xstrdup(cp);
600 while (isspace(*cp)) /* trim leading spaces */
603 /* change newlines to spaces */
604 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
607 /* trim trailing spaces */
608 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
614 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
616 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
619 for (dp = cp; istoken(*dp); dp++)
622 ci->ci_type = mh_xstrdup(cp); /* store content type */
626 advise(NULL, "invalid %s: field in message %s (empty type)",
627 TYPE_FIELD, ct->c_file);
631 /* down case the content type string */
632 for (dp = ci->ci_type; *dp; dp++)
633 if (isalpha(*dp) && isupper(*dp))
639 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
644 ci->ci_subtype = mh_xstrdup("");
652 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
655 for (dp = cp; istoken(*dp); dp++)
658 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
661 if (!*ci->ci_subtype) {
662 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
666 /* down case the content subtype string */
667 for (dp = ci->ci_subtype; *dp; dp++)
668 if (isalpha(*dp) && isupper(*dp))
675 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
679 ** Parse attribute/value pairs given with Content-Type
681 ep = (ap = ci->ci_attrs) + NPARMS;
687 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
695 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
699 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
703 /* down case the attribute name */
704 for (dp = cp; istoken(*dp); dp++)
705 if (isalpha(*dp) && isupper(*dp))
708 for (up = dp; isspace(*dp);)
710 if (dp == cp || *dp != '=') {
711 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
715 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
717 for (dp++; isspace(*dp);)
720 /* now add the attribute value */
721 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
724 for (cp = ++dp, dp = vp;;) {
728 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
733 if ((c = *cp++) == '\0')
748 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
753 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
754 *ci->ci_values[ap - ci->ci_attrs] = '\0';
755 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
763 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
768 ** Get any <Content-Id> given in buffer
770 if (magic && *cp == '<') {
772 mh_free0(&(ct->c_id));
774 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
775 advise(NULL, "invalid ID in message %s", ct->c_file);
781 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
792 ** Get any [Content-Description] given in buffer.
794 if (magic && *cp == '[') {
796 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
800 advise(NULL, "invalid description in message %s",
809 ct->c_descr = concat(ct->c_descr, "\n", NULL);
820 ** Get any {Content-Disposition} given in buffer.
822 if (magic && *cp == '{') {
824 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
828 advise(NULL, "invalid disposition in message %s",
837 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
848 ** Check if anything is left over
852 ci->ci_magic = mh_xstrdup(cp);
855 ** If there is a Content-Disposition header and
856 ** it doesn't have a *filename=, extract it from
857 ** the magic contents. The mhbasename call skips
858 ** any leading directory components.
861 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
863 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
871 get_comment(CT ct, unsigned char **ap, int istype)
876 char c, buffer[BUFSIZ], *dp;
888 advise(NULL, "invalid comment in message %s's %s: field",
889 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
894 if ((c = *cp++) == '\0')
917 if ((dp = ci->ci_comment)) {
918 ci->ci_comment = concat(dp, " ", buffer, NULL);
921 ci->ci_comment = mh_xstrdup(buffer);
936 ** Handles content types audio, image, and video.
937 ** There's not much to do right here.
943 return OK; /* not much to do here */
957 CI ci = &ct->c_ctinfo;
959 /* check for missing subtype */
960 if (!*ci->ci_subtype)
961 ci->ci_subtype = add("plain", ci->ci_subtype);
964 for (kv = SubText; kv->kv_key; kv++)
965 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
967 ct->c_subtype = kv->kv_value;
969 /* allocate text character set structure */
970 t = mh_xcalloc(1, sizeof(*t));
971 ct->c_ctparams = (void *) t;
973 /* scan for charset parameter */
974 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
975 if (!mh_strcasecmp(*ap, "charset"))
978 /* check if content specified a character set */
981 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
982 /* match character set or set to CHARSET_UNKNOWN */
983 for (kv = Charset; kv->kv_key; kv++) {
984 if (!mh_strcasecmp(*ep, kv->kv_key)) {
988 t->tx_charset = kv->kv_value;
990 t->tx_charset = CHARSET_UNSPECIFIED;
1002 InitMultiPart(CT ct)
1006 unsigned char *cp, *dp;
1008 char *bp, buffer[BUFSIZ];
1009 struct multipart *m;
1011 struct part *part, **next;
1012 CI ci = &ct->c_ctinfo;
1017 ** The encoding for multipart messages must be either
1018 ** 7bit, 8bit, or binary (per RFC2045).
1020 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1021 && ct->c_encoding != CE_BINARY) {
1022 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1023 ct->c_encoding = CE_7BIT;
1027 for (kv = SubMultiPart; kv->kv_key; kv++)
1028 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1030 ct->c_subtype = kv->kv_value;
1033 ** Check for "boundary" parameter, which is
1034 ** required for multipart messages.
1037 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1038 if (!mh_strcasecmp(*ap, "boundary")) {
1044 /* complain if boundary parameter is missing */
1046 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1050 /* allocate primary structure for multipart info */
1051 m = mh_xcalloc(1, sizeof(*m));
1052 ct->c_ctparams = (void *) m;
1054 /* check if boundary parameter contains only whitespace characters */
1055 for (cp = bp; isspace(*cp); cp++)
1058 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1062 /* remove trailing whitespace from boundary parameter */
1063 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1068 /* record boundary separators */
1069 m->mp_start = concat(bp, "\n", NULL);
1070 m->mp_stop = concat(bp, "--\n", NULL);
1072 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1073 advise(ct->c_file, "unable to open for reading");
1077 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1079 next = &m->mp_parts;
1083 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1087 pos += strlen(buffer);
1088 if (buffer[0] != '-' || buffer[1] != '-')
1091 if (strcmp(buffer + 2, m->mp_start)!=0)
1094 part = mh_xcalloc(1, sizeof(*part));
1096 next = &part->mp_next;
1098 if (!(p = get_content(fp, ct->c_file,
1099 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1106 fseek(fp, pos, SEEK_SET);
1109 if (strcmp(buffer + 2, m->mp_start) == 0) {
1113 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1114 if (p->c_end < p->c_begin)
1115 p->c_begin = p->c_end;
1120 if (strcmp(buffer + 2, m->mp_stop) == 0)
1126 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1127 if (!inout && part) {
1129 p->c_end = ct->c_end;
1131 if (p->c_begin >= p->c_end) {
1132 for (next = &m->mp_parts; *next != part;
1133 next = &((*next)->mp_next))
1142 /* reverse the order of the parts for multipart/alternative */
1143 if (ct->c_subtype == MULTI_ALTERNATE)
1147 ** label all subparts with part number, and
1148 ** then initialize the content of the subpart.
1153 char partnam[BUFSIZ];
1156 snprintf(partnam, sizeof(partnam), "%s.",
1158 pp = partnam + strlen(partnam);
1163 for (part = m->mp_parts, partnum = 1; part;
1164 part = part->mp_next, partnum++) {
1167 sprintf(pp, "%d", partnum);
1168 p->c_partno = mh_xstrdup(partnam);
1170 /* initialize the content of the subparts */
1171 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1186 ** reverse the order of the parts of a multipart
1190 reverse_parts(CT ct)
1193 struct multipart *m;
1194 struct part **base, **bmp, **next, *part;
1196 m = (struct multipart *) ct->c_ctparams;
1198 /* if only one part, just return */
1199 if (!m->mp_parts || !m->mp_parts->mp_next)
1202 /* count number of parts */
1204 for (part = m->mp_parts; part; part = part->mp_next)
1207 /* allocate array of pointers to the parts */
1208 base = mh_xcalloc(i + 1, sizeof(*base));
1211 /* point at all the parts */
1212 for (part = m->mp_parts; part; part = part->mp_next)
1216 /* reverse the order of the parts */
1217 next = &m->mp_parts;
1218 for (bmp--; bmp >= base; bmp--) {
1221 next = &part->mp_next;
1225 /* free array of pointers */
1238 CI ci = &ct->c_ctinfo;
1240 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1241 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1245 /* check for missing subtype */
1246 if (!*ci->ci_subtype)
1247 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1250 for (kv = SubMessage; kv->kv_key; kv++)
1251 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1253 ct->c_subtype = kv->kv_value;
1255 switch (ct->c_subtype) {
1256 case MESSAGE_RFC822:
1259 case MESSAGE_PARTIAL:
1264 p = mh_xcalloc(1, sizeof(*p));
1265 ct->c_ctparams = (void *) p;
1268 ** scan for parameters "id", "number",
1271 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1272 if (!mh_strcasecmp(*ap, "id")) {
1273 p->pm_partid = mh_xstrdup(*ep);
1276 if (!mh_strcasecmp(*ap, "number")) {
1277 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1279 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1284 if (!mh_strcasecmp(*ap, "total")) {
1285 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1292 if (!p->pm_partid || !p->pm_partno
1293 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1294 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1300 case MESSAGE_EXTERNAL:
1305 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1306 advise(ct->c_file, "unable to open for reading");
1310 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1312 if (!(p = get_content(fp, ct->c_file, 0))) {
1318 p->c_end = p->c_begin;
1323 switch (p->c_type) {
1328 if (p->c_subtype != MESSAGE_RFC822)
1333 (*p->c_ctinitfnx) (p);
1352 InitApplication(CT ct)
1355 CI ci = &ct->c_ctinfo;
1358 for (kv = SubApplication; kv->kv_key; kv++)
1359 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1361 ct->c_subtype = kv->kv_value;
1368 ** TRANSFER ENCODINGS
1372 init_encoding(CT ct, OpenCEFunc openfnx)
1376 ce = mh_xcalloc(1, sizeof(*ce));
1379 ct->c_ceopenfnx = openfnx;
1380 ct->c_ceclosefnx = close_encoding;
1381 ct->c_cesizefnx = size_encoding;
1388 close_encoding(CT ct)
1392 if (!(ce = ct->c_cefile))
1402 static unsigned long
1403 size_encoding(CT ct)
1411 if (!(ce = ct->c_cefile))
1412 return (ct->c_end - ct->c_begin);
1414 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1415 return (long) st.st_size;
1418 if (stat(ce->ce_file, &st) != NOTOK)
1419 return (long) st.st_size;
1424 if (ct->c_encoding == CE_EXTERNAL)
1425 return (ct->c_end - ct->c_begin);
1428 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1429 return (ct->c_end - ct->c_begin);
1431 if (fstat(fd, &st) != NOTOK)
1432 size = (long) st.st_size;
1436 (*ct->c_ceclosefnx) (ct);
1445 static unsigned char b642nib[0x80] = {
1446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1451 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1452 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1453 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1454 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1455 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1456 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1457 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1458 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1459 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1460 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1461 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1468 return init_encoding(ct, openBase64);
1473 openBase64(CT ct, char **file)
1476 int fd, len, skip, own_ct_fp = 0;
1478 unsigned char value, *b, *b1, *b2, *b3;
1479 unsigned char *cp, *ep;
1480 char buffer[BUFSIZ];
1481 /* sbeck -- handle suffixes */
1485 b = (unsigned char *) &bits;
1486 b1 = &b[endian > 0 ? 1 : 2];
1487 b2 = &b[endian > 0 ? 2 : 1];
1488 b3 = &b[endian > 0 ? 3 : 0];
1492 fseek(ce->ce_fp, 0L, SEEK_SET);
1497 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1498 content_error(ce->ce_file, ct,
1499 "unable to fopen for reading");
1505 if (*file == NULL) {
1506 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1509 ce->ce_file = mh_xstrdup(*file);
1513 /* sbeck@cise.ufl.edu -- handle suffixes */
1515 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1516 invo_name, ci->ci_type, ci->ci_subtype);
1517 cp = context_find(buffer);
1518 if (cp == NULL || *cp == '\0') {
1519 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1521 cp = context_find(buffer);
1523 if (cp != NULL && *cp != '\0') {
1524 if (ce->ce_unlink) {
1526 ** Temporary file already exists, so we rename to
1527 ** version with extension.
1529 char *file_org = mh_xstrdup(ce->ce_file);
1530 ce->ce_file = add(cp, ce->ce_file);
1531 if (rename(file_org, ce->ce_file)) {
1532 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1535 mh_free0(&file_org);
1538 ce->ce_file = add(cp, ce->ce_file);
1542 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1543 content_error(ce->ce_file, ct,
1544 "unable to fopen for reading/writing");
1548 if ((len = ct->c_end - ct->c_begin) < 0)
1549 adios(EX_SOFTWARE, NULL, "internal error(1)");
1552 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1553 content_error(ct->c_file, ct,
1554 "unable to open for reading");
1564 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1566 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1568 content_error(ct->c_file, ct, "error reading from");
1572 content_error(NULL, ct, "premature eof");
1580 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1585 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1587 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1589 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1593 bits |= value << bitno;
1595 if ((bitno -= 6) < 0) {
1596 putc((char) *b1, ce->ce_fp);
1598 putc((char) *b2, ce->ce_fp);
1600 putc((char) *b3, ce->ce_fp);
1604 if (ferror(ce->ce_fp)) {
1605 content_error(ce->ce_file, ct,
1606 "error writing to");
1609 bitno = 18, bits = 0L, skip = 0;
1615 goto self_delimiting;
1624 fprintf(stderr, "premature ending (bitno %d)\n",
1627 content_error(NULL, ct, "invalid BASE64 encoding");
1632 fseek(ct->c_fp, 0L, SEEK_SET);
1634 if (fflush(ce->ce_fp)) {
1635 content_error(ce->ce_file, ct, "error writing to");
1639 fseek(ce->ce_fp, 0L, SEEK_SET);
1642 *file = ce->ce_file;
1647 return fileno(ce->ce_fp);
1650 free_encoding(ct, 0);
1663 static char hex2nib[0x80] = {
1664 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1665 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1666 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1667 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1668 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1670 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1671 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1672 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1677 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1678 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1686 return init_encoding(ct, openQuoted);
1691 openQuoted(CT ct, char **file)
1693 int cc, len, quoted, own_ct_fp = 0;
1694 unsigned char *cp, *ep;
1695 char buffer[BUFSIZ];
1696 unsigned char mask = 0;
1698 /* sbeck -- handle suffixes */
1703 fseek(ce->ce_fp, 0L, SEEK_SET);
1708 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1709 content_error(ce->ce_file, ct,
1710 "unable to fopen for reading");
1716 if (*file == NULL) {
1717 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1720 ce->ce_file = mh_xstrdup(*file);
1724 /* sbeck@cise.ufl.edu -- handle suffixes */
1726 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1727 invo_name, ci->ci_type, ci->ci_subtype);
1728 cp = context_find(buffer);
1729 if (cp == NULL || *cp == '\0') {
1730 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1732 cp = context_find(buffer);
1734 if (cp != NULL && *cp != '\0') {
1735 if (ce->ce_unlink) {
1737 ** Temporary file already exists, so we rename to
1738 ** version with extension.
1740 char *file_org = mh_xstrdup(ce->ce_file);
1741 ce->ce_file = add(cp, ce->ce_file);
1742 if (rename(file_org, ce->ce_file)) {
1743 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1746 mh_free0(&file_org);
1749 ce->ce_file = add(cp, ce->ce_file);
1753 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1754 content_error(ce->ce_file, ct,
1755 "unable to fopen for reading/writing");
1759 if ((len = ct->c_end - ct->c_begin) < 0)
1760 adios(EX_SOFTWARE, NULL, "internal error(2)");
1763 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1764 content_error(ct->c_file, ct,
1765 "unable to open for reading");
1773 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1775 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1776 content_error(NULL, ct, "premature eof");
1780 if ((cc = strlen(buffer)) > len)
1784 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1789 for (; cp < ep; cp++) {
1791 /* in an escape sequence */
1793 /* at byte 1 of an escape sequence */
1794 mask = hex2nib[*cp & 0x7f];
1795 /* next is byte 2 */
1798 /* at byte 2 of an escape sequence */
1800 mask |= hex2nib[*cp & 0x7f];
1801 putc(mask, ce->ce_fp);
1802 if (ferror(ce->ce_fp)) {
1803 content_error(ce->ce_file, ct, "error writing to");
1807 ** finished escape sequence; next may
1808 ** be literal or a new escape sequence
1812 /* on to next byte */
1816 /* not in an escape sequence */
1819 ** starting an escape sequence,
1822 if (cp + 1 < ep && cp[1] == '\n') {
1823 /* "=\n" soft line break, eat the \n */
1827 if (cp + 1 >= ep || cp + 2 >= ep) {
1829 ** We don't have 2 bytes left,
1830 ** so this is an invalid escape
1831 ** sequence; just show the raw bytes
1834 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1836 ** Next 2 bytes are hex digits,
1837 ** making this a valid escape
1838 ** sequence; let's decode it (above).
1844 ** One or both of the next 2 is
1845 ** out of range, making this an
1846 ** invalid escape sequence; just
1847 ** show the raw bytes (below).
1852 /* Just show the raw byte. */
1853 putc(*cp, ce->ce_fp);
1854 if (ferror(ce->ce_fp)) {
1855 content_error(ce->ce_file, ct,
1856 "error writing to");
1862 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1866 fseek(ct->c_fp, 0L, SEEK_SET);
1868 if (fflush(ce->ce_fp)) {
1869 content_error(ce->ce_file, ct, "error writing to");
1873 fseek(ce->ce_fp, 0L, SEEK_SET);
1876 *file = ce->ce_file;
1881 return fileno(ce->ce_fp);
1884 free_encoding(ct, 0);
1900 if (init_encoding(ct, open7Bit) == NOTOK)
1903 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1909 open7Bit(CT ct, char **file)
1911 int cc, fd, len, own_ct_fp = 0;
1912 char buffer[BUFSIZ];
1913 /* sbeck -- handle suffixes */
1920 fseek(ce->ce_fp, 0L, SEEK_SET);
1925 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1926 content_error(ce->ce_file, ct,
1927 "unable to fopen for reading");
1933 if (*file == NULL) {
1934 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1937 ce->ce_file = mh_xstrdup(*file);
1941 /* sbeck@cise.ufl.edu -- handle suffixes */
1943 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1944 invo_name, ci->ci_type, ci->ci_subtype);
1945 cp = context_find(buffer);
1946 if (cp == NULL || *cp == '\0') {
1947 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1949 cp = context_find(buffer);
1951 if (cp != NULL && *cp != '\0') {
1952 if (ce->ce_unlink) {
1954 ** Temporary file already exists, so we rename to
1955 ** version with extension.
1957 char *file_org = mh_xstrdup(ce->ce_file);
1958 ce->ce_file = add(cp, ce->ce_file);
1959 if (rename(file_org, ce->ce_file)) {
1960 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1963 mh_free0(&file_org);
1966 ce->ce_file = add(cp, ce->ce_file);
1970 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1971 content_error(ce->ce_file, ct,
1972 "unable to fopen for reading/writing");
1976 if (ct->c_type == CT_MULTIPART) {
1978 CI ci = &ct->c_ctinfo;
1981 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1983 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1984 strlen(ci->ci_subtype);
1985 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1986 putc(';', ce->ce_fp);
1989 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1992 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1993 fputs("\n\t", ce->ce_fp);
1996 putc(' ', ce->ce_fp);
1999 fprintf(ce->ce_fp, "%s", buffer);
2003 if (ci->ci_comment) {
2004 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2006 fputs("\n\t", ce->ce_fp);
2009 putc(' ', ce->ce_fp);
2012 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2015 fprintf(ce->ce_fp, "\n");
2017 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2019 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2021 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2022 fprintf(ce->ce_fp, "\n");
2025 if ((len = ct->c_end - ct->c_begin) < 0)
2026 adios(EX_SOFTWARE, NULL, "internal error(3)");
2029 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2030 content_error(ct->c_file, ct,
2031 "unable to open for reading");
2037 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2039 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2041 content_error(ct->c_file, ct, "error reading from");
2045 content_error(NULL, ct, "premature eof");
2053 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2054 if (ferror(ce->ce_fp)) {
2055 content_error(ce->ce_file, ct,
2056 "error writing to");
2061 fseek(ct->c_fp, 0L, SEEK_SET);
2063 if (fflush(ce->ce_fp)) {
2064 content_error(ce->ce_file, ct, "error writing to");
2068 fseek(ce->ce_fp, 0L, SEEK_SET);
2071 *file = ce->ce_file;
2076 return fileno(ce->ce_fp);
2079 free_encoding(ct, 0);