2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
254 advise(NULL, "To long field");
260 /* add the header data to the list */
261 add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
263 ct->c_begin = ftell(in) + 1;
267 ct->c_begin = ftell(in) - strlen(f.value);
271 ct->c_begin = ftell(in);
275 advise(NULL, "message format error in component #%d", compnum);
280 adios(EX_IOERR, "m_getfld2", "io error");
283 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
289 ** Read the content headers. We will parse the
290 ** MIME related header fields into their various
291 ** structures and set internal flags related to
292 ** content type/subtype, etc.
295 hp = ct->c_first_hf; /* start at first header field */
297 /* Get MIME-Version field */
298 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
301 unsigned char *cp, *dp;
304 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
307 ct->c_vrsn = mh_xstrdup(hp->value);
309 /* Now, cleanup this field */
314 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
316 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
321 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
323 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
326 for (dp = cp; istoken(*dp); dp++)
330 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
333 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
336 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
337 /* Get Content-Type field */
338 struct str2init *s2i;
339 CI ci = &ct->c_ctinfo;
341 /* Check if we've already seen a Content-Type header */
343 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
347 /* Parse the Content-Type field */
348 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
352 ** Set the Init function and the internal
353 ** flag for this content type.
355 for (s2i = str2cts; s2i->si_key; s2i++)
356 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
358 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
360 ct->c_type = s2i->si_val;
361 ct->c_ctinitfnx = s2i->si_init;
363 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
364 /* Get Content-Transfer-Encoding field */
366 unsigned char *cp, *dp;
367 struct str2init *s2i;
370 ** Check if we've already seen the
371 ** Content-Transfer-Encoding field
374 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
378 /* get copy of this field */
379 ct->c_celine = cp = mh_xstrdup(hp->value);
383 for (dp = cp; istoken(*dp); dp++)
389 ** Find the internal flag and Init function
390 ** for this transfer encoding.
392 for (s2i = str2ces; s2i->si_key; s2i++)
393 if (!mh_strcasecmp(cp, s2i->si_key))
395 if (!s2i->si_key && !uprf(cp, "X-"))
398 ct->c_encoding = s2i->si_val;
400 /* Call the Init function for this encoding */
401 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
404 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
405 /* Get Content-ID field */
406 ct->c_id = add(hp->value, ct->c_id);
408 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
409 /* Get Content-Description field */
410 ct->c_descr = add(hp->value, ct->c_descr);
412 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
413 /* Get Content-Disposition field */
414 ct->c_dispo = add(hp->value, ct->c_dispo);
418 hp = hp->next; /* next header field */
422 ** Check if we saw a Content-Type field.
423 ** If not, then assign a default value for
424 ** it, and the Init function.
428 ** If we are inside a multipart/digest message,
429 ** the default type is message/rfc822
432 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
434 ct->c_type = CT_MESSAGE;
435 ct->c_ctinitfnx = InitMessage;
438 ** Else default type is text/plain
440 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
442 ct->c_type = CT_TEXT;
443 ct->c_ctinitfnx = InitText;
447 /* Use default Transfer-Encoding, if necessary */
449 ct->c_encoding = CE_7BIT;
462 ** small routine to add header field to list
466 add_header(CT ct, char *name, char *value)
470 /* allocate header field structure */
471 hp = mh_xcalloc(1, sizeof(*hp));
473 /* link data into header structure */
478 /* link header structure into the list */
479 if (ct->c_first_hf == NULL) {
480 ct->c_first_hf = hp; /* this is the first */
483 ct->c_last_hf->next = hp; /* add it to the end */
492 ** Make sure that buf contains at least one appearance of name,
493 ** followed by =. If not, insert both name and value, just after
494 ** first semicolon, if any. Note that name should not contain a
495 ** trailing =. And quotes will be added around the value. Typical
496 ** usage: make sure that a Content-Disposition header contains
497 ** filename="foo". If it doesn't and value does, use value from
501 incl_name_value(unsigned char *buf, char *name, char *value) {
504 /* Assume that name is non-null. */
506 char *name_plus_equal = concat(name, "=", NULL);
508 if (!strstr(buf, name_plus_equal)) {
511 char *prefix, *suffix;
513 /* Trim trailing space, esp. newline. */
514 for (cp = &buf[strlen(buf) - 1];
515 cp >= buf && isspace(*cp); --cp) {
519 insertion = concat("; ", name, "=", "\"", value, "\"",
523 ** Insert at first semicolon, if any.
524 ** If none, append to end.
526 prefix = mh_xstrdup(buf);
527 if ((cp = strchr(prefix, ';'))) {
528 suffix = concat(cp, NULL);
530 newbuf = concat(prefix, insertion, suffix,
535 newbuf = concat(buf, insertion, "\n", NULL);
539 mh_free0(&insertion);
543 mh_free0(&name_plus_equal);
550 ** Extract just name_suffix="foo", if any, from value. If there isn't
551 ** one, return the entire value. Note that, for example, a name_suffix
552 ** of name will match filename="foo", and return foo.
555 extract_name_value(char *name_suffix, char *value) {
556 char *extracted_name_value = value;
557 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
558 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
561 mh_free0(&name_suffix_plus_quote);
562 if (name_suffix_equals) {
563 char *name_suffix_begin;
566 for (cp = name_suffix_equals; *cp != '"'; ++cp)
568 name_suffix_begin = ++cp;
569 /* Find second \". */
570 for (; *cp != '"'; ++cp)
573 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
574 memcpy(extracted_name_value, name_suffix_begin,
575 cp - name_suffix_begin);
576 extracted_name_value[cp - name_suffix_begin] = '\0';
579 return extracted_name_value;
583 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
584 ** directives. Fills in the information of the CTinfo structure.
587 get_ctinfo(unsigned char *cp, CT ct, int magic)
596 i = strlen(invo_name) + 2;
598 /* store copy of Content-Type line */
599 cp = ct->c_ctline = mh_xstrdup(cp);
601 while (isspace(*cp)) /* trim leading spaces */
604 /* change newlines to spaces */
605 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
608 /* trim trailing spaces */
609 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
615 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
617 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
620 for (dp = cp; istoken(*dp); dp++)
623 ci->ci_type = mh_xstrdup(cp); /* store content type */
627 advise(NULL, "invalid %s: field in message %s (empty type)",
628 TYPE_FIELD, ct->c_file);
632 /* down case the content type string */
633 for (dp = ci->ci_type; *dp; dp++)
634 if (isalpha(*dp) && isupper(*dp))
640 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
645 ci->ci_subtype = mh_xstrdup("");
653 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
656 for (dp = cp; istoken(*dp); dp++)
659 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
662 if (!*ci->ci_subtype) {
663 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
667 /* down case the content subtype string */
668 for (dp = ci->ci_subtype; *dp; dp++)
669 if (isalpha(*dp) && isupper(*dp))
676 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
680 ** Parse attribute/value pairs given with Content-Type
682 ep = (ap = ci->ci_attrs) + NPARMS;
688 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
696 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
700 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
704 /* down case the attribute name */
705 for (dp = cp; istoken(*dp); dp++)
706 if (isalpha(*dp) && isupper(*dp))
709 for (up = dp; isspace(*dp);)
711 if (dp == cp || *dp != '=') {
712 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
716 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
718 for (dp++; isspace(*dp);)
721 /* now add the attribute value */
722 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
725 for (cp = ++dp, dp = vp;;) {
729 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
734 if ((c = *cp++) == '\0')
749 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
754 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
755 *ci->ci_values[ap - ci->ci_attrs] = '\0';
756 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
764 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
769 ** Get any <Content-Id> given in buffer
771 if (magic && *cp == '<') {
773 mh_free0(&(ct->c_id));
775 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
776 advise(NULL, "invalid ID in message %s", ct->c_file);
782 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
793 ** Get any [Content-Description] given in buffer.
795 if (magic && *cp == '[') {
797 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
801 advise(NULL, "invalid description in message %s",
810 ct->c_descr = concat(ct->c_descr, "\n", NULL);
821 ** Get any {Content-Disposition} given in buffer.
823 if (magic && *cp == '{') {
825 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
829 advise(NULL, "invalid disposition in message %s",
838 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
849 ** Check if anything is left over
853 ci->ci_magic = mh_xstrdup(cp);
856 ** If there is a Content-Disposition header and
857 ** it doesn't have a *filename=, extract it from
858 ** the magic contents. The mhbasename call skips
859 ** any leading directory components.
862 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
864 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
872 get_comment(CT ct, unsigned char **ap, int istype)
877 char c, buffer[BUFSIZ], *dp;
889 advise(NULL, "invalid comment in message %s's %s: field",
890 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
895 if ((c = *cp++) == '\0')
918 if ((dp = ci->ci_comment)) {
919 ci->ci_comment = concat(dp, " ", buffer, NULL);
922 ci->ci_comment = mh_xstrdup(buffer);
937 ** Handles content types audio, image, and video.
938 ** There's not much to do right here.
944 return OK; /* not much to do here */
958 CI ci = &ct->c_ctinfo;
960 /* check for missing subtype */
961 if (!*ci->ci_subtype)
962 ci->ci_subtype = add("plain", ci->ci_subtype);
965 for (kv = SubText; kv->kv_key; kv++)
966 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
968 ct->c_subtype = kv->kv_value;
970 /* allocate text character set structure */
971 t = mh_xcalloc(1, sizeof(*t));
972 ct->c_ctparams = (void *) t;
974 /* scan for charset parameter */
975 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
976 if (!mh_strcasecmp(*ap, "charset"))
979 /* check if content specified a character set */
982 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
983 /* match character set or set to CHARSET_UNKNOWN */
984 for (kv = Charset; kv->kv_key; kv++) {
985 if (!mh_strcasecmp(*ep, kv->kv_key)) {
989 t->tx_charset = kv->kv_value;
991 t->tx_charset = CHARSET_UNSPECIFIED;
1003 InitMultiPart(CT ct)
1007 unsigned char *cp, *dp;
1009 char *bp, buffer[BUFSIZ];
1010 struct multipart *m;
1012 struct part *part, **next;
1013 CI ci = &ct->c_ctinfo;
1018 ** The encoding for multipart messages must be either
1019 ** 7bit, 8bit, or binary (per RFC2045).
1021 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1022 && ct->c_encoding != CE_BINARY) {
1023 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1024 ct->c_encoding = CE_7BIT;
1028 for (kv = SubMultiPart; kv->kv_key; kv++)
1029 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1031 ct->c_subtype = kv->kv_value;
1034 ** Check for "boundary" parameter, which is
1035 ** required for multipart messages.
1038 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1039 if (!mh_strcasecmp(*ap, "boundary")) {
1045 /* complain if boundary parameter is missing */
1047 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1051 /* allocate primary structure for multipart info */
1052 m = mh_xcalloc(1, sizeof(*m));
1053 ct->c_ctparams = (void *) m;
1055 /* check if boundary parameter contains only whitespace characters */
1056 for (cp = bp; isspace(*cp); cp++)
1059 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1063 /* remove trailing whitespace from boundary parameter */
1064 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1069 /* record boundary separators */
1070 m->mp_start = concat(bp, "\n", NULL);
1071 m->mp_stop = concat(bp, "--\n", NULL);
1073 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1074 advise(ct->c_file, "unable to open for reading");
1078 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1080 next = &m->mp_parts;
1084 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1088 pos += strlen(buffer);
1089 if (buffer[0] != '-' || buffer[1] != '-')
1092 if (strcmp(buffer + 2, m->mp_start)!=0)
1095 part = mh_xcalloc(1, sizeof(*part));
1097 next = &part->mp_next;
1099 if (!(p = get_content(fp, ct->c_file,
1100 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1107 fseek(fp, pos, SEEK_SET);
1110 if (strcmp(buffer + 2, m->mp_start) == 0) {
1114 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1115 if (p->c_end < p->c_begin)
1116 p->c_begin = p->c_end;
1121 if (strcmp(buffer + 2, m->mp_stop) == 0)
1127 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1128 if (!inout && part) {
1130 p->c_end = ct->c_end;
1132 if (p->c_begin >= p->c_end) {
1133 for (next = &m->mp_parts; *next != part;
1134 next = &((*next)->mp_next))
1143 /* reverse the order of the parts for multipart/alternative */
1144 if (ct->c_subtype == MULTI_ALTERNATE)
1148 ** label all subparts with part number, and
1149 ** then initialize the content of the subpart.
1154 char partnam[BUFSIZ];
1157 snprintf(partnam, sizeof(partnam), "%s.",
1159 pp = partnam + strlen(partnam);
1164 for (part = m->mp_parts, partnum = 1; part;
1165 part = part->mp_next, partnum++) {
1168 sprintf(pp, "%d", partnum);
1169 p->c_partno = mh_xstrdup(partnam);
1171 /* initialize the content of the subparts */
1172 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1187 ** reverse the order of the parts of a multipart
1191 reverse_parts(CT ct)
1194 struct multipart *m;
1195 struct part **base, **bmp, **next, *part;
1197 m = (struct multipart *) ct->c_ctparams;
1199 /* if only one part, just return */
1200 if (!m->mp_parts || !m->mp_parts->mp_next)
1203 /* count number of parts */
1205 for (part = m->mp_parts; part; part = part->mp_next)
1208 /* allocate array of pointers to the parts */
1209 base = mh_xcalloc(i + 1, sizeof(*base));
1212 /* point at all the parts */
1213 for (part = m->mp_parts; part; part = part->mp_next)
1217 /* reverse the order of the parts */
1218 next = &m->mp_parts;
1219 for (bmp--; bmp >= base; bmp--) {
1222 next = &part->mp_next;
1226 /* free array of pointers */
1239 CI ci = &ct->c_ctinfo;
1241 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1242 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1246 /* check for missing subtype */
1247 if (!*ci->ci_subtype)
1248 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1251 for (kv = SubMessage; kv->kv_key; kv++)
1252 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1254 ct->c_subtype = kv->kv_value;
1256 switch (ct->c_subtype) {
1257 case MESSAGE_RFC822:
1260 case MESSAGE_PARTIAL:
1265 p = mh_xcalloc(1, sizeof(*p));
1266 ct->c_ctparams = (void *) p;
1269 ** scan for parameters "id", "number",
1272 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1273 if (!mh_strcasecmp(*ap, "id")) {
1274 p->pm_partid = mh_xstrdup(*ep);
1277 if (!mh_strcasecmp(*ap, "number")) {
1278 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1280 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1285 if (!mh_strcasecmp(*ap, "total")) {
1286 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1293 if (!p->pm_partid || !p->pm_partno
1294 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1295 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1301 case MESSAGE_EXTERNAL:
1306 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1307 advise(ct->c_file, "unable to open for reading");
1311 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1313 if (!(p = get_content(fp, ct->c_file, 0))) {
1319 p->c_end = p->c_begin;
1324 switch (p->c_type) {
1329 if (p->c_subtype != MESSAGE_RFC822)
1334 (*p->c_ctinitfnx) (p);
1353 InitApplication(CT ct)
1356 CI ci = &ct->c_ctinfo;
1359 for (kv = SubApplication; kv->kv_key; kv++)
1360 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1362 ct->c_subtype = kv->kv_value;
1369 ** TRANSFER ENCODINGS
1373 init_encoding(CT ct, OpenCEFunc openfnx)
1377 ce = mh_xcalloc(1, sizeof(*ce));
1380 ct->c_ceopenfnx = openfnx;
1381 ct->c_ceclosefnx = close_encoding;
1382 ct->c_cesizefnx = size_encoding;
1389 close_encoding(CT ct)
1393 if (!(ce = ct->c_cefile))
1403 static unsigned long
1404 size_encoding(CT ct)
1412 if (!(ce = ct->c_cefile))
1413 return (ct->c_end - ct->c_begin);
1415 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1416 return (long) st.st_size;
1419 if (stat(ce->ce_file, &st) != NOTOK)
1420 return (long) st.st_size;
1425 if (ct->c_encoding == CE_EXTERNAL)
1426 return (ct->c_end - ct->c_begin);
1429 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1430 return (ct->c_end - ct->c_begin);
1432 if (fstat(fd, &st) != NOTOK)
1433 size = (long) st.st_size;
1437 (*ct->c_ceclosefnx) (ct);
1446 static unsigned char b642nib[0x80] = {
1447 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1448 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1449 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1452 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1453 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1454 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1455 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1456 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1457 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1458 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1459 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1460 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1461 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1462 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1469 return init_encoding(ct, openBase64);
1474 openBase64(CT ct, char **file)
1477 int fd, len, skip, own_ct_fp = 0;
1479 unsigned char value, *b, *b1, *b2, *b3;
1480 unsigned char *cp, *ep;
1481 char buffer[BUFSIZ];
1482 /* sbeck -- handle suffixes */
1486 b = (unsigned char *) &bits;
1487 b1 = &b[endian > 0 ? 1 : 2];
1488 b2 = &b[endian > 0 ? 2 : 1];
1489 b3 = &b[endian > 0 ? 3 : 0];
1493 fseek(ce->ce_fp, 0L, SEEK_SET);
1498 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1499 content_error(ce->ce_file, ct,
1500 "unable to fopen for reading");
1506 if (*file == NULL) {
1507 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1510 ce->ce_file = mh_xstrdup(*file);
1514 /* sbeck@cise.ufl.edu -- handle suffixes */
1516 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1517 invo_name, ci->ci_type, ci->ci_subtype);
1518 cp = context_find(buffer);
1519 if (cp == NULL || *cp == '\0') {
1520 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1522 cp = context_find(buffer);
1524 if (cp != NULL && *cp != '\0') {
1525 if (ce->ce_unlink) {
1527 ** Temporary file already exists, so we rename to
1528 ** version with extension.
1530 char *file_org = mh_xstrdup(ce->ce_file);
1531 ce->ce_file = add(cp, ce->ce_file);
1532 if (rename(file_org, ce->ce_file)) {
1533 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1536 mh_free0(&file_org);
1539 ce->ce_file = add(cp, ce->ce_file);
1543 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1544 content_error(ce->ce_file, ct,
1545 "unable to fopen for reading/writing");
1549 if ((len = ct->c_end - ct->c_begin) < 0)
1550 adios(EX_SOFTWARE, NULL, "internal error(1)");
1553 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1554 content_error(ct->c_file, ct,
1555 "unable to open for reading");
1565 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1567 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1569 content_error(ct->c_file, ct, "error reading from");
1573 content_error(NULL, ct, "premature eof");
1581 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1586 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1588 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1590 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1594 bits |= value << bitno;
1596 if ((bitno -= 6) < 0) {
1597 putc((char) *b1, ce->ce_fp);
1599 putc((char) *b2, ce->ce_fp);
1601 putc((char) *b3, ce->ce_fp);
1605 if (ferror(ce->ce_fp)) {
1606 content_error(ce->ce_file, ct,
1607 "error writing to");
1610 bitno = 18, bits = 0L, skip = 0;
1616 goto self_delimiting;
1625 fprintf(stderr, "premature ending (bitno %d)\n",
1628 content_error(NULL, ct, "invalid BASE64 encoding");
1633 fseek(ct->c_fp, 0L, SEEK_SET);
1635 if (fflush(ce->ce_fp)) {
1636 content_error(ce->ce_file, ct, "error writing to");
1640 fseek(ce->ce_fp, 0L, SEEK_SET);
1643 *file = ce->ce_file;
1648 return fileno(ce->ce_fp);
1651 free_encoding(ct, 0);
1664 static char hex2nib[0x80] = {
1665 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1666 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1667 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1668 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1671 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1672 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1673 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1677 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1678 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1680 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1687 return init_encoding(ct, openQuoted);
1692 openQuoted(CT ct, char **file)
1694 int cc, len, quoted, own_ct_fp = 0;
1695 unsigned char *cp, *ep;
1696 char buffer[BUFSIZ];
1697 unsigned char mask = 0;
1699 /* sbeck -- handle suffixes */
1704 fseek(ce->ce_fp, 0L, SEEK_SET);
1709 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1710 content_error(ce->ce_file, ct,
1711 "unable to fopen for reading");
1717 if (*file == NULL) {
1718 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1721 ce->ce_file = mh_xstrdup(*file);
1725 /* sbeck@cise.ufl.edu -- handle suffixes */
1727 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1728 invo_name, ci->ci_type, ci->ci_subtype);
1729 cp = context_find(buffer);
1730 if (cp == NULL || *cp == '\0') {
1731 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1733 cp = context_find(buffer);
1735 if (cp != NULL && *cp != '\0') {
1736 if (ce->ce_unlink) {
1738 ** Temporary file already exists, so we rename to
1739 ** version with extension.
1741 char *file_org = mh_xstrdup(ce->ce_file);
1742 ce->ce_file = add(cp, ce->ce_file);
1743 if (rename(file_org, ce->ce_file)) {
1744 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1747 mh_free0(&file_org);
1750 ce->ce_file = add(cp, ce->ce_file);
1754 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1755 content_error(ce->ce_file, ct,
1756 "unable to fopen for reading/writing");
1760 if ((len = ct->c_end - ct->c_begin) < 0)
1761 adios(EX_SOFTWARE, NULL, "internal error(2)");
1764 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1765 content_error(ct->c_file, ct,
1766 "unable to open for reading");
1774 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1776 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1777 content_error(NULL, ct, "premature eof");
1781 if ((cc = strlen(buffer)) > len)
1785 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1790 for (; cp < ep; cp++) {
1792 /* in an escape sequence */
1794 /* at byte 1 of an escape sequence */
1795 mask = hex2nib[*cp & 0x7f];
1796 /* next is byte 2 */
1799 /* at byte 2 of an escape sequence */
1801 mask |= hex2nib[*cp & 0x7f];
1802 putc(mask, ce->ce_fp);
1803 if (ferror(ce->ce_fp)) {
1804 content_error(ce->ce_file, ct, "error writing to");
1808 ** finished escape sequence; next may
1809 ** be literal or a new escape sequence
1813 /* on to next byte */
1817 /* not in an escape sequence */
1820 ** starting an escape sequence,
1823 if (cp + 1 < ep && cp[1] == '\n') {
1824 /* "=\n" soft line break, eat the \n */
1828 if (cp + 1 >= ep || cp + 2 >= ep) {
1830 ** We don't have 2 bytes left,
1831 ** so this is an invalid escape
1832 ** sequence; just show the raw bytes
1835 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1837 ** Next 2 bytes are hex digits,
1838 ** making this a valid escape
1839 ** sequence; let's decode it (above).
1845 ** One or both of the next 2 is
1846 ** out of range, making this an
1847 ** invalid escape sequence; just
1848 ** show the raw bytes (below).
1853 /* Just show the raw byte. */
1854 putc(*cp, ce->ce_fp);
1855 if (ferror(ce->ce_fp)) {
1856 content_error(ce->ce_file, ct,
1857 "error writing to");
1863 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1867 fseek(ct->c_fp, 0L, SEEK_SET);
1869 if (fflush(ce->ce_fp)) {
1870 content_error(ce->ce_file, ct, "error writing to");
1874 fseek(ce->ce_fp, 0L, SEEK_SET);
1877 *file = ce->ce_file;
1882 return fileno(ce->ce_fp);
1885 free_encoding(ct, 0);
1901 if (init_encoding(ct, open7Bit) == NOTOK)
1904 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1910 open7Bit(CT ct, char **file)
1912 int cc, fd, len, own_ct_fp = 0;
1913 char buffer[BUFSIZ];
1914 /* sbeck -- handle suffixes */
1921 fseek(ce->ce_fp, 0L, SEEK_SET);
1926 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1927 content_error(ce->ce_file, ct,
1928 "unable to fopen for reading");
1934 if (*file == NULL) {
1935 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1938 ce->ce_file = mh_xstrdup(*file);
1942 /* sbeck@cise.ufl.edu -- handle suffixes */
1944 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1945 invo_name, ci->ci_type, ci->ci_subtype);
1946 cp = context_find(buffer);
1947 if (cp == NULL || *cp == '\0') {
1948 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1950 cp = context_find(buffer);
1952 if (cp != NULL && *cp != '\0') {
1953 if (ce->ce_unlink) {
1955 ** Temporary file already exists, so we rename to
1956 ** version with extension.
1958 char *file_org = mh_xstrdup(ce->ce_file);
1959 ce->ce_file = add(cp, ce->ce_file);
1960 if (rename(file_org, ce->ce_file)) {
1961 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1964 mh_free0(&file_org);
1967 ce->ce_file = add(cp, ce->ce_file);
1971 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1972 content_error(ce->ce_file, ct,
1973 "unable to fopen for reading/writing");
1977 if (ct->c_type == CT_MULTIPART) {
1979 CI ci = &ct->c_ctinfo;
1982 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1984 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1985 strlen(ci->ci_subtype);
1986 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1987 putc(';', ce->ce_fp);
1990 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1993 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1994 fputs("\n\t", ce->ce_fp);
1997 putc(' ', ce->ce_fp);
2000 fprintf(ce->ce_fp, "%s", buffer);
2004 if (ci->ci_comment) {
2005 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2007 fputs("\n\t", ce->ce_fp);
2010 putc(' ', ce->ce_fp);
2013 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2016 fprintf(ce->ce_fp, "\n");
2018 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2020 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2022 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2023 fprintf(ce->ce_fp, "\n");
2026 if ((len = ct->c_end - ct->c_begin) < 0)
2027 adios(EX_SOFTWARE, NULL, "internal error(3)");
2030 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2031 content_error(ct->c_file, ct,
2032 "unable to open for reading");
2038 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2040 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2042 content_error(ct->c_file, ct, "error reading from");
2046 content_error(NULL, ct, "premature eof");
2054 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2055 if (ferror(ce->ce_fp)) {
2056 content_error(ce->ce_file, ct,
2057 "error writing to");
2062 fseek(ct->c_fp, 0L, SEEK_SET);
2064 if (fflush(ce->ce_fp)) {
2065 content_error(ce->ce_file, ct, "error writing to");
2069 fseek(ce->ce_fp, 0L, SEEK_SET);
2072 *file = ce->ce_file;
2077 return fileno(ce->ce_fp);
2080 free_encoding(ct, 0);