2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Maps text subtype names to TEXT_* codes.  The lookup in this file
** walks the rows until it meets the NULL key, so the final row doubles
** as the value used when no name matched.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Maps the charset names known to this file to CHARSET_* codes;
** the NULL-keyed row ends the scan and carries CHARSET_UNKNOWN.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Maps multipart subtype names to MULTI_* codes; the NULL-keyed row
** ends the scan and carries MULTI_UNKNOWN.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Maps message subtype names to MESSAGE_* codes; the NULL-keyed row
** ends the scan and carries MESSAGE_UNKNOWN.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Maps application subtype names to APPLICATION_* codes; the
** NULL-keyed row ends the scan and carries APPLICATION_UNKNOWN.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/* routines declared without `static', i.e. visible outside this file */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
/* helpers private to this file: header parsing first, then per-type and per-encoding setup */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Top-level content type names with their CT_* code and Init function.
** The Content-Type lookup in get_content() walks the rows until it
** meets a NULL key.
** NOTE(review): the two NULL-keyed rows appear to separate "X-"
** extension types (CT_EXTENSION) from everything else (CT_UNKNOWN) --
** confirm against that lookup.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Content-Transfer-Encoding names with their CE_* code and Init
** function.  8bit, 7bit and binary all share Init7Bit.
** NOTE(review): as with str2cts, the two NULL-keyed rows appear to
** separate "X-" extension encodings from unknown ones -- confirm.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** Parse the MIME message stored in `file'; the name "-" selects
** standard input.  The headers are read through get_content() and the
** type-specific Init function, when one was assigned, is then run on
** the result.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
/* standard input is copied into a temporary file; m_mktemp2() is handed &fp for it */
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
/* from here on `file' names the temporary copy */
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
/* rewind so the copy is parsed from its first byte */
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
/* third argument 1 = we are at the top level of the message */
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
/* no end offset recorded so far: use the current size of the file */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
/* hand over to the Init function chosen for this content type, if any */
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** Read one entity's header block from `in', keep every field on ct's
** header list, then interpret the MIME fields (version, type, transfer
** encoding, id, description, disposition) and fill in defaults for a
** missing Content-Type or Content-Transfer-Encoding.
** `file' is copied into ct->c_file; `toplevel' selects the default
** content type (see the values listed above).
*/
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
/*
** remember whether the field ended in CRLF.
** NOTE(review): indexes valuelen-2, so this relies on the value being
** at least two bytes long -- confirm m_getfld2 guarantees that.
*/
258 ct->crlf = f.value[f.valuelen-2] == '\r';
262 /* add the header data to the list */
263 add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
/* c_begin is moved along with the read position so it ends up at the body start */
265 ct->c_begin = ftell(in) + 1;
269 ct->c_begin = ftell(in) - strlen(f.value);
273 ct->c_begin = ftell(in);
277 advise(NULL, "message format error in component #%d", compnum);
282 adios(EX_IOERR, "m_getfld2", "io error");
285 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
291 ** Read the content headers. We will parse the
292 ** MIME related header fields into their various
293 ** structures and set internal flags related to
294 ** content type/subtype, etc.
297 hp = ct->c_first_hf; /* start at first header field */
299 /* Get MIME-Version field */
300 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
303 unsigned char *cp, *dp;
306 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
309 ct->c_vrsn = mh_xstrdup(hp->value);
311 /* Now, cleanup this field */
316 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
318 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
323 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
325 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
328 for (dp = cp; istoken(*dp); dp++)
/* case-insensitive comparison with the expected version value */
332 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
335 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
338 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
339 /* Get Content-Type field */
340 struct str2init *s2i;
341 CI ci = &ct->c_ctinfo;
343 /* Check if we've already seen a Content-Type header */
345 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
349 /* Parse the Content-Type field */
350 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
354 ** Set the Init function and the internal
355 ** flag for this content type.
357 for (s2i = str2cts; s2i->si_key; s2i++)
358 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
360 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
/* presumably s2i now rests on the matching row or on one of the NULL-keyed rows */
362 ct->c_type = s2i->si_val;
363 ct->c_ctinitfnx = s2i->si_init;
365 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
366 /* Get Content-Transfer-Encoding field */
368 unsigned char *cp, *dp;
369 struct str2init *s2i;
372 ** Check if we've already seen the
373 ** Content-Transfer-Encoding field
376 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
380 /* get copy of this field */
381 ct->c_celine = cp = mh_xstrdup(hp->value);
385 for (dp = cp; istoken(*dp); dp++)
391 ** Find the internal flag and Init function
392 ** for this transfer encoding.
394 for (s2i = str2ces; s2i->si_key; s2i++)
395 if (!mh_strcasecmp(cp, s2i->si_key))
397 if (!s2i->si_key && !uprf(cp, "X-"))
/* same lookup convention as for the content type above */
400 ct->c_encoding = s2i->si_val;
402 /* Call the Init function for this encoding */
403 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
406 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
407 /* Get Content-ID field */
408 ct->c_id = add(hp->value, ct->c_id);
410 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
411 /* Get Content-Description field */
412 ct->c_descr = add(hp->value, ct->c_descr);
414 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
415 /* Get Content-Disposition field */
416 ct->c_dispo = add(hp->value, ct->c_dispo);
420 hp = hp->next; /* next header field */
424 ** Check if we saw a Content-Type field.
425 ** If not, then assign a default value for
426 ** it, and the Init function.
430 ** If we are inside a multipart/digest message,
431 ** so default type is message/rfc822
434 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
/* the defaults bypass the str2cts lookup, so type and Init function are set by hand */
436 ct->c_type = CT_MESSAGE;
437 ct->c_ctinitfnx = InitMessage;
440 ** Else default type is text/plain
442 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
444 ct->c_type = CT_TEXT;
445 ct->c_ctinitfnx = InitText;
449 /* Use default Transfer-Encoding, if necessary */
451 ct->c_encoding = CE_7BIT;
464 ** small routine to add header field to list
/*
** Allocate a zeroed header-field node for `name'/`value' and append
** it to ct's list (c_first_hf .. c_last_hf).  The caller in
** get_content() passes freshly duplicated strings.
*/
468 add_header(CT ct, char *name, char *value)
472 /* allocate header field structure */
473 hp = mh_xcalloc(1, sizeof(*hp));
475 /* link data into header structure */
480 /* link header structure into the list */
481 if (ct->c_first_hf == NULL) {
482 ct->c_first_hf = hp; /* this is the first */
485 ct->c_last_hf->next = hp; /* add it to the end */
494 ** Make sure that buf contains at least one appearance of name,
495 ** followed by =. If not, insert both name and value, just after
496 ** first semicolon, if any. Note that name should not contain a
497 ** trailing =. And quotes will be added around the value. Typical
498 ** usage: make sure that a Content-Disposition header contains
499 ** filename="foo". If it doesn't and value does, use value from
/*
** Ensure that `buf' mentions `name=' somewhere; when it does not,
** build a new string with ; name="value" inserted at the first
** semicolon, or appended when there is none.
*/
503 incl_name_value(unsigned char *buf, char *name, char *value) {
506 /* Assume that name is non-null. */
508 char *name_plus_equal = concat(name, "=", NULL);
/* plain substring search: `name=' anywhere in buf counts as present */
510 if (!strstr(buf, name_plus_equal)) {
513 char *prefix, *suffix;
515 /* Trim trailing space, esp. newline. */
/* NOTE(review): for an empty buf this starts one element before buf -- confirm callers never pass "" */
516 for (cp = &buf[strlen(buf) - 1];
517 cp >= buf && isspace(*cp); --cp) {
/* the text to splice in */
521 insertion = concat("; ", name, "=", "\"", value, "\"",
525 ** Insert at first semicolon, if any.
526 ** If none, append to end.
528 prefix = mh_xstrdup(buf);
529 if ((cp = strchr(prefix, ';'))) {
/* everything from the semicolon onward is copied out as the suffix */
530 suffix = concat(cp, NULL);
532 newbuf = concat(prefix, insertion, suffix,
/* no semicolon: append the insertion and a newline */
537 newbuf = concat(buf, insertion, "\n", NULL);
541 mh_free0(&insertion);
545 mh_free0(&name_plus_equal);
552 ** Extract just name_suffix="foo", if any, from value. If there isn't
553 ** one, return the entire value. Note that, for example, a name_suffix
554 ** of name will match filename="foo", and return foo.
/*
** Look for  name_suffix="..."  inside `value' and return a newly
** allocated copy of the quoted text.  When the pattern is absent the
** result is `value' itself, so the caller cannot tell from the pointer
** alone whether it owns the result.
*/
557 extract_name_value(char *name_suffix, char *value) {
558 char *extracted_name_value;
559 char *name_suffix_plus_quote;
560 char *name_suffix_equals;
/* default result: the caller's string, unchanged */
566 extracted_name_value = value;
567 name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
568 name_suffix_equals = strstr(value, name_suffix_plus_quote);
569 mh_free0(&name_suffix_plus_quote);
570 if (name_suffix_equals) {
571 char *name_suffix_begin;
/* advance to the opening quote; strstr() above guarantees that one exists */
574 for (cp = name_suffix_equals; *cp != '"'; ++cp)
576 name_suffix_begin = ++cp;
577 /* Find second \". */
/* NOTE(review): the visible loop condition stops only at '"'; an unterminated value would be scanned past its NUL -- confirm */
578 for (; *cp != '"'; ++cp)
/* room for the quoted text plus the terminating NUL */
581 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
582 memcpy(extracted_name_value, name_suffix_begin,
583 cp - name_suffix_begin);
584 extracted_name_value[cp - name_suffix_begin] = '\0';
587 return extracted_name_value;
591 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
592 ** directives. Fills in the information of the CTinfo structure.
/*
** Parse the Content-Type value `cp' into ct's CTinfo: type, subtype,
** attribute/value parameters and any parenthesised comments.  A copy
** of the line is kept in ct->c_ctline.  With `magic' non-zero the
** <id>, [description] and {disposition} composition directives that
** may follow the parameters are handled too.
*/
595 get_ctinfo(unsigned char *cp, CT ct, int magic)
/* i is used further down as the indent width (%*.*s) of multi-line diagnostics */
604 i = strlen(invo_name) + 2;
606 /* store copy of Content-Type line */
607 cp = ct->c_ctline = mh_xstrdup(cp);
609 while (isspace(*cp)) /* trim leading spaces */
612 /* change newlines to spaces */
613 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
616 /* trim trailing spaces */
617 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
623 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
625 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
628 for (dp = cp; istoken(*dp); dp++)
631 ci->ci_type = mh_xstrdup(cp); /* store content type */
635 advise(NULL, "invalid %s: field in message %s (empty type)",
636 TYPE_FIELD, ct->c_file);
640 /* down case the content type string */
641 for (dp = ci->ci_type; *dp; dp++)
642 if (isalpha(*dp) && isupper(*dp))
648 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
/* an empty subtype is stored here; the Init functions visible in this file substitute their own default */
653 ci->ci_subtype = mh_xstrdup("");
661 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
664 for (dp = cp; istoken(*dp); dp++)
667 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
670 if (!*ci->ci_subtype) {
671 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
675 /* down case the content subtype string */
676 for (dp = ci->ci_subtype; *dp; dp++)
677 if (isalpha(*dp) && isupper(*dp))
684 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
688 ** Parse attribute/value pairs given with Content-Type
690 ep = (ap = ci->ci_attrs) + NPARMS;
696 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
704 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
708 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
712 /* down case the attribute name */
713 for (dp = cp; istoken(*dp); dp++)
714 if (isalpha(*dp) && isupper(*dp))
717 for (up = dp; isspace(*dp);)
719 if (dp == cp || *dp != '=') {
720 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
/* one duplicate of the remaining text holds both the attribute name and, further in, its value */
724 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
726 for (dp++; isspace(*dp);)
729 /* now add the attribute value */
730 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
/* quoted-string form: cp reads from the source text while dp writes into the value slot */
733 for (cp = ++dp, dp = vp;;) {
737 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
742 if ((c = *cp++) == '\0')
/* token form: copy token characters only */
757 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
762 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
/* blank out both the value and the name of the rejected parameter */
763 *ci->ci_values[ap - ci->ci_attrs] = '\0';
764 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
772 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
777 ** Get any <Content-Id> given in buffer
779 if (magic && *cp == '<') {
781 mh_free0(&(ct->c_id));
783 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784 advise(NULL, "invalid ID in message %s", ct->c_file);
/* re-wrap the id in angle brackets and end it with a newline */
790 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
801 ** Get any [Content-Description] given in buffer.
803 if (magic && *cp == '[') {
805 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
809 advise(NULL, "invalid description in message %s",
818 ct->c_descr = concat(ct->c_descr, "\n", NULL);
829 ** Get any {Content-Disposition} given in buffer.
831 if (magic && *cp == '{') {
833 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
837 advise(NULL, "invalid disposition in message %s",
846 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
857 ** Check if anything is left over
861 ci->ci_magic = mh_xstrdup(cp);
864 ** If there is a Content-Disposition header and
865 ** it doesn't have a *filename=, extract it from
866 ** the magic contents. The mhbasename call skips
867 ** any leading directory components.
870 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
872 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** Consume a parenthesised comment starting at *ap and add its text to
** the content's ci_comment.  `istype' only selects the field name used
** in the diagnostic: non-zero for TYPE_FIELD, zero for VRSN_FIELD.
*/
880 get_comment(CT ct, unsigned char **ap, int istype)
/* NOTE(review): the comment text is gathered in buffer[BUFSIZ]; no bound check is visible here -- confirm */
885 char c, buffer[BUFSIZ], *dp;
897 advise(NULL, "invalid comment in message %s's %s: field",
898 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
903 if ((c = *cp++) == '\0')
/* join onto an earlier comment with a single space, otherwise start a new one */
926 if ((dp = ci->ci_comment)) {
927 ci->ci_comment = concat(dp, " ", buffer, NULL);
930 ci->ci_comment = mh_xstrdup(buffer);
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
952 return OK; /* not much to do here */
/*
** Text-specific setup: default an empty subtype to "plain", translate
** the subtype through SubText, then record the charset parameter.
*/
966 CI ci = &ct->c_ctinfo;
968 /* check for missing subtype */
969 if (!*ci->ci_subtype)
970 ci->ci_subtype = add("plain", ci->ci_subtype);
973 for (kv = SubText; kv->kv_key; kv++)
974 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
/* kv is the matching row, or the NULL-keyed last row when the scan ran to the end */
976 ct->c_subtype = kv->kv_value;
978 /* allocate text character set structure */
979 t = mh_xcalloc(1, sizeof(*t));
980 ct->c_ctparams = (void *) t;
982 /* scan for charset parameter */
983 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
984 if (!mh_strcasecmp(*ap, "charset"))
987 /* check if content specified a character set */
/* the content keeps the name as returned by norm_charmap() */
990 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
991 /* match character set or set to CHARSET_UNKNOWN */
992 for (kv = Charset; kv->kv_key; kv++) {
993 if (!mh_strcasecmp(*ep, kv->kv_key)) {
997 t->tx_charset = kv->kv_value;
/* presumably the branch taken when no charset parameter was present */
999 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** Multipart-specific setup: check the transfer encoding, find the
** mandatory "boundary" parameter, scan the body for boundary lines,
** read each part's headers with get_content(), then number the parts
** and run their Init functions.
*/
1011 InitMultiPart(CT ct)
1015 unsigned char *cp, *dp;
1017 char *bp, buffer[BUFSIZ];
1018 struct multipart *m;
1020 struct part *part, **next;
1021 CI ci = &ct->c_ctinfo;
1026 ** The encoding for multipart messages must be either
1027 ** 7bit, 8bit, or binary (per RFC2045).
1029 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1030 && ct->c_encoding != CE_BINARY) {
1031 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
/* after the warning, carry on as if the encoding were 7bit */
1032 ct->c_encoding = CE_7BIT;
1036 for (kv = SubMultiPart; kv->kv_key; kv++)
1037 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1039 ct->c_subtype = kv->kv_value;
1042 ** Check for "boundary" parameter, which is
1043 ** required for multipart messages.
1046 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1047 if (!mh_strcasecmp(*ap, "boundary")) {
1053 /* complain if boundary parameter is missing */
1055 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1059 /* allocate primary structure for multipart info */
1060 m = mh_xcalloc(1, sizeof(*m));
1061 ct->c_ctparams = (void *) m;
1063 /* check if boundary parameter contains only whitespace characters */
1064 for (cp = bp; isspace(*cp); cp++)
1067 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1071 /* remove trailing whitespace from boundary parameter */
1072 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1077 /* record boundary separators */
/* two variants follow, ending in \n or \r\n; presumably chosen by ct->crlf -- confirm */
1079 m->mp_start = concat(bp, "\n", NULL);
1080 m->mp_stop = concat(bp, "--\n", NULL);
1082 m->mp_start = concat(bp, "\r\n", NULL);
1083 m->mp_stop = concat(bp, "--\r\n", NULL);
1087 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1088 advise(ct->c_file, "unable to open for reading");
/* start scanning at the beginning of this entity's body; pos tracks the offset read so far */
1092 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1094 next = &m->mp_parts;
1098 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1102 pos += strlen(buffer);
/* only lines starting with "--" can be boundary lines */
1103 if (buffer[0] != '-' || buffer[1] != '-')
1106 if (strcmp(buffer + 2, m->mp_start)!=0)
/* a new part begins: append a node to the part list */
1109 part = mh_xcalloc(1, sizeof(*part));
1111 next = &part->mp_next;
/* inside a digest the part defaults to message/rfc822 (toplevel == -1) */
1113 if (!(p = get_content(fp, ct->c_file,
1114 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1121 fseek(fp, pos, SEEK_SET);
1124 if (strcmp(buffer + 2, m->mp_start) == 0) {
/* the current part ends just before this boundary line */
1128 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1129 if (p->c_end < p->c_begin)
1130 p->c_begin = p->c_end;
1135 if (strcmp(buffer + 2, m->mp_stop) == 0)
1141 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1142 if (!inout && part) {
/* let the last part run to the end of the enclosing content */
1144 p->c_end = ct->c_end;
1146 if (p->c_begin >= p->c_end) {
1147 for (next = &m->mp_parts; *next != part;
1148 next = &((*next)->mp_next))
1157 /* reverse the order of the parts for multipart/alternative */
1158 if (ct->c_subtype == MULTI_ALTERNATE)
1162 ** label all subparts with part number, and
1163 ** then initialize the content of the subpart.
1168 char partnam[BUFSIZ];
1171 snprintf(partnam, sizeof(partnam), "%s.",
/* pp marks where the running part number is written after the prefix */
1173 pp = partnam + strlen(partnam);
1178 for (part = m->mp_parts, partnum = 1; part;
1179 part = part->mp_next, partnum++) {
/* NOTE(review): unbounded sprintf into the tail of partnam -- confirm the prefix always leaves room for the digits */
1182 sprintf(pp, "%d", partnum);
1183 p->c_partno = mh_xstrdup(partnam);
1185 /* initialize the content of the subparts */
1186 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1201 ** reverse the order of the parts of a multipart
/*
** Reverse the linked list of parts held in ct's multipart structure,
** using a temporary array of pointers to the list nodes.
*/
1205 reverse_parts(CT ct)
1208 struct multipart *m;
1209 struct part **base, **bmp, **next, *part;
1211 m = (struct multipart *) ct->c_ctparams;
1213 /* if only one part, just return */
1214 if (!m->mp_parts || !m->mp_parts->mp_next)
1217 /* count number of parts */
1219 for (part = m->mp_parts; part; part = part->mp_next)
1222 /* allocate array of pointers to the parts */
/* one slot more than the number of parts counted above */
1223 base = mh_xcalloc(i + 1, sizeof(*base));
1226 /* point at all the parts */
1227 for (part = m->mp_parts; part; part = part->mp_next)
1231 /* reverse the order of the parts */
1232 next = &m->mp_parts;
/* walk the array backwards, re-linking each node through `next' */
1233 for (bmp--; bmp >= base; bmp--) {
1236 next = &part->mp_next;
1240 /* free array of pointers */
/*
** Message-specific setup: check the transfer encoding, default an
** empty subtype to "rfc822", translate the subtype through SubMessage
** and then handle the partial and external-body subtypes.
*/
1253 CI ci = &ct->c_ctinfo;
1255 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT) && (ct->c_encoding != CE_BINARY)) {
1256 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
/* after the warning, carry on as if the encoding were 7bit */
1257 ct->c_encoding = CE_7BIT;
1260 /* check for missing subtype */
1261 if (!*ci->ci_subtype)
1262 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1265 for (kv = SubMessage; kv->kv_key; kv++)
1266 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1268 ct->c_subtype = kv->kv_value;
1270 switch (ct->c_subtype) {
1271 case MESSAGE_RFC822:
1274 case MESSAGE_PARTIAL:
/* the parameters of a partial message are kept in a structure hung off c_ctparams */
1279 p = mh_xcalloc(1, sizeof(*p));
1280 ct->c_ctparams = (void *) p;
1283 ** scan for parameters "id", "number",
1286 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1287 if (!mh_strcasecmp(*ap, "id")) {
1288 p->pm_partid = mh_xstrdup(*ep);
1291 if (!mh_strcasecmp(*ap, "number")) {
/* the part number must parse as an integer and be at least 1 */
1292 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1294 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1299 if (!mh_strcasecmp(*ap, "total")) {
1300 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
/* id and number are required; when a total was given the number may not exceed it */
1307 if (!p->pm_partid || !p->pm_partno
1308 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1309 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1315 case MESSAGE_EXTERNAL:
1320 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1321 advise(ct->c_file, "unable to open for reading");
/* the body of an external-body message is itself a header block: parse it as nested content */
1325 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1327 if (!(p = get_content(fp, ct->c_file, 0))) {
/* the nested content is given an empty extent (end == begin) */
1333 p->c_end = p->c_begin;
1338 switch (p->c_type) {
1343 if (p->c_subtype != MESSAGE_RFC822)
1348 (*p->c_ctinitfnx) (p);
/*
** Application-specific setup: translate the subtype name through
** SubApplication into ct->c_subtype.
*/
1367 InitApplication(CT ct)
1370 CI ci = &ct->c_ctinfo;
1373 for (kv = SubApplication; kv->kv_key; kv++)
1374 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
/* kv is the matching row, or the NULL-keyed last row when the scan ran to the end */
1376 ct->c_subtype = kv->kv_value;
1383 ** TRANSFER ENCODINGS
/*
** Shared setup for all transfer encodings: allocate the zeroed
** encoding state and install the open, close and size callbacks on ct.
** `openfnx' is the decoder-specific open routine.
*/
1387 init_encoding(CT ct, OpenCEFunc openfnx)
1391 ce = mh_xcalloc(1, sizeof(*ce));
/* only the open routine differs per encoding; close and size are common */
1394 ct->c_ceopenfnx = openfnx;
1395 ct->c_ceclosefnx = close_encoding;
1396 ct->c_cesizefnx = size_encoding;
/*
** Close callback installed by init_encoding().  It first checks
** whether ct has any encoding state (ct->c_cefile) at all.
*/
1403 close_encoding(CT ct)
1407 if (!(ce = ct->c_cefile))
/*
** Size callback installed by init_encoding().  It tries, in order: the
** open decoded file, the decoded file by name, and finally decoding
** the content through the open callback and measuring the result.
** Where none of these applies, the raw extent (c_end - c_begin) is
** returned.
** NOTE(review): the st_size results are cast to (long) although the
** function returns unsigned long -- confirm this is intended.
*/
1417 static unsigned long
1418 size_encoding(CT ct)
/* no encoding state: report the raw extent */
1426 if (!(ce = ct->c_cefile))
1427 return (ct->c_end - ct->c_begin);
/* decoded file already open */
1429 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1430 return (long) st.st_size;
/* decoded file known by name */
1433 if (stat(ce->ce_file, &st) != NOTOK)
1434 return (long) st.st_size;
1439 if (ct->c_encoding == CE_EXTERNAL)
1440 return (ct->c_end - ct->c_begin);
/* last resort: run the decoder and measure its output */
1443 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1444 return (ct->c_end - ct->c_begin);
1446 if (fstat(fd, &st) != NOTOK)
1447 size = (long) st.st_size;
1451 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table indexed by 7-bit character code.
** 'A'-'Z' map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to
** 0x34-0x3d, '+' to 0x3e and '/' to 0x3f.  Every other entry is 0xff,
** which openBase64() rejects because it is greater than 0x3f.
*/
1460 static unsigned char b642nib[0x80] = {
1461 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1462 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1463 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1467 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1468 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1469 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1470 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1471 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1472 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1473 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1474 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1475 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1476 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1483 return init_encoding(ct, openBase64);
/*
** Open callback for base64 content: decode the body of ct into the
** file named by ce->ce_file and return a descriptor for that file.
** When *file is NULL a temporary name is generated, otherwise *file is
** used; on the way out *file is set to the decoded file's name.
*/
1488 openBase64(CT ct, char **file)
1491 int fd, len, skip, own_ct_fp = 0;
1493 unsigned char value, *b, *b1, *b2, *b3;
1494 unsigned char *cp, *ep;
1495 char buffer[BUFSIZ];
1496 /* sbeck -- handle suffixes */
/* b1..b3 address the three payload bytes inside `bits'; which byte is which depends on `endian' */
1500 b = (unsigned char *) &bits;
1501 b1 = &b[endian > 0 ? 1 : 2];
1502 b2 = &b[endian > 0 ? 2 : 1];
1503 b3 = &b[endian > 0 ? 3 : 0];
/* presumably reached when a decoded file is already open: just rewind it */
1507 fseek(ce->ce_fp, 0L, SEEK_SET);
1512 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1513 content_error(ce->ce_file, ct,
1514 "unable to fopen for reading");
1520 if (*file == NULL) {
1521 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1524 ce->ce_file = mh_xstrdup(*file);
1528 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look for a "<prog>-suffix-<type>/<subtype>" entry first, then for the type-only form */
1530 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1531 invo_name, ci->ci_type, ci->ci_subtype);
1532 cp = context_find(buffer);
1533 if (cp == NULL || *cp == '\0') {
1534 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1536 cp = context_find(buffer);
1538 if (cp != NULL && *cp != '\0') {
1539 if (ce->ce_unlink) {
1541 ** Temporary file already exists, so we rename to
1542 ** version with extension.
1544 char *file_org = mh_xstrdup(ce->ce_file);
1545 ce->ce_file = add(cp, ce->ce_file);
1546 if (rename(file_org, ce->ce_file)) {
1547 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1550 mh_free0(&file_org);
1553 ce->ce_file = add(cp, ce->ce_file);
1557 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1558 content_error(ce->ce_file, ct,
1559 "unable to fopen for reading/writing");
/* len is the number of encoded bytes to consume */
1563 if ((len = ct->c_end - ct->c_begin) < 0)
1564 adios(EX_SOFTWARE, NULL, "internal error(1)");
1567 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1568 content_error(ct->c_file, ct,
1569 "unable to open for reading");
/* the encoded body is read with read(2) on the stream's descriptor, from c_begin onward */
1579 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1581 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1583 content_error(ct->c_file, ct, "error reading from");
1587 content_error(NULL, ct, "premature eof");
1595 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
/* reject data after padding, bytes with the high bit set, and characters outside the base64 alphabet */
1600 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1602 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1604 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
/* place the 6-bit value at the current position of the 24-bit group */
1608 bits |= value << bitno;
/* a negative bitno means the group is complete: emit its bytes */
1610 if ((bitno -= 6) < 0) {
1611 putc((char) *b1, ce->ce_fp);
1613 putc((char) *b2, ce->ce_fp);
1615 putc((char) *b3, ce->ce_fp);
1619 if (ferror(ce->ce_fp)) {
1620 content_error(ce->ce_file, ct,
1621 "error writing to");
/* start a fresh 24-bit group */
1624 bitno = 18, bits = 0L, skip = 0;
1630 goto self_delimiting;
1639 fprintf(stderr, "premature ending (bitno %d)\n",
1642 content_error(NULL, ct, "invalid BASE64 encoding");
1647 fseek(ct->c_fp, 0L, SEEK_SET);
1649 if (fflush(ce->ce_fp)) {
1650 content_error(ce->ce_file, ct, "error writing to");
/* rewind the decoded file so the caller reads it from the start */
1654 fseek(ce->ce_fp, 0L, SEEK_SET);
1657 *file = ce->ce_file;
1662 return fileno(ce->ce_fp);
/* presumably the shared error exit: discard the encoding state built so far */
1665 free_encoding(ct, 0);
/*
** Hex digit values indexed by 7-bit character code.
** '0'-'9' map to 0-9; 'A'-'F' and 'a'-'f' both map to 10-15.  All
** other entries are 0, so the table alone cannot tell an invalid
** character from '0'; openQuoted() tests isxdigit() before using it.
*/
1678 static char hex2nib[0x80] = {
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1680 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1686 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1701 return init_encoding(ct, openQuoted);
/*
** Open callback for quoted-printable content: decode the body of ct
** line by line into the file named by ce->ce_file and return a
** descriptor for that file.  When *file is NULL a temporary name is
** generated, otherwise *file is used; on the way out *file is set to
** the decoded file's name.
*/
1706 openQuoted(CT ct, char **file)
1708 int cc, len, quoted, own_ct_fp = 0;
1709 unsigned char *cp, *ep;
1710 char buffer[BUFSIZ];
1711 unsigned char mask = 0;
1713 /* sbeck -- handle suffixes */
/* presumably reached when a decoded file is already open: just rewind it */
1718 fseek(ce->ce_fp, 0L, SEEK_SET);
1723 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1724 content_error(ce->ce_file, ct,
1725 "unable to fopen for reading");
1731 if (*file == NULL) {
1732 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1735 ce->ce_file = mh_xstrdup(*file);
1739 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look for a "<prog>-suffix-<type>/<subtype>" entry first, then for the type-only form */
1741 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1742 invo_name, ci->ci_type, ci->ci_subtype);
1743 cp = context_find(buffer);
1744 if (cp == NULL || *cp == '\0') {
1745 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1747 cp = context_find(buffer);
1749 if (cp != NULL && *cp != '\0') {
1750 if (ce->ce_unlink) {
1752 ** Temporary file already exists, so we rename to
1753 ** version with extension.
1755 char *file_org = mh_xstrdup(ce->ce_file);
1756 ce->ce_file = add(cp, ce->ce_file);
1757 if (rename(file_org, ce->ce_file)) {
1758 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1761 mh_free0(&file_org);
1764 ce->ce_file = add(cp, ce->ce_file);
1768 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1769 content_error(ce->ce_file, ct,
1770 "unable to fopen for reading/writing");
/* len is the number of encoded bytes to consume */
1774 if ((len = ct->c_end - ct->c_begin) < 0)
1775 adios(EX_SOFTWARE, NULL, "internal error(2)");
1778 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1779 content_error(ct->c_file, ct,
1780 "unable to open for reading");
/* unlike the base64 decoder, this one reads the body a line at a time through stdio */
1788 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1790 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1791 content_error(NULL, ct, "premature eof");
1795 if ((cc = strlen(buffer)) > len)
/* walk ep back from the end of the line (the loop body is not visible here) */
1799 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1804 for (; cp < ep; cp++) {
1806 /* in an escape sequence */
1808 /* at byte 1 of an escape sequence */
1809 mask = hex2nib[*cp & 0x7f];
1810 /* next is byte 2 */
1813 /* at byte 2 of an escape sequence */
1815 mask |= hex2nib[*cp & 0x7f];
1816 putc(mask, ce->ce_fp);
1817 if (ferror(ce->ce_fp)) {
1818 content_error(ce->ce_file, ct, "error writing to");
1822 ** finished escape sequence; next may
1823 ** be literal or a new escape sequence
1827 /* on to next byte */
1831 /* not in an escape sequence */
1834 ** starting an escape sequence,
1837 if (cp + 1 < ep && cp[1] == '\n') {
1838 /* "=\n" soft line break, eat the \n */
1842 if (cp + 1 >= ep || cp + 2 >= ep) {
1844 ** We don't have 2 bytes left,
1845 ** so this is an invalid escape
1846 ** sequence; just show the raw bytes
1849 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1851 ** Next 2 bytes are hex digits,
1852 ** making this a valid escape
1853 ** sequence; let's decode it (above).
1859 ** One or both of the next 2 is
1860 ** out of range, making this an
1861 ** invalid escape sequence; just
1862 ** show the raw bytes (below).
1867 /* Just show the raw byte. */
1868 putc(*cp, ce->ce_fp);
1869 if (ferror(ce->ce_fp)) {
1870 content_error(ce->ce_file, ct,
1871 "error writing to");
1877 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1881 fseek(ct->c_fp, 0L, SEEK_SET);
1883 if (fflush(ce->ce_fp)) {
1884 content_error(ce->ce_file, ct, "error writing to");
/* rewind the decoded file so the caller reads it from the start */
1888 fseek(ce->ce_fp, 0L, SEEK_SET);
1891 *file = ce->ce_file;
1896 return fileno(ce->ce_fp);
/* presumably the shared error exit: discard the encoding state built so far */
1899 free_encoding(ct, 0);
1915 if (init_encoding(ct, open7Bit) == NOTOK)
1918 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** Open callback for 7bit, 8bit and binary content: copy the body of
** ct unchanged into the file named by ce->ce_file and return a
** descriptor for that file.  For multipart content a Content-Type
** header (plus id, description and disposition lines) is written in
** front of the copy.  When *file is NULL a temporary name is
** generated, otherwise *file is used; on the way out *file is set to
** the file's name.
*/
1924 open7Bit(CT ct, char **file)
1926 int cc, fd, len, own_ct_fp = 0;
1927 char buffer[BUFSIZ];
1928 /* sbeck -- handle suffixes */
/* presumably reached when the output file is already open: just rewind it */
1935 fseek(ce->ce_fp, 0L, SEEK_SET);
1940 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1941 content_error(ce->ce_file, ct,
1942 "unable to fopen for reading");
1948 if (*file == NULL) {
1949 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1952 ce->ce_file = mh_xstrdup(*file);
1956 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look for a "<prog>-suffix-<type>/<subtype>" entry first, then for the type-only form */
1958 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1959 invo_name, ci->ci_type, ci->ci_subtype);
1960 cp = context_find(buffer);
1961 if (cp == NULL || *cp == '\0') {
1962 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1964 cp = context_find(buffer);
1966 if (cp != NULL && *cp != '\0') {
1967 if (ce->ce_unlink) {
1969 ** Temporary file already exists, so we rename to
1970 ** version with extension.
1972 char *file_org = mh_xstrdup(ce->ce_file);
1973 ce->ce_file = add(cp, ce->ce_file);
1974 if (rename(file_org, ce->ce_file)) {
1975 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1978 mh_free0(&file_org);
1981 ce->ce_file = add(cp, ce->ce_file);
1985 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1986 content_error(ce->ce_file, ct,
1987 "unable to fopen for reading/writing");
/* multipart content gets its own header block written in front of the body */
1991 if (ct->c_type == CT_MULTIPART) {
1993 CI ci = &ct->c_ctinfo;
1996 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
/* len tracks the current output column so the header can be folded at CPERLIN */
1998 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1999 strlen(ci->ci_subtype);
2000 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2001 putc(';', ce->ce_fp);
2004 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
/* fold onto a continuation line when the parameter would not fit */
2007 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2008 fputs("\n\t", ce->ce_fp);
2011 putc(' ', ce->ce_fp);
2014 fprintf(ce->ce_fp, "%s", buffer);
2018 if (ci->ci_comment) {
2019 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2021 fputs("\n\t", ce->ce_fp);
2024 putc(' ', ce->ce_fp);
2027 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2030 fprintf(ce->ce_fp, "\n");
/* the stored id, description and disposition values are written right after the colon with no newline added here */
2032 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2034 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2036 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
/* blank line ends the generated header block */
2037 fprintf(ce->ce_fp, "\n");
/* len is reused: from here on it is the number of body bytes to copy */
2040 if ((len = ct->c_end - ct->c_begin) < 0)
2041 adios(EX_SOFTWARE, NULL, "internal error(3)");
2044 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2045 content_error(ct->c_file, ct,
2046 "unable to open for reading");
/* the body is read with read(2) on the stream's descriptor, from c_begin onward */
2052 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2054 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2056 content_error(ct->c_file, ct, "error reading from");
2060 content_error(NULL, ct, "premature eof");
/* copy the chunk through unchanged */
2068 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2069 if (ferror(ce->ce_fp)) {
2070 content_error(ce->ce_file, ct,
2071 "error writing to");
2076 fseek(ct->c_fp, 0L, SEEK_SET);
2078 if (fflush(ce->ce_fp)) {
2079 content_error(ce->ce_file, ct, "error writing to");
/* rewind the output file so the caller reads it from the start */
2083 fseek(ce->ce_fp, 0L, SEEK_SET);
2086 *file = ce->ce_file;
2091 return fileno(ce->ce_fp);
/* presumably the shared error exit: discard the encoding state built so far */
2094 free_encoding(ct, 0);