2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Subtype names recognised for text/ content, paired with TEXT_* codes.
** The lookup in this file walks the rows until kv_key is NULL and then
** uses the kv_value of the row it stopped on, so the closing NULL row
** supplies TEXT_UNKNOWN for subtypes that are not listed.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Character set names that get a dedicated CHARSET_* code.  The charset
** parameter of a text part is compared against kv_key without regard to
** case; the NULL row ends the scan and carries CHARSET_UNKNOWN.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Subtype names recognised for multipart/ content, paired with MULTI_*
** codes.  The NULL row ends the case-insensitive scan and supplies
** MULTI_UNKNOWN for any other subtype.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Subtype names recognised for message/ content, paired with MESSAGE_*
** codes.  The NULL row ends the case-insensitive scan and supplies
** MESSAGE_UNKNOWN for any other subtype.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Subtype names recognised for application/ content, paired with
** APPLICATION_* codes.  The NULL row ends the case-insensitive scan and
** supplies APPLICATION_UNKNOWN for any other subtype.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.  The first four have external linkage; the
** remaining ones are static and therefore private to this file.  The
** Init* and open* routines are referenced from the str2cts and str2ces
** dispatch tables or passed to init_encoding as the open callback.
*/
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Dispatch table for the major content type: name, CT_* code and the
** initializer run once the headers have been parsed.  audio, image and
** video share InitGeneric.  get_content scans until si_key is NULL and
** then copies si_val and si_init from the row it ended on.
** NOTE(review): the two NULL rows presumably let a type starting with
** "X-" resolve to CT_EXTENSION and anything else to CT_UNKNOWN; the
** statement that steps to the second row is not visible here -- confirm.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table for Content-Transfer-Encoding: name, CE_* code and the
** initializer that installs the matching decoder.  8bit, 7bit and binary
** all use Init7Bit.  get_content scans until si_key is NULL, stores
** si_val in c_encoding and calls si_init when it is not NULL.
** NOTE(review): as with str2cts, the second NULL row presumably covers
** encodings that are not "X-" extensions -- confirm.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** parse_mime -- build the content structure for the top-level entity.
**
** file  path of the message, or "-" to read the message from stdin.
**
** For "-" the input is first copied line by line into a temporary file
** created with m_mktemp2, and that file is parsed instead; the content
** is then flagged through c_unlink so the temporary file is removed.
** The headers are parsed by get_content with toplevel set to 1.  When
** the parse leaves c_end at 0 the end of the file is used.  Finally the
** initializer chosen for the content type is run, if there is one.
** Failures are reported with advise; the statements that follow each
** report (including the return values) are not visible in this listing.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
/* spool stdin to a file: the parser relies on fseek and ftell below */
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
/* from here on, file names the temporary copy */
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
/* rewind the spooled copy before parsing it */
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
/* no end offset recorded by the parse: the content runs to end of file */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
/* type-specific setup (multipart splitting, charset lookup, ...) */
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** get_content -- read the header block of one entity from the stream
** and translate its MIME headers into a newly allocated content
** structure.
**
** in        stream to read the header fields from
** file      file name; a duplicate is stored in c_file
** toplevel  1 at the top level, 0 inside a message or multipart type,
**           -1 inside multipart/digest (which switches the default
**           content type from text/plain to message/rfc822)
**
** Returns the content structure; parse_mime treats a null result as
** failure.
**
** Work is done in two passes.  First every header field is read with
** m_getfld2 and appended to the header list, while c_begin is derived
** from the stream position.  Then the list is walked and MIME-Version,
** Content-Type, Content-Transfer-Encoding, Content-ID,
** Content-Description and Content-Disposition are interpreted.  A
** missing Content-Type or encoding is replaced by a default.
*/
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
/* pass 1: collect the raw fields; each case below re-derives c_begin */
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
256 /* add the header data to the list */
/* the list receives its own copies of the field name and value */
257 add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
259 ct->c_begin = ftell(in) + 1;
263 ct->c_begin = ftell(in) - strlen(f.value);
267 ct->c_begin = ftell(in);
273 adios(EX_DATAERR, NULL, "message format error in component #%d",
277 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
283 ** Read the content headers. We will parse the
284 ** MIME related header fields into their various
285 ** structures and set internal flags related to
286 ** content type/subtype, etc.
/* pass 2: interpret the MIME-related fields collected above */
289 hp = ct->c_first_hf; /* start at first header field */
291 /* Get MIME-Version field */
292 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
295 unsigned char *cp, *dp;
298 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
301 ct->c_vrsn = mh_xstrdup(hp->value);
303 /* Now, cleanup this field */
308 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
310 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
315 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
317 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
320 for (dp = cp; istoken(*dp); dp++)
/* compare the version token with the expected value */
324 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
327 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
330 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
331 /* Get Content-Type field */
332 struct str2init *s2i;
333 CI ci = &ct->c_ctinfo;
335 /* Check if we've already seen a Content-Type header */
337 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
341 /* Parse the Content-Type field */
342 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
346 ** Set the Init function and the internal
347 ** flag for this content type.
349 for (s2i = str2cts; s2i->si_key; s2i++)
350 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
352 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
354 ct->c_type = s2i->si_val;
355 ct->c_ctinitfnx = s2i->si_init;
357 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
358 /* Get Content-Transfer-Encoding field */
360 unsigned char *cp, *dp;
361 struct str2init *s2i;
364 ** Check if we've already seen the
365 ** Content-Transfer-Encoding field
368 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
372 /* get copy of this field */
373 ct->c_celine = cp = mh_xstrdup(hp->value);
377 for (dp = cp; istoken(*dp); dp++)
383 ** Find the internal flag and Init function
384 ** for this transfer encoding.
386 for (s2i = str2ces; s2i->si_key; s2i++)
387 if (!mh_strcasecmp(cp, s2i->si_key))
389 if (!s2i->si_key && !uprf(cp, "X-"))
392 ct->c_encoding = s2i->si_val;
394 /* Call the Init function for this encoding */
395 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
398 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
399 /* Get Content-ID field */
400 ct->c_id = add(hp->value, ct->c_id);
402 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
403 /* Get Content-Description field */
404 ct->c_descr = add(hp->value, ct->c_descr);
406 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
407 /* Get Content-Disposition field */
408 ct->c_dispo = add(hp->value, ct->c_dispo);
412 hp = hp->next; /* next header field */
416 ** Check if we saw a Content-Type field.
417 ** If not, then assign a default value for
418 ** it, and the Init function.
422 ** If we are inside a multipart/digest message,
423 ** so default type is message/rfc822
426 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
428 ct->c_type = CT_MESSAGE;
429 ct->c_ctinitfnx = InitMessage;
432 ** Else default type is text/plain
434 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
436 ct->c_type = CT_TEXT;
437 ct->c_ctinitfnx = InitText;
441 /* Use default Transfer-Encoding, if necessary */
443 ct->c_encoding = CE_7BIT;
456 ** small routine to add header field to list
/*
** add_header -- append one header field to the header list of ct.
**
** A zero-filled node is allocated and linked at the tail: it becomes
** c_first_hf when the list is empty, otherwise it is hung off
** c_last_hf->next.
** NOTE(review): the node presumably keeps the name and value pointers
** as passed, without copying (those assignments are not visible here);
** get_content hands in freshly duplicated strings -- confirm ownership.
*/
460 add_header(CT ct, char *name, char *value)
464 /* allocate header field structure */
465 hp = mh_xcalloc(1, sizeof(*hp));
467 /* link data into header structure */
472 /* link header structure into the list */
473 if (ct->c_first_hf == NULL) {
474 ct->c_first_hf = hp; /* this is the first */
477 ct->c_last_hf->next = hp; /* add it to the end */
486 ** Make sure that buf contains at least one appearance of name,
487 ** followed by =. If not, insert both name and value, just after
488 ** first semicolon, if any. Note that name should not contain a
489 ** trailing =. And quotes will be added around the value. Typical
490 ** usage: make sure that a Content-Disposition header contains
491 ** filename="foo". If it doesn't and value does, use value from
/*
** incl_name_value -- make sure buf carries a name="value" parameter.
**
** buf    header text to inspect
** name   parameter name, without a trailing '='
** value  parameter value; it is wrapped in double quotes when inserted
**
** If the text name= does not occur in buf, trailing whitespace is
** trimmed and the string "; name=" followed by the quoted value is
** built.  It is spliced in at the first semicolon of buf when there is
** one; otherwise it is appended and a newline is added.  The temporary
** strings insertion and name_plus_equal are released before returning.
** The caller in get_ctinfo stores the result in c_dispo.
**
** NOTE(review): the presence test is a plain strstr, so a longer name
** ending in the same letters (name= inside filename=) also counts as
** present.
** NOTE(review): the trim loop starts at &buf[strlen(buf) - 1]; for an
** empty buf that address lies before the buffer -- confirm callers
** never pass an empty string.
*/
495 incl_name_value(unsigned char *buf, char *name, char *value) {
498 /* Assume that name is non-null. */
500 char *name_plus_equal = concat(name, "=", NULL);
502 if (!strstr(buf, name_plus_equal)) {
505 char *prefix, *suffix;
507 /* Trim trailing space, esp. newline. */
508 for (cp = &buf[strlen(buf) - 1];
509 cp >= buf && isspace(*cp); --cp) {
513 insertion = concat("; ", name, "=", "\"", value, "\"",
517 ** Insert at first semicolon, if any.
518 ** If none, append to end.
520 prefix = mh_xstrdup(buf);
521 if ((cp = strchr(prefix, ';'))) {
522 suffix = concat(cp, NULL);
524 newbuf = concat(prefix, insertion, suffix,
529 newbuf = concat(buf, insertion, "\n", NULL);
533 mh_free0(&insertion);
537 mh_free0(&name_plus_equal);
544 ** Extract just name_suffix="foo", if any, from value. If there isn't
545 ** one, return the entire value. Note that, for example, a name_suffix
546 ** of name will match filename="foo", and return foo.
/*
** extract_name_value -- pull the quoted value of a parameter out of a
** string.
**
** name_suffix  tail of the parameter name to look for
** value        text to search
**
** The text name_suffix followed by an equals sign and a double quote is
** searched for in value.  When it is found, the characters between that
** quote and the next double quote are copied into a newly allocated,
** NUL-terminated buffer, which is returned.  When it is not found,
** value itself is returned.
**
** NOTE(review): the result is either the caller's own pointer or fresh
** storage, and nothing tells the caller which one it got.
** NOTE(review): the scan for the closing quote only tests for the quote
** character; a value without a closing quote would be read past its
** terminating NUL.
*/
549 extract_name_value(char *name_suffix, char *value) {
550 char *extracted_name_value = value;
551 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
552 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
555 mh_free0(&name_suffix_plus_quote);
556 if (name_suffix_equals) {
557 char *name_suffix_begin;
560 for (cp = name_suffix_equals; *cp != '"'; ++cp)
562 name_suffix_begin = ++cp;
563 /* Find second \". */
564 for (; *cp != '"'; ++cp)
567 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
568 memcpy(extracted_name_value, name_suffix_begin,
569 cp - name_suffix_begin);
570 extracted_name_value[cp - name_suffix_begin] = '\0';
573 return extracted_name_value;
577 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
578 ** directives. Fills in the information of the CTinfo structure.
/*
** get_ctinfo -- parse a Content-Type value into the c_ctinfo member of
** ct.
**
** cp     text of the field; a duplicate is stored in c_ctline and all
**        parsing happens on that duplicate
** ct     content structure to fill in
** magic  non-zero to also accept the composition directives handled at
**        the end of the routine
**
** Callers compare the result with NOTOK.
**
** Steps, in order:
**  - trim leading space, turn newlines into spaces, trim trailing space
**  - hand parenthesised comments to get_comment wherever one may occur
**  - store the type token in ci_type and lower-case it; an empty type
**    is reported as an error
**  - store the subtype in ci_subtype (an empty string when none is
**    given) and lower-case it; an empty subtype is reported as an error
**  - collect at most NPARMS attribute/value pairs into ci_attrs and
**    ci_values, lower-casing attribute names and accepting either a
**    quoted string or a token as the value
**  - with magic set, read an id in angle brackets into c_id, a
**    description in square brackets into c_descr and a disposition in
**    braces into c_dispo, each stored with a trailing newline
**  - whatever is left over is stored in ci_magic and used to supply a
**    filename parameter for c_dispo, or reported as extraneous
**    information; the tests choosing between the two are not visible
**
** i is strlen(invo_name) + 2 and serves as the width of the padding
** that indents the second line of the diagnostics.
**
** NOTE(review): the "error detected at offset %d" diagnostic passes the
** pointer difference dp - cp; that has type ptrdiff_t, so a cast to int
** looks appropriate -- confirm.
*/
581 get_ctinfo(unsigned char *cp, CT ct, int magic)
590 i = strlen(invo_name) + 2;
592 /* store copy of Content-Type line */
593 cp = ct->c_ctline = mh_xstrdup(cp);
595 while (isspace(*cp)) /* trim leading spaces */
598 /* change newlines to spaces */
599 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
602 /* trim trailing spaces */
603 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
609 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
611 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
614 for (dp = cp; istoken(*dp); dp++)
617 ci->ci_type = mh_xstrdup(cp); /* store content type */
621 advise(NULL, "invalid %s: field in message %s (empty type)",
622 TYPE_FIELD, ct->c_file);
626 /* down case the content type string */
627 for (dp = ci->ci_type; *dp; dp++)
628 if (isalpha(*dp) && isupper(*dp))
634 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
639 ci->ci_subtype = mh_xstrdup("");
647 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
650 for (dp = cp; istoken(*dp); dp++)
653 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
656 if (!*ci->ci_subtype) {
657 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
661 /* down case the content subtype string */
662 for (dp = ci->ci_subtype; *dp; dp++)
663 if (isalpha(*dp) && isupper(*dp))
670 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
674 ** Parse attribute/value pairs given with Content-Type
676 ep = (ap = ci->ci_attrs) + NPARMS;
682 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
690 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
694 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
698 /* down case the attribute name */
699 for (dp = cp; istoken(*dp); dp++)
700 if (isalpha(*dp) && isupper(*dp))
703 for (up = dp; isspace(*dp);)
705 if (dp == cp || *dp != '=') {
706 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
710 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
712 for (dp++; isspace(*dp);)
715 /* now add the attribute value */
716 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
719 for (cp = ++dp, dp = vp;;) {
723 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
728 if ((c = *cp++) == '\0')
743 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
748 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
749 *ci->ci_values[ap - ci->ci_attrs] = '\0';
750 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
758 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
763 ** Get any <Content-Id> given in buffer
765 if (magic && *cp == '<') {
767 mh_free0(&(ct->c_id));
769 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
770 advise(NULL, "invalid ID in message %s", ct->c_file);
776 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
787 ** Get any [Content-Description] given in buffer.
789 if (magic && *cp == '[') {
791 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
795 advise(NULL, "invalid description in message %s",
804 ct->c_descr = concat(ct->c_descr, "\n", NULL);
815 ** Get any {Content-Disposition} given in buffer.
817 if (magic && *cp == '{') {
819 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
823 advise(NULL, "invalid disposition in message %s",
832 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
843 ** Check if anything is left over
847 ci->ci_magic = mh_xstrdup(cp);
850 ** If there is a Content-Disposition header and
851 ** it doesn't have a *filename=, extract it from
852 ** the magic contents. The mhbasename call skips
853 ** any leading directory components.
856 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
858 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** get_comment -- consume a parenthesised comment inside a header value.
**
** ct      content whose ci_comment receives the comment text
** ap      address of the parse position; callers pass it while the
**         position points at an opening parenthesis
** istype  selects the field named in the diagnostic: TYPE_FIELD when
**         non-zero, VRSN_FIELD otherwise
**
** The text is gathered in a local buffer of BUFSIZ bytes.  It is then
** appended to an existing ci_comment with a single space in between,
** or duplicated into ci_comment when there was none.  Callers compare
** the result with NOTOK.
** NOTE(review): the bounds handling for buffer and the update of *ap
** are not visible in this listing.
*/
866 get_comment(CT ct, unsigned char **ap, int istype)
871 char c, buffer[BUFSIZ], *dp;
883 advise(NULL, "invalid comment in message %s's %s: field",
884 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
889 if ((c = *cp++) == '\0')
912 if ((dp = ci->ci_comment)) {
913 ci->ci_comment = concat(dp, " ", buffer, NULL);
916 ci->ci_comment = mh_xstrdup(buffer);
931 ** Handles content types audio, image, and video.
932 ** There's not much to do right here.
/*
** Only visible statement of the initializer shared by audio, image and
** video in str2cts (presumably InitGeneric; its signature line is not
** visible here): it reports success.
*/
938 return OK; /* not much to do here */
/*
** Body of the text/ initializer (presumably InitText; the signature
** line is not visible here).
**
** - an empty subtype is replaced by "plain"
** - the subtype is looked up in SubText and its code stored in
**   c_subtype
** - a zero-filled text structure is allocated and attached through
**   c_ctparams
** - the parameters are searched for charset; when one is given, its
**   normalised name (norm_charmap) is duplicated into c_charset and the
**   value is matched against the Charset table to set tx_charset
** - otherwise tx_charset is CHARSET_UNSPECIFIED
** The tests that select between the last two cases are not visible.
*/
952 CI ci = &ct->c_ctinfo;
954 /* check for missing subtype */
955 if (!*ci->ci_subtype)
956 ci->ci_subtype = add("plain", ci->ci_subtype);
959 for (kv = SubText; kv->kv_key; kv++)
960 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
962 ct->c_subtype = kv->kv_value;
964 /* allocate text character set structure */
965 t = mh_xcalloc(1, sizeof(*t));
966 ct->c_ctparams = (void *) t;
968 /* scan for charset parameter */
969 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
970 if (!mh_strcasecmp(*ap, "charset"))
973 /* check if content specified a character set */
976 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
977 /* match character set or set to CHARSET_UNKNOWN */
978 for (kv = Charset; kv->kv_key; kv++) {
979 if (!mh_strcasecmp(*ep, kv->kv_key)) {
983 t->tx_charset = kv->kv_value;
985 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** Body of the multipart/ initializer (presumably InitMultiPart; the
** signature line is not visible here).  It splits the body of ct into
** its subparts.
**
** - an encoding other than 7bit, 8bit or binary draws a warning and is
**   replaced by CE_7BIT
** - the subtype is looked up in SubMultiPart
** - the boundary parameter is mandatory; a missing one, or one that is
**   only whitespace, is reported; trailing whitespace is removed
** - mp_start is the boundary plus a newline and mp_stop is the boundary
**   plus two dashes and a newline; both are compared against the text
**   after the two leading dashes of a line
** - the file is opened if necessary and read line by line from
**   c_begin, with pos tracking the offset of the line being examined
** - at each start separator a part node is allocated and its headers
**   are parsed by get_content, with -1 for multipart/digest and 0
**   otherwise; the end offset of the previous part is derived from the
**   stream position and kept no smaller than its start offset
** - input that ends without the stop separator is reported as bogus
**   multipart content, and the last part is closed at c_end of ct
** - for multipart/alternative the part order is reversed
** - each subpart is given a part number built from a prefix and its
**   ordinal, and its type initializer is then run
*/
1001 unsigned char *cp, *dp;
1003 char *bp, buffer[BUFSIZ];
1004 struct multipart *m;
1006 struct part *part, **next;
1007 CI ci = &ct->c_ctinfo;
1012 ** The encoding for multipart messages must be either
1013 ** 7bit, 8bit, or binary (per RFC2045).
1015 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1016 && ct->c_encoding != CE_BINARY) {
1017 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1018 ct->c_encoding = CE_7BIT;
1022 for (kv = SubMultiPart; kv->kv_key; kv++)
1023 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1025 ct->c_subtype = kv->kv_value;
1028 ** Check for "boundary" parameter, which is
1029 ** required for multipart messages.
1032 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1033 if (!mh_strcasecmp(*ap, "boundary")) {
1039 /* complain if boundary parameter is missing */
1041 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1045 /* allocate primary structure for multipart info */
1046 m = mh_xcalloc(1, sizeof(*m));
1047 ct->c_ctparams = (void *) m;
1049 /* check if boundary parameter contains only whitespace characters */
1050 for (cp = bp; isspace(*cp); cp++)
1053 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1057 /* remove trailing whitespace from boundary parameter */
1058 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1063 /* record boundary separators */
1064 m->mp_start = concat(bp, "\n", NULL);
1065 m->mp_stop = concat(bp, "--\n", NULL);
1067 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1068 advise(ct->c_file, "unable to open for reading");
1072 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1074 next = &m->mp_parts;
1078 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1082 pos += strlen(buffer);
1083 if (buffer[0] != '-' || buffer[1] != '-')
1086 if (strcmp(buffer + 2, m->mp_start)!=0)
1089 part = mh_xcalloc(1, sizeof(*part));
1091 next = &part->mp_next;
1093 if (!(p = get_content(fp, ct->c_file,
1094 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1101 fseek(fp, pos, SEEK_SET);
1104 if (strcmp(buffer + 2, m->mp_start) == 0) {
1108 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1109 if (p->c_end < p->c_begin)
1110 p->c_begin = p->c_end;
1115 if (strcmp(buffer + 2, m->mp_stop) == 0)
1121 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1122 if (!inout && part) {
1124 p->c_end = ct->c_end;
1126 if (p->c_begin >= p->c_end) {
1127 for (next = &m->mp_parts; *next != part;
1128 next = &((*next)->mp_next))
1137 /* reverse the order of the parts for multipart/alternative */
1138 if (ct->c_subtype == MULTI_ALTERNATE)
1142 ** label all subparts with part number, and
1143 ** then initialize the content of the subpart.
1148 char partnam[BUFSIZ];
1151 snprintf(partnam, sizeof(partnam), "%s.",
1153 pp = partnam + strlen(partnam);
1158 for (part = m->mp_parts, partnum = 1; part;
1159 part = part->mp_next, partnum++) {
1162 sprintf(pp, "%d", partnum);
1163 p->c_partno = mh_xstrdup(partnam);
1165 /* initialize the content of the subparts */
1166 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1181 ** reverse the order of the parts of a multipart
/*
** reverse_parts -- reverse the subpart list of a multipart content.
**
** ct  multipart content; c_ctparams is taken to be its struct multipart
**
** Nothing is done for a list with fewer than two parts.  Otherwise the
** parts are counted, their addresses are gathered in a temporary array
** with one spare slot, and the mp_next links are rebuilt while walking
** that array from its last entry back to its first.
*/
1185 reverse_parts(CT ct)
1188 struct multipart *m;
1189 struct part **base, **bmp, **next, *part;
1191 m = (struct multipart *) ct->c_ctparams;
1193 /* if only one part, just return */
1194 if (!m->mp_parts || !m->mp_parts->mp_next)
1197 /* count number of parts */
1199 for (part = m->mp_parts; part; part = part->mp_next)
1202 /* allocate array of pointers to the parts */
1203 base = mh_xcalloc(i + 1, sizeof(*base));
1206 /* point at all the parts */
1207 for (part = m->mp_parts; part; part = part->mp_next)
1211 /* reverse the order of the parts */
1212 next = &m->mp_parts;
1213 for (bmp--; bmp >= base; bmp--) {
1216 next = &part->mp_next;
1220 /* free array of pointers */
/*
** Body of the message/ initializer (presumably InitMessage; the
** signature line is not visible here).
**
** - an encoding other than 7bit or 8bit only draws a warning
** - an empty subtype is replaced by "rfc822"; the subtype is then
**   looked up in SubMessage
** - message/partial: a zero-filled structure is attached through
**   c_ctparams and filled from the id, number and total parameters.
**   number must parse as an integer of at least 1.  The set is rejected
**   when id or number is missing, or when number exceeds a non-zero
**   total.
** - message/external-body: the file is opened if necessary, the stream
**   is positioned at c_begin and the enclosed headers are parsed by
**   get_content with toplevel 0.  The nested content gets c_end equal
**   to c_begin, and its own initializer is called further down.
** What happens for message/rfc822 is not visible in this listing.
*/
1233 CI ci = &ct->c_ctinfo;
1235 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1236 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1240 /* check for missing subtype */
1241 if (!*ci->ci_subtype)
1242 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1245 for (kv = SubMessage; kv->kv_key; kv++)
1246 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1248 ct->c_subtype = kv->kv_value;
1250 switch (ct->c_subtype) {
1251 case MESSAGE_RFC822:
1254 case MESSAGE_PARTIAL:
1259 p = mh_xcalloc(1, sizeof(*p));
1260 ct->c_ctparams = (void *) p;
1263 ** scan for parameters "id", "number",
1266 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1267 if (!mh_strcasecmp(*ap, "id")) {
1268 p->pm_partid = mh_xstrdup(*ep);
1271 if (!mh_strcasecmp(*ap, "number")) {
1272 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1274 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1279 if (!mh_strcasecmp(*ap, "total")) {
1280 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1287 if (!p->pm_partid || !p->pm_partno
1288 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1289 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1295 case MESSAGE_EXTERNAL:
1300 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1301 advise(ct->c_file, "unable to open for reading");
1305 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1307 if (!(p = get_content(fp, ct->c_file, 0))) {
1313 p->c_end = p->c_begin;
1318 switch (p->c_type) {
1323 if (p->c_subtype != MESSAGE_RFC822)
1328 (*p->c_ctinitfnx) (p);
/*
** InitApplication -- initializer for application/ content.
**
** Looks the subtype up in SubApplication without regard to case and
** stores the resulting code in c_subtype; the terminating row of the
** table supplies APPLICATION_UNKNOWN for unlisted subtypes.
*/
1347 InitApplication(CT ct)
1350 CI ci = &ct->c_ctinfo;
1353 for (kv = SubApplication; kv->kv_key; kv++)
1354 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1356 ct->c_subtype = kv->kv_value;
1363 ** TRANSFER ENCODINGS
/*
** init_encoding -- common setup shared by the transfer-encoding
** initializers.
**
** ct       content to prepare
** openfnx  decoder to install as the open callback
**
** Allocates a zero-filled encoding record and installs openfnx together
** with close_encoding and size_encoding as the callbacks of ct.
** NOTE(review): the record is presumably attached as c_cefile; that
** assignment is not visible in this listing.
*/
1367 init_encoding(CT ct, OpenCEFunc openfnx)
1371 ce = mh_xcalloc(1, sizeof(*ce));
1374 ct->c_ceopenfnx = openfnx;
1375 ct->c_ceclosefnx = close_encoding;
1376 ct->c_cesizefnx = size_encoding;
/*
** close_encoding -- close callback installed by init_encoding.
** It begins by fetching the encoding record (c_cefile) of ct and
** testing whether there is one; the rest of the body is not visible in
** this listing.
*/
1383 close_encoding(CT ct)
1387 if (!(ce = ct->c_cefile))
/*
** size_encoding -- size callback installed by init_encoding.
**
** The following are tried in order:
**  - no encoding record: the encoded length, c_end - c_begin
**  - a decoded stream that can be fstat-ed: the size of that file
**  - a decoded file name that can be stat-ed: the size of that file
**  - CE_EXTERNAL: the encoded length
**  - otherwise the content is decoded through the open callback, the
**    result is fstat-ed and the close callback is called; when the open
**    callback fails the encoded length is returned
**
** NOTE(review): st_size is cast to long although the function returns
** unsigned long; the cast looks like a leftover -- confirm.
*/
1397 static unsigned long
1398 size_encoding(CT ct)
1406 if (!(ce = ct->c_cefile))
1407 return (ct->c_end - ct->c_begin);
1409 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1410 return (long) st.st_size;
1413 if (stat(ce->ce_file, &st) != NOTOK)
1414 return (long) st.st_size;
1419 if (ct->c_encoding == CE_EXTERNAL)
1420 return (ct->c_end - ct->c_begin);
1423 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1424 return (ct->c_end - ct->c_begin);
1426 if (fstat(fd, &st) != NOTOK)
1427 size = (long) st.st_size;
1431 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table, indexed by a 7-bit character code.  Each entry
** is the 6-bit value of that character: A-Z give 0x00-0x19, a-z give
** 0x1a-0x33, 0-9 give 0x34-0x3d, '+' gives 0x3e and '/' gives 0x3f.
** Every other character, the padding character '=' included, maps to
** 0xff; openBase64 treats any value above 0x3f as not part of the
** alphabet.
*/
1440 static unsigned char b642nib[0x80] = {
1441 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1442 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1446 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1447 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1448 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1449 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1450 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1451 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1452 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1453 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1454 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1455 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1456 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
/*
** Only visible statement of the base64 initializer (presumably
** InitBase64; its signature line is not visible here): it installs
** openBase64 as the decoder through init_encoding.
*/
1463 return init_encoding(ct, openBase64);
/*
** openBase64 -- decode the base64 body of ct into a file.
**
** ct    content to decode (bytes c_begin up to c_end of c_file)
** file  in: NULL to decode into a temporary file, otherwise the name of
**       the file to create; out: set to the name of the decoded file
**
** Returns the descriptor of the decoded stream.  The error paths call
** content_error and end in free_encoding(ct, 0); their return value is
** not visible in this listing.
**
** An existing decoded stream is rewound and reused, and an existing
** decoded file is reopened for reading.  Otherwise the output name is
** chosen, optionally extended by a suffix found in the context under
** "<prog>-suffix-<type>/<subtype>" or "<prog>-suffix-<type>" (a
** temporary file that already exists is renamed), and the file is
** opened for reading and writing.
**
** The source is read in chunks with read().  Each character is mapped
** through b642nib and its 6 bits are merged into bits at position
** bitno.  When four characters have been gathered, the bytes addressed
** by b1, b2 and b3 are written out.  Those pointers select bytes of
** bits according to the endian flag, so the same code works for either
** byte order.  Characters outside the alphabet are reported.  Finally
** the output is flushed and rewound.
*/
1468 openBase64(CT ct, char **file)
1471 int fd, len, skip, own_ct_fp = 0;
1473 unsigned char value, *b, *b1, *b2, *b3;
1474 unsigned char *cp, *ep;
1475 char buffer[BUFSIZ];
1476 /* sbeck -- handle suffixes */
1480 b = (unsigned char *) &bits;
1481 b1 = &b[endian > 0 ? 1 : 2];
1482 b2 = &b[endian > 0 ? 2 : 1];
1483 b3 = &b[endian > 0 ? 3 : 0];
1487 fseek(ce->ce_fp, 0L, SEEK_SET);
1492 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1493 content_error(ce->ce_file, ct,
1494 "unable to fopen for reading");
1500 if (*file == NULL) {
1501 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1504 ce->ce_file = mh_xstrdup(*file);
1508 /* sbeck@cise.ufl.edu -- handle suffixes */
1510 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1511 invo_name, ci->ci_type, ci->ci_subtype);
1512 cp = context_find(buffer);
1513 if (cp == NULL || *cp == '\0') {
1514 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1516 cp = context_find(buffer);
1518 if (cp != NULL && *cp != '\0') {
1519 if (ce->ce_unlink) {
1521 ** Temporary file already exists, so we rename to
1522 ** version with extension.
1524 char *file_org = mh_xstrdup(ce->ce_file);
1525 ce->ce_file = add(cp, ce->ce_file);
1526 if (rename(file_org, ce->ce_file)) {
1527 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1530 mh_free0(&file_org);
1533 ce->ce_file = add(cp, ce->ce_file);
1537 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1538 content_error(ce->ce_file, ct,
1539 "unable to fopen for reading/writing");
1543 if ((len = ct->c_end - ct->c_begin) < 0)
1544 adios(EX_SOFTWARE, NULL, "internal error(1)");
1547 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1548 content_error(ct->c_file, ct,
1549 "unable to open for reading");
1559 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1561 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1563 content_error(ct->c_file, ct, "error reading from");
1567 content_error(NULL, ct, "premature eof");
1575 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1580 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1582 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1584 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1588 bits |= value << bitno;
1590 if ((bitno -= 6) < 0) {
1591 putc((char) *b1, ce->ce_fp);
1593 putc((char) *b2, ce->ce_fp);
1595 putc((char) *b3, ce->ce_fp);
1599 if (ferror(ce->ce_fp)) {
1600 content_error(ce->ce_file, ct,
1601 "error writing to");
1604 bitno = 18, bits = 0L, skip = 0;
1610 goto self_delimiting;
1619 fprintf(stderr, "premature ending (bitno %d)\n",
1622 content_error(NULL, ct, "invalid BASE64 encoding");
1627 fseek(ct->c_fp, 0L, SEEK_SET);
1629 if (fflush(ce->ce_fp)) {
1630 content_error(ce->ce_file, ct, "error writing to");
1634 fseek(ce->ce_fp, 0L, SEEK_SET);
1637 *file = ce->ce_file;
1642 return fileno(ce->ce_fp);
1645 free_encoding(ct, 0);
/*
** Hexadecimal digit table, indexed by a 7-bit character code: 0-9 give
** 0x00-0x09, and both A-F and a-f give 0x0a-0x0f.  Every other
** character also maps to 0x00, so the table cannot tell an invalid
** character from the digit 0; openQuoted checks both digits with
** isxdigit before it decodes an escape.
*/
1658 static char hex2nib[0x80] = {
1659 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1660 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1661 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1662 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1663 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1664 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1665 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1666 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1667 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1668 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1671 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1672 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/*
** Only visible statement of the quoted-printable initializer
** (presumably InitQuoted; its signature line is not visible here): it
** installs openQuoted as the decoder through init_encoding.
*/
1681 return init_encoding(ct, openQuoted);
/*
** openQuoted -- decode the quoted-printable body of ct into a file.
**
** ct    content to decode (bytes c_begin up to c_end of c_file)
** file  in: NULL to decode into a temporary file, otherwise the name of
**       the file to create; out: set to the name of the decoded file
**
** Returns the descriptor of the decoded stream.  The error paths call
** content_error and end in free_encoding(ct, 0); their return value is
** not visible in this listing.
**
** Reuse of an existing decoded stream or file, the choice of the output
** name and the optional context suffix work as in openBase64.
**
** The source is read one line at a time with fgets, and the number of
** characters used is limited by the remaining length.  Within a line
** the decoder is either copying literal bytes or inside an escape:
**  - an equals sign directly followed by a newline is a soft line break
**  - an equals sign followed by two hexadecimal digits is decoded
**    through hex2nib, with the first digit supplying the upper half of
**    the output byte and the second digit the lower half
**  - an equals sign with fewer than two characters after it, or with
**    characters that are not hexadecimal digits, is copied through
**    unchanged
** Content that ends in the middle of an escape is reported.  Finally
** the output is flushed and rewound.
*/
1686 openQuoted(CT ct, char **file)
1688 int cc, len, quoted, own_ct_fp = 0;
1689 unsigned char *cp, *ep;
1690 char buffer[BUFSIZ];
1691 unsigned char mask = 0;
1693 /* sbeck -- handle suffixes */
1698 fseek(ce->ce_fp, 0L, SEEK_SET);
1703 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1704 content_error(ce->ce_file, ct,
1705 "unable to fopen for reading");
1711 if (*file == NULL) {
1712 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1715 ce->ce_file = mh_xstrdup(*file);
1719 /* sbeck@cise.ufl.edu -- handle suffixes */
1721 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1722 invo_name, ci->ci_type, ci->ci_subtype);
1723 cp = context_find(buffer);
1724 if (cp == NULL || *cp == '\0') {
1725 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1727 cp = context_find(buffer);
1729 if (cp != NULL && *cp != '\0') {
1730 if (ce->ce_unlink) {
1732 ** Temporary file already exists, so we rename to
1733 ** version with extension.
1735 char *file_org = mh_xstrdup(ce->ce_file);
1736 ce->ce_file = add(cp, ce->ce_file);
1737 if (rename(file_org, ce->ce_file)) {
1738 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1741 mh_free0(&file_org);
1744 ce->ce_file = add(cp, ce->ce_file);
1748 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1749 content_error(ce->ce_file, ct,
1750 "unable to fopen for reading/writing");
1754 if ((len = ct->c_end - ct->c_begin) < 0)
1755 adios(EX_SOFTWARE, NULL, "internal error(2)");
1758 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1759 content_error(ct->c_file, ct,
1760 "unable to open for reading");
1768 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1770 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1771 content_error(NULL, ct, "premature eof");
1775 if ((cc = strlen(buffer)) > len)
1779 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1784 for (; cp < ep; cp++) {
1786 /* in an escape sequence */
1788 /* at byte 1 of an escape sequence */
1789 mask = hex2nib[*cp & 0x7f];
1790 /* next is byte 2 */
1793 /* at byte 2 of an escape sequence */
1795 mask |= hex2nib[*cp & 0x7f];
1796 putc(mask, ce->ce_fp);
1797 if (ferror(ce->ce_fp)) {
1798 content_error(ce->ce_file, ct, "error writing to");
1802 ** finished escape sequence; next may
1803 ** be literal or a new escape sequence
1807 /* on to next byte */
1811 /* not in an escape sequence */
1814 ** starting an escape sequence,
1817 if (cp + 1 < ep && cp[1] == '\n') {
1818 /* "=\n" soft line break, eat the \n */
1822 if (cp + 1 >= ep || cp + 2 >= ep) {
1824 ** We don't have 2 bytes left,
1825 ** so this is an invalid escape
1826 ** sequence; just show the raw bytes
1829 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1831 ** Next 2 bytes are hex digits,
1832 ** making this a valid escape
1833 ** sequence; let's decode it (above).
1839 ** One or both of the next 2 is
1840 ** out of range, making this an
1841 ** invalid escape sequence; just
1842 ** show the raw bytes (below).
1847 /* Just show the raw byte. */
1848 putc(*cp, ce->ce_fp);
1849 if (ferror(ce->ce_fp)) {
1850 content_error(ce->ce_file, ct,
1851 "error writing to");
1857 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1861 fseek(ct->c_fp, 0L, SEEK_SET);
1863 if (fflush(ce->ce_fp)) {
1864 content_error(ce->ce_file, ct, "error writing to");
1868 fseek(ce->ce_fp, 0L, SEEK_SET);
1871 *file = ce->ce_file;
1876 return fileno(ce->ce_fp);
1879 free_encoding(ct, 0);
/*
** Visible statements of the initializer used for the 7bit, 8bit and
** binary encodings (presumably Init7Bit; its signature line is not
** visible here).  open7Bit is installed through init_encoding, and the
** size callback set by init_encoding is then cleared because these
** encodings need no decoding to learn the real size.
*/
1895 if (init_encoding(ct, open7Bit) == NOTOK)
1898 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** open7Bit -- copy the body of ct into a file without any decoding.
**
** ct    content to copy (bytes c_begin up to c_end of c_file)
** file  in: NULL to copy into a temporary file, otherwise the name of
**       the file to create; out: set to the name of the copy
**
** Returns the descriptor of the copy.  The error paths call
** content_error and end in free_encoding(ct, 0); their return value is
** not visible in this listing.
**
** Reuse of an existing stream or file, the choice of the output name
** and the optional context suffix work as in openBase64.
**
** For multipart content a header block is written ahead of the body so
** the copy can be parsed on its own.  It consists of the Content-Type
** line with every parameter in quoted form and the comment, if any,
** continued on a new line starting with a tab whenever the running
** length would reach CPERLIN.  The Content-ID, Content-Description and
** Content-Disposition fields and an empty line follow.  The tests that
** guard the three optional fields are not visible in this listing.
**
** The body itself is transferred in chunks with read() and fwrite().
** Finally the output is flushed and rewound.
*/
1904 open7Bit(CT ct, char **file)
1906 int cc, fd, len, own_ct_fp = 0;
1907 char buffer[BUFSIZ];
1908 /* sbeck -- handle suffixes */
1915 fseek(ce->ce_fp, 0L, SEEK_SET);
1920 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1921 content_error(ce->ce_file, ct,
1922 "unable to fopen for reading");
1928 if (*file == NULL) {
1929 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1932 ce->ce_file = mh_xstrdup(*file);
1936 /* sbeck@cise.ufl.edu -- handle suffixes */
1938 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1939 invo_name, ci->ci_type, ci->ci_subtype);
1940 cp = context_find(buffer);
1941 if (cp == NULL || *cp == '\0') {
1942 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1944 cp = context_find(buffer);
1946 if (cp != NULL && *cp != '\0') {
1947 if (ce->ce_unlink) {
1949 ** Temporary file already exists, so we rename to
1950 ** version with extension.
1952 char *file_org = mh_xstrdup(ce->ce_file);
1953 ce->ce_file = add(cp, ce->ce_file);
1954 if (rename(file_org, ce->ce_file)) {
1955 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1958 mh_free0(&file_org);
1961 ce->ce_file = add(cp, ce->ce_file);
1965 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1966 content_error(ce->ce_file, ct,
1967 "unable to fopen for reading/writing");
1971 if (ct->c_type == CT_MULTIPART) {
1973 CI ci = &ct->c_ctinfo;
1976 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1978 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1979 strlen(ci->ci_subtype);
1980 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1981 putc(';', ce->ce_fp);
1984 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1987 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1988 fputs("\n\t", ce->ce_fp);
1991 putc(' ', ce->ce_fp);
1994 fprintf(ce->ce_fp, "%s", buffer);
1998 if (ci->ci_comment) {
1999 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2001 fputs("\n\t", ce->ce_fp);
2004 putc(' ', ce->ce_fp);
2007 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2010 fprintf(ce->ce_fp, "\n");
2012 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2014 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2016 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2017 fprintf(ce->ce_fp, "\n");
2020 if ((len = ct->c_end - ct->c_begin) < 0)
2021 adios(EX_SOFTWARE, NULL, "internal error(3)");
2024 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2025 content_error(ct->c_file, ct,
2026 "unable to open for reading");
2032 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2034 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2036 content_error(ct->c_file, ct, "error reading from");
2040 content_error(NULL, ct, "premature eof");
2048 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2049 if (ferror(ce->ce_fp)) {
2050 content_error(ce->ce_file, ct,
2051 "error writing to");
2056 fseek(ct->c_fp, 0L, SEEK_SET);
2058 if (fflush(ce->ce_fp)) {
2059 content_error(ce->ce_file, ct, "error writing to");
2063 fseek(ce->ce_fp, 0L, SEEK_SET);
2066 *file = ce->ce_file;
2071 return fileno(ce->ce_fp);
2074 free_encoding(ct, 0);