2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Lookup table from text subtype name to TEXT_* code.
** The table is walked with a case-insensitive compare against the parsed
** subtype until a row with a NULL key is reached; the value of the row
** where the walk stops is stored as the content subtype.  The NULL-keyed
** row must therefore stay last and carries the fallback TEXT_UNKNOWN.
** NOTE(review): the closing "};" of this initializer is not present in
** this listing (the embedded line numbering skips it).
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Lookup table from charset parameter value to CHARSET_* code.
** It is scanned with a case-insensitive compare against the value of the
** "charset" parameter; the NULL-keyed row ends the scan and supplies
** CHARSET_UNKNOWN for names that are not listed.
** NOTE(review): the closing "};" is not present in this listing.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Lookup table from multipart subtype name to MULTI_* code.
** Scanned case-insensitively against the parsed subtype; the NULL-keyed
** row ends the scan and supplies MULTI_UNKNOWN.  The resulting code is
** later tested for MULTI_DIGEST and MULTI_ALTERNATE by the multipart
** initializer.
** NOTE(review): the closing "};" is not present in this listing.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Lookup table from message subtype name to MESSAGE_* code.
** Scanned case-insensitively against the parsed subtype; the NULL-keyed
** row ends the scan and supplies MESSAGE_UNKNOWN.  The resulting code
** drives a switch over MESSAGE_RFC822, MESSAGE_PARTIAL and
** MESSAGE_EXTERNAL in the message initializer.
** NOTE(review): the closing "};" is not present in this listing.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Lookup table from application subtype name to APPLICATION_* code.
** Scanned case-insensitively against the parsed subtype; the NULL-keyed
** row ends the scan and supplies APPLICATION_UNKNOWN.
** NOTE(review): the closing "};" is not present in this listing.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.
** The first four (make_intermediates, content_error, free_content,
** free_encoding) are declared without "static"; their definitions are not
** part of this listing.  The remaining ones are file-local helpers: the
** header parser, the per-content-type initializers that are referenced
** from the str2cts table, and the per-transfer-encoding initializers and
** open functions that are referenced from the str2ces table.
** NOTE(review): the embedded numbering shows that some declaration lines
** between these are missing from this listing, so the list is not
** complete.
*/
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Dispatch table for the primary content type.
** Each row gives the type name, its CT_* code and the initializer that is
** stored in the content structure for later invocation.  The header
** parser compares the parsed type against the keys case-insensitively and
** stops at the first NULL key.  Two NULL-keyed rows end the table: when
** nothing matched, the parser tests whether the type starts with "X-" to
** decide between them (CT_EXTENSION versus CT_UNKNOWN); the statement that
** advances to the second row is presumably on a line that this listing
** omits.  Neither fallback row has an initializer.
** NOTE(review): the closing "};" is not present in this listing.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table for the Content-Transfer-Encoding.
** Each row gives the encoding name, its CE_* code and the initializer
** that the header parser calls as soon as the encoding is recognised.
** "8bit", "7bit" and "binary" share Init7Bit.  As with the content type
** table, two NULL-keyed rows end the table and the parser tests for an
** "X-" prefix to choose between CE_EXTENSION and CE_UNKNOWN.
** NOTE(review): the closing "};" is not present in this listing.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** parse_mime -- open `file' (or spool standard input when `file' is "-")
** and build the content structure of the top-level entity.
**
** What the visible lines do:
**  - "-" selects standard input: a temporary file is requested from
**    m_mktemp2(), `file' is replaced by a copy of its name, stdin is read
**    with fgets() (apparently to be copied into the temporary file) and
**    the stream is rewound with fseek() before parsing.
**  - any other name is opened with fopen(file, "r").
**  - the headers are parsed by get_content(fp, file, 1), that is, as a
**    top-level entity.
**  - c_unlink is set so that the temporary file is removed later; the
**    condition guarding this is on a line the listing omits.
**  - if c_end is still 0 it is set to the end-of-file offset.
**  - finally the content-type specific initializer, if any, is run.
**
** NOTE(review): this listing omits lines of the function (return type,
** local declarations, error returns), so the cleanup performed on the
** failure paths cannot be confirmed from here.
** NOTE(review): the results of fseek() and ftell() are used unchecked in
** the lines shown.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** get_content -- read the header block of one entity from `in' and
** return a freshly allocated content structure describing it.
**
** Parameters:
**   in        stream positioned at the first header line of the entity
**   file      name of the file behind `in'; a copy is stored in c_file
**   toplevel  1 at the top of the message, 0 inside a message or
**             multipart, -1 inside multipart/digest
**
** Phase 1 (header collection): fields are read with m_getfld2() in a
** state loop.  Each field is duplicated and appended with add_header();
** c_begin is recomputed from ftell(in) in several states so that it
** ends up marking the start of the body.  A format error is reported
** with advise(); an I/O error or an unexpected state terminates the
** program through adios().
**
** Phase 2 (MIME interpretation): the collected list is walked once and
** the MIME-Version, Content-Type, Content-Transfer-Encoding, Content-ID,
** Content-Description and Content-Disposition fields are interpreted.
** Duplicate MIME-Version, Content-Type and Content-Transfer-Encoding
** fields are reported.  The type and encoding are mapped through the
** str2cts and str2ces tables; the encoding initializer is called
** immediately, the type initializer is only stored.
**
** Phase 3 (defaults): without a Content-Type the entity becomes
** message/rfc822 or text/plain (the test that selects between them is on
** a line not shown; per the comments it depends on being inside
** multipart/digest), and CE_7BIT is the default encoding.
**
** NOTE(review): this listing omits many lines of the function (case
** labels, the error exits, the return).  The comment preceding the
** function states that `in' is closed on failure; the code doing so is
** not visible here.
*/
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
259 /* add the header data to the list */
260 add_header(ct, mh_xstrdup(f.name), mh_xstrdup(f.value));
262 ct->c_begin = ftell(in) + 1;
266 ct->c_begin = ftell(in) - strlen(f.value);
270 ct->c_begin = ftell(in);
274 advise(NULL, "message format error in component #%d", compnum);
279 adios(EX_IOERR, "m_getfld2", "io error");
282 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
288 ** Read the content headers. We will parse the
289 ** MIME related header fields into their various
290 ** structures and set internal flags related to
291 ** content type/subtype, etc.
294 hp = ct->c_first_hf; /* start at first header field */
296 /* Get MIME-Version field */
297 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
300 unsigned char *cp, *dp;
303 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
306 ct->c_vrsn = mh_xstrdup(hp->value);
308 /* Now, cleanup this field */
313 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
315 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
320 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
322 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
325 for (dp = cp; istoken(*dp); dp++)
329 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
332 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
335 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
336 /* Get Content-Type field */
337 struct str2init *s2i;
338 CI ci = &ct->c_ctinfo;
340 /* Check if we've already seen a Content-Type header */
342 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
346 /* Parse the Content-Type field */
347 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
351 ** Set the Init function and the internal
352 ** flag for this content type.
354 for (s2i = str2cts; s2i->si_key; s2i++)
355 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
357 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
359 ct->c_type = s2i->si_val;
360 ct->c_ctinitfnx = s2i->si_init;
362 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
363 /* Get Content-Transfer-Encoding field */
365 unsigned char *cp, *dp;
366 struct str2init *s2i;
369 ** Check if we've already seen the
370 ** Content-Transfer-Encoding field
373 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
377 /* get copy of this field */
378 ct->c_celine = cp = mh_xstrdup(hp->value);
382 for (dp = cp; istoken(*dp); dp++)
388 ** Find the internal flag and Init function
389 ** for this transfer encoding.
391 for (s2i = str2ces; s2i->si_key; s2i++)
392 if (!mh_strcasecmp(cp, s2i->si_key))
394 if (!s2i->si_key && !uprf(cp, "X-"))
397 ct->c_encoding = s2i->si_val;
399 /* Call the Init function for this encoding */
400 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
403 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
404 /* Get Content-ID field */
405 ct->c_id = add(hp->value, ct->c_id);
407 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
408 /* Get Content-Description field */
409 ct->c_descr = add(hp->value, ct->c_descr);
411 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
412 /* Get Content-Disposition field */
413 ct->c_dispo = add(hp->value, ct->c_dispo);
417 hp = hp->next; /* next header field */
421 ** Check if we saw a Content-Type field.
422 ** If not, then assign a default value for
423 ** it, and the Init function.
427 ** If we are inside a multipart/digest message,
428 ** so default type is message/rfc822
431 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
433 ct->c_type = CT_MESSAGE;
434 ct->c_ctinitfnx = InitMessage;
437 ** Else default type is text/plain
439 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
441 ct->c_type = CT_TEXT;
442 ct->c_ctinitfnx = InitText;
446 /* Use default Transfer-Encoding, if necessary */
448 ct->c_encoding = CE_7BIT;
461 ** small routine to add header field to list
/*
** add_header -- append one header field to the list kept in `ct'.
**
** A zero-filled list node is allocated with mh_xcalloc() and linked at
** the tail: it becomes c_first_hf when the list is empty, otherwise it is
** hung off c_last_hf->next.  The caller in this file passes freshly
** duplicated strings for `name' and `value'.
** NOTE(review): the lines that store `name' and `value' in the node and
** that update c_last_hf are not present in this listing.
*/
465 add_header(CT ct, char *name, char *value)
469 /* allocate header field structure */
470 hp = mh_xcalloc(1, sizeof(*hp));
472 /* link data into header structure */
477 /* link header structure into the list */
478 if (ct->c_first_hf == NULL) {
479 ct->c_first_hf = hp; /* this is the first */
482 ct->c_last_hf->next = hp; /* add it to the end */
491 ** Make sure that buf contains at least one appearance of name,
492 ** followed by =. If not, insert both name and value, just after
493 ** first semicolon, if any. Note that name should not contain a
494 ** trailing =. And quotes will be added around the value. Typical
495 ** usage: make sure that a Content-Disposition header contains
496 ** filename="foo". If it doesn't and value does, use value from
/*
** incl_name_value -- make sure `buf' carries a NAME= parameter.
**
** If the text NAME= does not occur anywhere in `buf', the string
**     ; NAME="VALUE"
** is built and spliced in at the first semicolon of `buf', or appended
** (followed by a newline) when `buf' has no semicolon.  Trailing white
** space of `buf' is examined first.  The search is a plain strstr(), so
** a longer parameter name that merely ends in NAME also counts as
** present.
**
** NOTE(review): with an empty `buf' the expression &buf[strlen(buf) - 1]
** points before the start of the array, and the loop condition then
** compares that pointer with `buf'; confirm that callers never pass an
** empty string or guard the computation.
** NOTE(review): several lines are missing from this listing (the
** handling of a NULL `value', the release of `prefix' and `suffix', the
** return statement), so ownership of `buf' and of the result cannot be
** confirmed from here.
*/
500 incl_name_value(unsigned char *buf, char *name, char *value) {
503 /* Assume that name is non-null. */
505 char *name_plus_equal = concat(name, "=", NULL);
507 if (!strstr(buf, name_plus_equal)) {
510 char *prefix, *suffix;
512 /* Trim trailing space, esp. newline. */
513 for (cp = &buf[strlen(buf) - 1];
514 cp >= buf && isspace(*cp); --cp) {
518 insertion = concat("; ", name, "=", "\"", value, "\"",
522 ** Insert at first semicolon, if any.
523 ** If none, append to end.
525 prefix = mh_xstrdup(buf);
526 if ((cp = strchr(prefix, ';'))) {
527 suffix = concat(cp, NULL);
529 newbuf = concat(prefix, insertion, suffix,
534 newbuf = concat(buf, insertion, "\n", NULL);
538 mh_free0(&insertion);
542 mh_free0(&name_plus_equal);
549 ** Extract just name_suffix="foo", if any, from value. If there isn't
550 ** one, return the entire value. Note that, for example, a name_suffix
551 ** of name will match filename="foo", and return foo.
/*
** extract_name_value -- pull the quoted value of a parameter whose name
** ends in `name_suffix' out of `value'.
**
** The needle NAME_SUFFIX=" is searched with strstr().  When it is found,
** the text between the opening quote and the next quote is copied into a
** new buffer obtained from mh_xcalloc() and that buffer is returned.
** When it is not found, `value' itself is returned, so the caller cannot
** tell from the pointer alone whether the result was newly allocated.
**
** NOTE(review): the scan for the closing quote stops only on a quote
** character; no test for the terminating NUL is visible.  For input such
** as name="foo (no closing quote) the scan would run past the end of the
** string.  Confirm against the complete source and add a NUL check.
** NOTE(review): some lines are missing from this listing (local
** declarations and whatever precedes the first assignment).
*/
554 extract_name_value(char *name_suffix, char *value) {
555 char *extracted_name_value;
556 char *name_suffix_plus_quote;
557 char *name_suffix_equals;
563 extracted_name_value = value;
564 name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
565 name_suffix_equals = strstr(value, name_suffix_plus_quote);
566 mh_free0(&name_suffix_plus_quote);
567 if (name_suffix_equals) {
568 char *name_suffix_begin;
571 for (cp = name_suffix_equals; *cp != '"'; ++cp)
573 name_suffix_begin = ++cp;
574 /* Find second \". */
575 for (; *cp != '"'; ++cp)
578 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
579 memcpy(extracted_name_value, name_suffix_begin,
580 cp - name_suffix_begin);
581 extracted_name_value[cp - name_suffix_begin] = '\0';
584 return extracted_name_value;
588 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
589 ** directives. Fills in the information of the CTinfo structure.
/*
** get_ctinfo -- parse a Content-Type value into the CTinfo part of `ct'.
**
** Parameters:
**   cp     the field value; a private copy is made and kept in c_ctline
**   ct     content structure whose c_ctinfo is filled in
**   magic  non-zero enables the composition extras: a <content-id>,
**          a [description] and a {disposition} following the parameters,
**          plus free trailing text that is saved in ci_magic
**
** Steps visible in this listing:
**  - leading blanks are skipped, newlines are turned into blanks and
**    trailing blanks are trimmed;
**  - parenthesised comments are consumed through get_comment() between
**    the syntactic elements;
**  - the type and subtype tokens are copied and converted to lower case;
**    an empty type or an empty subtype is reported with advise();
**  - up to NPARMS attribute/value pairs are collected into ci_attrs and
**    ci_values; attribute names are lower-cased and quoted-string values
**    are unquoted; malformed parameters are reported with advise();
**  - with `magic', trailing text is stored in ci_magic and, for a
**    Content-Disposition lacking a filename, a filename parameter is
**    derived from the name="..." part of that text via
**    extract_name_value(), mhbasename() and incl_name_value();
**  - without `magic', trailing text is reported as extraneous.
**
** NOTE(review): one advise() call formats the pointer difference
** (dp - cp) with %d; a pointer difference is not an int, so the argument
** needs a cast or a matching conversion.
** NOTE(review): the trailing-blank trims compute cp + strlen(cp) - 1,
** which points before `cp' when the remaining text is empty.
** NOTE(review): this listing omits many lines of the function (local
** declarations, loop bodies, the return statements), so the exact
** return value on each path cannot be confirmed from here; callers test
** the result against NOTOK.
*/
592 get_ctinfo(unsigned char *cp, CT ct, int magic)
601 i = strlen(invo_name) + 2;
603 /* store copy of Content-Type line */
604 cp = ct->c_ctline = mh_xstrdup(cp);
606 while (isspace(*cp)) /* trim leading spaces */
609 /* change newlines to spaces */
610 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
613 /* trim trailing spaces */
614 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
620 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
622 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
625 for (dp = cp; istoken(*dp); dp++)
628 ci->ci_type = mh_xstrdup(cp); /* store content type */
632 advise(NULL, "invalid %s: field in message %s (empty type)",
633 TYPE_FIELD, ct->c_file);
637 /* down case the content type string */
638 for (dp = ci->ci_type; *dp; dp++)
639 if (isalpha(*dp) && isupper(*dp))
645 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
650 ci->ci_subtype = mh_xstrdup("");
658 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
661 for (dp = cp; istoken(*dp); dp++)
664 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
667 if (!*ci->ci_subtype) {
668 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
672 /* down case the content subtype string */
673 for (dp = ci->ci_subtype; *dp; dp++)
674 if (isalpha(*dp) && isupper(*dp))
681 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
685 ** Parse attribute/value pairs given with Content-Type
687 ep = (ap = ci->ci_attrs) + NPARMS;
693 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
701 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
705 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
709 /* down case the attribute name */
710 for (dp = cp; istoken(*dp); dp++)
711 if (isalpha(*dp) && isupper(*dp))
714 for (up = dp; isspace(*dp);)
716 if (dp == cp || *dp != '=') {
717 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
721 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
723 for (dp++; isspace(*dp);)
726 /* now add the attribute value */
727 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
730 for (cp = ++dp, dp = vp;;) {
734 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
739 if ((c = *cp++) == '\0')
754 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
759 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
760 *ci->ci_values[ap - ci->ci_attrs] = '\0';
761 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
769 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
774 ** Get any <Content-Id> given in buffer
776 if (magic && *cp == '<') {
778 mh_free0(&(ct->c_id));
780 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
781 advise(NULL, "invalid ID in message %s", ct->c_file);
787 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
798 ** Get any [Content-Description] given in buffer.
800 if (magic && *cp == '[') {
802 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
806 advise(NULL, "invalid description in message %s",
815 ct->c_descr = concat(ct->c_descr, "\n", NULL);
826 ** Get any {Content-Disposition} given in buffer.
828 if (magic && *cp == '{') {
830 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
834 advise(NULL, "invalid disposition in message %s",
843 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
854 ** Check if anything is left over
858 ci->ci_magic = mh_xstrdup(cp);
861 ** If there is a Content-Disposition header and
862 ** it doesn't have a *filename=, extract it from
863 ** the magic contents. The mhbasename call skips
864 ** any leading directory components.
867 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
869 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** get_comment -- consume a parenthesised comment in a header value.
**
** Parameters:
**   ct      content structure; the comment text ends up in its
**           c_ctinfo.ci_comment
**   ap      in/out cursor into the header value; callers invoke this
**           only when the cursor is at an opening parenthesis
**   istype  selects the field name used in the error message: non-zero
**           means Content-Type, zero means MIME-Version
**
** The comment text is gathered in a local buffer of BUFSIZ bytes.  If a
** comment was already stored, the new text is appended to it after a
** single blank; otherwise it becomes the stored comment.  Reaching the
** end of the string inside the comment is reported as an invalid
** comment.
**
** NOTE(review): no bound check on the local buffer is visible in this
** listing; confirm against the complete source that a comment longer
** than BUFSIZ cannot overflow it.
** NOTE(review): most of the function body is missing from this listing.
*/
877 get_comment(CT ct, unsigned char **ap, int istype)
882 char c, buffer[BUFSIZ], *dp;
894 advise(NULL, "invalid comment in message %s's %s: field",
895 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
900 if ((c = *cp++) == '\0')
923 if ((dp = ci->ci_comment)) {
924 ci->ci_comment = concat(dp, " ", buffer, NULL);
927 ci->ci_comment = mh_xstrdup(buffer);
942 ** Handles content types audio, image, and video.
943 ** There's not much to do right here.
949 return OK; /* not much to do here */
/*
** Body of the initializer for text content -- presumably InitText; the
** function header line is not present in this listing.
**
**  - an empty subtype is replaced by "plain";
**  - the subtype is mapped to a TEXT_* code through the SubText table;
**  - a zero-filled text parameter structure is allocated and attached
**    to c_ctparams;
**  - the attribute list is searched case-insensitively for "charset";
**    when one is found, its normalised name (norm_charmap) is stored in
**    c_charset and its code is looked up in the Charset table;
**    otherwise tx_charset becomes CHARSET_UNSPECIFIED.
** NOTE(review): the branch structure around the charset handling is on
** lines that this listing omits.
*/
963 CI ci = &ct->c_ctinfo;
965 /* check for missing subtype */
966 if (!*ci->ci_subtype)
967 ci->ci_subtype = add("plain", ci->ci_subtype);
970 for (kv = SubText; kv->kv_key; kv++)
971 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
973 ct->c_subtype = kv->kv_value;
975 /* allocate text character set structure */
976 t = mh_xcalloc(1, sizeof(*t));
977 ct->c_ctparams = (void *) t;
979 /* scan for charset parameter */
980 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
981 if (!mh_strcasecmp(*ap, "charset"))
984 /* check if content specified a character set */
987 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
988 /* match character set or set to CHARSET_UNKNOWN */
989 for (kv = Charset; kv->kv_key; kv++) {
990 if (!mh_strcasecmp(*ep, kv->kv_key)) {
994 t->tx_charset = kv->kv_value;
996 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** InitMultiPart -- split a multipart entity into its parts.
**
** Steps visible in this listing:
**  - an encoding other than 7bit, 8bit or binary draws a warning and is
**    replaced by CE_7BIT;
**  - the subtype is mapped through the SubMultiPart table;
**  - the "boundary" parameter is looked up; a missing boundary or one
**    made only of white space is reported with advise(); trailing white
**    space is removed from it;
**  - a zero-filled multipart structure is attached to c_ctparams; the
**    boundary followed by a newline is kept in mp_start and the boundary
**    followed by "--" and a newline in mp_stop;
**  - the file is opened if necessary and read line by line from c_begin,
**    with `pos' tracking the offset after each line; only lines starting
**    with "--" are compared (from their third character) against
**    mp_start and mp_stop;
**  - for a start boundary a new part is appended and its headers are
**    parsed with get_content(), using toplevel -1 inside
**    multipart/digest and 0 otherwise; afterwards the stream is moved
**    back to `pos';
**  - a part ends just before the next boundary line; c_begin is pulled
**    back to c_end when it lies beyond it;
**  - input that ends without the closing boundary is reported as bogus;
**    the last part then ends where the enclosing entity ends and is
**    dropped from the list when it is empty;
**  - for multipart/alternative the part order is reversed;
**  - each part is labelled with the enclosing part number, a dot and its
**    own 1-based index, and its content-type initializer is run.
**
** NOTE(review): the part number is written with an unbounded sprintf()
** into the remainder of a BUFSIZ array that already holds the prefix.
** NOTE(review): fgets() returns long lines in pieces; a later piece of
** such a line that happens to start with "--" is treated like a line
** start by the boundary test.
** NOTE(review): this listing omits many lines of the function (local
** declarations, branch bodies, error exits, the return).
*/
1008 InitMultiPart(CT ct)
1012 unsigned char *cp, *dp;
1014 char *bp, buffer[BUFSIZ];
1015 struct multipart *m;
1017 struct part *part, **next;
1018 CI ci = &ct->c_ctinfo;
1023 ** The encoding for multipart messages must be either
1024 ** 7bit, 8bit, or binary (per RFC2045).
1026 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1027 && ct->c_encoding != CE_BINARY) {
1028 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1029 ct->c_encoding = CE_7BIT;
1033 for (kv = SubMultiPart; kv->kv_key; kv++)
1034 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1036 ct->c_subtype = kv->kv_value;
1039 ** Check for "boundary" parameter, which is
1040 ** required for multipart messages.
1043 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1044 if (!mh_strcasecmp(*ap, "boundary")) {
1050 /* complain if boundary parameter is missing */
1052 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1056 /* allocate primary structure for multipart info */
1057 m = mh_xcalloc(1, sizeof(*m));
1058 ct->c_ctparams = (void *) m;
1060 /* check if boundary parameter contains only whitespace characters */
1061 for (cp = bp; isspace(*cp); cp++)
1064 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1068 /* remove trailing whitespace from boundary parameter */
1069 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1074 /* record boundary separators */
1075 m->mp_start = concat(bp, "\n", NULL);
1076 m->mp_stop = concat(bp, "--\n", NULL);
1078 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1079 advise(ct->c_file, "unable to open for reading");
1083 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1085 next = &m->mp_parts;
1089 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1093 pos += strlen(buffer);
1094 if (buffer[0] != '-' || buffer[1] != '-')
1097 if (strcmp(buffer + 2, m->mp_start)!=0)
1100 part = mh_xcalloc(1, sizeof(*part));
1102 next = &part->mp_next;
1104 if (!(p = get_content(fp, ct->c_file,
1105 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1112 fseek(fp, pos, SEEK_SET);
1115 if (strcmp(buffer + 2, m->mp_start) == 0) {
1119 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1120 if (p->c_end < p->c_begin)
1121 p->c_begin = p->c_end;
1126 if (strcmp(buffer + 2, m->mp_stop) == 0)
1132 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1133 if (!inout && part) {
1135 p->c_end = ct->c_end;
1137 if (p->c_begin >= p->c_end) {
1138 for (next = &m->mp_parts; *next != part;
1139 next = &((*next)->mp_next))
1148 /* reverse the order of the parts for multipart/alternative */
1149 if (ct->c_subtype == MULTI_ALTERNATE)
1153 ** label all subparts with part number, and
1154 ** then initialize the content of the subpart.
1159 char partnam[BUFSIZ];
1162 snprintf(partnam, sizeof(partnam), "%s.",
1164 pp = partnam + strlen(partnam);
1169 for (part = m->mp_parts, partnum = 1; part;
1170 part = part->mp_next, partnum++) {
1173 sprintf(pp, "%d", partnum);
1174 p->c_partno = mh_xstrdup(partnam);
1176 /* initialize the content of the subparts */
1177 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1192 ** reverse the order of the parts of a multipart
/*
** reverse_parts -- reverse the linked list of parts of a multipart.
**
** The multipart structure is taken from c_ctparams.  A list with fewer
** than two parts is left alone.  Otherwise the parts are counted, their
** addresses are collected in a temporary array with one spare slot, and
** the list is relinked by walking that array from its last entry back to
** its first.  The array is released before returning.
** NOTE(review): the counting, the array fill, the relinking statements
** and the release call are on lines that this listing omits; only the
** loop headers are shown.
*/
1196 reverse_parts(CT ct)
1199 struct multipart *m;
1200 struct part **base, **bmp, **next, *part;
1202 m = (struct multipart *) ct->c_ctparams;
1204 /* if only one part, just return */
1205 if (!m->mp_parts || !m->mp_parts->mp_next)
1208 /* count number of parts */
1210 for (part = m->mp_parts; part; part = part->mp_next)
1213 /* allocate array of pointers to the parts */
1214 base = mh_xcalloc(i + 1, sizeof(*base));
1217 /* point at all the parts */
1218 for (part = m->mp_parts; part; part = part->mp_next)
1222 /* reverse the order of the parts */
1223 next = &m->mp_parts;
1224 for (bmp--; bmp >= base; bmp--) {
1227 next = &part->mp_next;
1231 /* free array of pointers */
/*
** Body of the initializer for message content -- presumably InitMessage;
** the function header line is not present in this listing.
**
**  - an encoding other than 7bit, 8bit or binary draws a warning and is
**    replaced by CE_7BIT;
**  - an empty subtype is replaced by "rfc822";
**  - the subtype is mapped through the SubMessage table and then
**    dispatched:
**      MESSAGE_RFC822    nothing visible in this listing;
**      MESSAGE_PARTIAL   a zero-filled parameter structure is attached to
**                        c_ctparams and filled from the "id", "number"
**                        and "total" parameters; the numbers are read
**                        with sscanf and "number" must be at least 1;
**                        a missing id, a missing number, or a number
**                        above a non-zero total is reported as invalid;
**      MESSAGE_EXTERNAL  the file is opened if necessary, the stream is
**                        positioned at c_begin and the enclosed headers
**                        are parsed with get_content(); the resulting
**                        entity gets an empty body range (c_end is set
**                        to c_begin) and, depending on its type, its own
**                        initializer is invoked.
** NOTE(review): the return value of that nested initializer call is
** ignored in the line shown.
** NOTE(review): this listing omits many lines of the function, including
** the checks applied to the "total" parameter beyond the sscanf result.
*/
1244 CI ci = &ct->c_ctinfo;
1246 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT) && (ct->c_encoding != CE_BINARY)) {
1247 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1248 ct->c_encoding = CE_7BIT;
1251 /* check for missing subtype */
1252 if (!*ci->ci_subtype)
1253 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1256 for (kv = SubMessage; kv->kv_key; kv++)
1257 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1259 ct->c_subtype = kv->kv_value;
1261 switch (ct->c_subtype) {
1262 case MESSAGE_RFC822:
1265 case MESSAGE_PARTIAL:
1270 p = mh_xcalloc(1, sizeof(*p));
1271 ct->c_ctparams = (void *) p;
1274 ** scan for parameters "id", "number",
1277 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1278 if (!mh_strcasecmp(*ap, "id")) {
1279 p->pm_partid = mh_xstrdup(*ep);
1282 if (!mh_strcasecmp(*ap, "number")) {
1283 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1285 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1290 if (!mh_strcasecmp(*ap, "total")) {
1291 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1298 if (!p->pm_partid || !p->pm_partno
1299 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1300 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1306 case MESSAGE_EXTERNAL:
1311 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1312 advise(ct->c_file, "unable to open for reading");
1316 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1318 if (!(p = get_content(fp, ct->c_file, 0))) {
1324 p->c_end = p->c_begin;
1329 switch (p->c_type) {
1334 if (p->c_subtype != MESSAGE_RFC822)
1339 (*p->c_ctinitfnx) (p);
/*
** InitApplication -- initializer for application content.
** Maps the parsed subtype through the SubApplication table
** (case-insensitive compare) and stores the resulting APPLICATION_* code
** in c_subtype; an unlisted subtype receives the value of the NULL-keyed
** last row of the table.
** NOTE(review): the return statement is on a line this listing omits.
*/
1358 InitApplication(CT ct)
1361 CI ci = &ct->c_ctinfo;
1364 for (kv = SubApplication; kv->kv_key; kv++)
1365 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1367 ct->c_subtype = kv->kv_value;
1374 ** TRANSFER ENCODINGS
/*
** init_encoding -- common setup shared by the transfer-encoding
** initializers.
** Allocates a zero-filled encoding state structure and installs the
** three handlers of `ct': `openfnx' as the open function supplied by the
** caller, close_encoding as the close function and size_encoding as the
** size function.
** NOTE(review): the line that attaches the new structure to `ct' and the
** return statement are not present in this listing.
*/
1378 init_encoding(CT ct, OpenCEFunc openfnx)
1382 ce = mh_xcalloc(1, sizeof(*ce));
1385 ct->c_ceopenfnx = openfnx;
1386 ct->c_ceclosefnx = close_encoding;
1387 ct->c_cesizefnx = size_encoding;
/*
** close_encoding -- close handler installed by init_encoding().
** Fetches the encoding state from c_cefile and takes a separate path
** when there is none.
** NOTE(review): everything after that test is missing from this listing,
** so what is actually closed cannot be stated from here.
*/
1394 close_encoding(CT ct)
1398 if (!(ce = ct->c_cefile))
/*
** size_encoding -- size handler installed by init_encoding(); reports
** the size of the content in bytes.
**
** Order of the attempts visible in this listing:
**  1. without encoding state: the raw span c_end - c_begin;
**  2. with an open decoded stream: st_size from fstat() on it;
**  3. with a decoded file name: st_size from stat() on it;
**  4. for CE_EXTERNAL: the raw span again;
**  5. otherwise the content is decoded through the open handler and the
**     resulting descriptor is measured with fstat(); if the open handler
**     fails, the raw span is returned.  The close handler is called
**     afterwards.
**
** NOTE(review): st_size is cast to long although the function returns
** unsigned long; where long is narrower than off_t this truncates large
** sizes.
** NOTE(review): the fallbacks taken when stat() or fstat() fail and the
** final return are on lines this listing omits.
*/
1408 static unsigned long
1409 size_encoding(CT ct)
1417 if (!(ce = ct->c_cefile))
1418 return (ct->c_end - ct->c_begin);
1420 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1421 return (long) st.st_size;
1424 if (stat(ce->ce_file, &st) != NOTOK)
1425 return (long) st.st_size;
1430 if (ct->c_encoding == CE_EXTERNAL)
1431 return (ct->c_end - ct->c_begin);
1434 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1435 return (ct->c_end - ct->c_begin);
1437 if (fstat(fd, &st) != NOTOK)
1438 size = (long) st.st_size;
1442 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table, indexed by a 7-bit character code.
** Entries 0x00 through 0x3f are the 6-bit values of the base64 alphabet:
** the upper-case letters map to 0x00..0x19, the lower-case letters to
** 0x1a..0x33, the digits to 0x34..0x3d, the plus sign to 0x3e and the
** slash to 0x3f.  Every other position holds 0xff; the decoder treats
** any entry above 0x3f as a character outside the alphabet.  The equals
** sign is among those 0xff entries.
** NOTE(review): the closing "};" is not present in this listing.
*/
1451 static unsigned char b642nib[0x80] = {
1452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1457 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1458 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1459 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1460 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1461 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1462 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1463 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1464 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1465 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1466 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1467 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1474 return init_encoding(ct, openBase64);
/*
** openBase64 -- open handler for base64 content: decode the body of `ct'
** into a file and return a descriptor for the decoded data.
**
** Parameters:
**   ct    content whose byte range c_begin..c_end holds the encoded body
**   file  in: requested output name, or NULL for a temporary file;
**         out: set to the name of the decoded file
**
** Steps visible in this listing:
**  - b1, b2 and b3 are pointers to three bytes of the accumulator `bits';
**    which bytes is chosen from the global `endian';
**  - an already open decoded stream is rewound; an existing decoded file
**    is reopened for reading;
**  - otherwise an output name is chosen: a temporary name from
**    m_mktemp() when *file is NULL, else a copy of *file;
**  - a profile entry named PROG-suffix-TYPE/SUBTYPE, or failing that
**    PROG-suffix-TYPE, supplies a file name suffix; a temporary file that
**    already exists is renamed to the suffixed name;
**  - the output is opened in "w+" mode;
**  - a negative content length aborts with an internal error;
**  - the source is read with lseek() and read() on the descriptor of the
**    stdio stream, in chunks of one byte less than the buffer size;
**  - every input byte is looked up in b642nib; bytes with the high bit
**    set or with a table entry above 0x3f are not data and may be
**    reported as invalid; data values are shifted into `bits' and, once
**    a full group is collected, the bytes at b1, b2 and b3 are written
**    and the accumulator is reset;
**  - at the end the source stream is rewound, the output is flushed and
**    rewound, *file receives the output name and the descriptor of the
**    output stream is returned;
**  - on the error path free_encoding(ct, 0) is called.
**
** NOTE(review): raw lseek()/read() are used on a descriptor that also
** backs a stdio stream; the stream is repositioned with fseek() at the
** end, but confirm that no buffered stdio read on it happens in between.
** NOTE(review): this listing omits many lines of the function (local
** declarations, padding handling, loop control, error exits).
*/
1479 openBase64(CT ct, char **file)
1482 int fd, len, skip, own_ct_fp = 0;
1484 unsigned char value, *b, *b1, *b2, *b3;
1485 unsigned char *cp, *ep;
1486 char buffer[BUFSIZ];
1487 /* sbeck -- handle suffixes */
1491 b = (unsigned char *) &bits;
1492 b1 = &b[endian > 0 ? 1 : 2];
1493 b2 = &b[endian > 0 ? 2 : 1];
1494 b3 = &b[endian > 0 ? 3 : 0];
1498 fseek(ce->ce_fp, 0L, SEEK_SET);
1503 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1504 content_error(ce->ce_file, ct,
1505 "unable to fopen for reading");
1511 if (*file == NULL) {
1512 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1515 ce->ce_file = mh_xstrdup(*file);
1519 /* sbeck@cise.ufl.edu -- handle suffixes */
1521 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1522 invo_name, ci->ci_type, ci->ci_subtype);
1523 cp = context_find(buffer);
1524 if (cp == NULL || *cp == '\0') {
1525 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1527 cp = context_find(buffer);
1529 if (cp != NULL && *cp != '\0') {
1530 if (ce->ce_unlink) {
1532 ** Temporary file already exists, so we rename to
1533 ** version with extension.
1535 char *file_org = mh_xstrdup(ce->ce_file);
1536 ce->ce_file = add(cp, ce->ce_file);
1537 if (rename(file_org, ce->ce_file)) {
1538 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1541 mh_free0(&file_org);
1544 ce->ce_file = add(cp, ce->ce_file);
1548 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1549 content_error(ce->ce_file, ct,
1550 "unable to fopen for reading/writing");
1554 if ((len = ct->c_end - ct->c_begin) < 0)
1555 adios(EX_SOFTWARE, NULL, "internal error(1)");
1558 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1559 content_error(ct->c_file, ct,
1560 "unable to open for reading");
1570 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1572 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1574 content_error(ct->c_file, ct, "error reading from");
1578 content_error(NULL, ct, "premature eof");
1586 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1591 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1593 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1595 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1599 bits |= value << bitno;
1601 if ((bitno -= 6) < 0) {
1602 putc((char) *b1, ce->ce_fp);
1604 putc((char) *b2, ce->ce_fp);
1606 putc((char) *b3, ce->ce_fp);
1610 if (ferror(ce->ce_fp)) {
1611 content_error(ce->ce_file, ct,
1612 "error writing to");
1615 bitno = 18, bits = 0L, skip = 0;
1621 goto self_delimiting;
1630 fprintf(stderr, "premature ending (bitno %d)\n",
1633 content_error(NULL, ct, "invalid BASE64 encoding");
1638 fseek(ct->c_fp, 0L, SEEK_SET);
1640 if (fflush(ce->ce_fp)) {
1641 content_error(ce->ce_file, ct, "error writing to");
1645 fseek(ce->ce_fp, 0L, SEEK_SET);
1648 *file = ce->ce_file;
1653 return fileno(ce->ce_fp);
1656 free_encoding(ct, 0);
/*
** Hexadecimal digit table for quoted-printable decoding, indexed by a
** 7-bit character code.  The decimal digits map to 0..9 and the letters
** A..F in either case map to 10..15.  Every other position holds 0, so
** the table alone cannot tell an invalid character from the digit zero;
** the decoder checks both characters with isxdigit() before it starts
** decoding an escape sequence.
** NOTE(review): the closing "};" is not present in this listing.
*/
1669 static char hex2nib[0x80] = {
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1671 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1672 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1677 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1678 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1680 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1692 return init_encoding(ct, openQuoted);
/*
** openQuoted -- open handler for quoted-printable content: decode the
** body of `ct' into a file and return a descriptor for the decoded data.
**
** Parameters:
**   ct    content whose byte range c_begin..c_end holds the encoded body
**   file  in: requested output name, or NULL for a temporary file;
**         out: set to the name of the decoded file
**
** Steps visible in this listing:
**  - reuse of an already open decoded stream or an existing decoded
**    file, choice of the output name and the optional profile-driven
**    suffix follow the same sequence as in the base64 open handler;
**  - the output is opened in "w+" mode; a negative content length aborts
**    with an internal error;
**  - the source is read line by line with fgets() starting at c_begin,
**    and each line is clipped to the remaining length;
**  - an equals sign followed by a newline is a soft line break; an
**    equals sign followed by two hexadecimal digits is an escape whose
**    digits are combined through hex2nib into one output byte; any other
**    equals sign, and every ordinary byte, is written unchanged;
**  - content that ends in the middle of an escape is reported as invalid;
**  - at the end the source stream is rewound, the output is flushed and
**    rewound, *file receives the output name and the descriptor of the
**    output stream is returned;
**  - on the error path free_encoding(ct, 0) is called.
**
** NOTE(review): this listing omits many lines of the function (local
** declarations, the state variable updates of the escape decoder, the
** trailing white space handling, loop control, error exits).
*/
1697 openQuoted(CT ct, char **file)
1699 int cc, len, quoted, own_ct_fp = 0;
1700 unsigned char *cp, *ep;
1701 char buffer[BUFSIZ];
1702 unsigned char mask = 0;
1704 /* sbeck -- handle suffixes */
1709 fseek(ce->ce_fp, 0L, SEEK_SET);
1714 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1715 content_error(ce->ce_file, ct,
1716 "unable to fopen for reading");
1722 if (*file == NULL) {
1723 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1726 ce->ce_file = mh_xstrdup(*file);
1730 /* sbeck@cise.ufl.edu -- handle suffixes */
1732 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1733 invo_name, ci->ci_type, ci->ci_subtype);
1734 cp = context_find(buffer);
1735 if (cp == NULL || *cp == '\0') {
1736 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1738 cp = context_find(buffer);
1740 if (cp != NULL && *cp != '\0') {
1741 if (ce->ce_unlink) {
1743 ** Temporary file already exists, so we rename to
1744 ** version with extension.
1746 char *file_org = mh_xstrdup(ce->ce_file);
1747 ce->ce_file = add(cp, ce->ce_file);
1748 if (rename(file_org, ce->ce_file)) {
1749 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1752 mh_free0(&file_org);
1755 ce->ce_file = add(cp, ce->ce_file);
1759 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1760 content_error(ce->ce_file, ct,
1761 "unable to fopen for reading/writing");
1765 if ((len = ct->c_end - ct->c_begin) < 0)
1766 adios(EX_SOFTWARE, NULL, "internal error(2)");
1769 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1770 content_error(ct->c_file, ct,
1771 "unable to open for reading");
1779 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1781 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1782 content_error(NULL, ct, "premature eof");
1786 if ((cc = strlen(buffer)) > len)
1790 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1795 for (; cp < ep; cp++) {
1797 /* in an escape sequence */
1799 /* at byte 1 of an escape sequence */
1800 mask = hex2nib[*cp & 0x7f];
1801 /* next is byte 2 */
1804 /* at byte 2 of an escape sequence */
1806 mask |= hex2nib[*cp & 0x7f];
1807 putc(mask, ce->ce_fp);
1808 if (ferror(ce->ce_fp)) {
1809 content_error(ce->ce_file, ct, "error writing to");
1813 ** finished escape sequence; next may
1814 ** be literal or a new escape sequence
1818 /* on to next byte */
1822 /* not in an escape sequence */
1825 ** starting an escape sequence,
1828 if (cp + 1 < ep && cp[1] == '\n') {
1829 /* "=\n" soft line break, eat the \n */
1833 if (cp + 1 >= ep || cp + 2 >= ep) {
1835 ** We don't have 2 bytes left,
1836 ** so this is an invalid escape
1837 ** sequence; just show the raw bytes
1840 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1842 ** Next 2 bytes are hex digits,
1843 ** making this a valid escape
1844 ** sequence; let's decode it (above).
1850 ** One or both of the next 2 is
1851 ** out of range, making this an
1852 ** invalid escape sequence; just
1853 ** show the raw bytes (below).
1858 /* Just show the raw byte. */
1859 putc(*cp, ce->ce_fp);
1860 if (ferror(ce->ce_fp)) {
1861 content_error(ce->ce_file, ct,
1862 "error writing to");
1868 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1872 fseek(ct->c_fp, 0L, SEEK_SET);
1874 if (fflush(ce->ce_fp)) {
1875 content_error(ce->ce_file, ct, "error writing to");
1879 fseek(ce->ce_fp, 0L, SEEK_SET);
1882 *file = ce->ce_file;
1887 return fileno(ce->ce_fp);
1890 free_encoding(ct, 0);
1906 if (init_encoding(ct, open7Bit) == NOTOK)
1909 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** open7Bit -- open handler for content that needs no decoding (installed
** for the 7bit, 8bit and binary encodings): copy the body of `ct' into a
** file and return a descriptor for it.
**
** Parameters:
**   ct    content whose byte range c_begin..c_end holds the body
**   file  in: requested output name, or NULL for a temporary file;
**         out: set to the name of the output file
**
** Steps visible in this listing:
**  - reuse of an already open output stream or an existing output file,
**    choice of the output name and the optional profile-driven suffix
**    follow the same sequence as in the other open handlers;
**  - the output is opened in "w+" mode;
**  - for multipart content a header block is regenerated first: the
**    Content-Type line with every parameter written as NAME="VALUE" and
**    folded onto a new line when the running length would reach CPERLIN,
**    the stored comment in parentheses, then the Content-ID,
**    Content-Description and Content-Disposition fields, then an empty
**    line;
**  - a negative content length aborts with an internal error;
**  - the body is copied with lseek() and read() on the descriptor of the
**    source stream and fwrite() to the output;
**  - at the end the source stream is rewound, the output is flushed and
**    rewound, *file receives the output name and the descriptor of the
**    output stream is returned;
**  - on the error path free_encoding(ct, 0) is called.
**
** NOTE(review): the tests that guard the three optional fields are on
** lines this listing omits; as shown, the stored strings are printed
** with %s, so they must be non-NULL at that point.
** NOTE(review): this listing omits many lines of the function (local
** declarations, loop control, error exits).
*/
1915 open7Bit(CT ct, char **file)
1917 int cc, fd, len, own_ct_fp = 0;
1918 char buffer[BUFSIZ];
1919 /* sbeck -- handle suffixes */
1926 fseek(ce->ce_fp, 0L, SEEK_SET);
1931 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1932 content_error(ce->ce_file, ct,
1933 "unable to fopen for reading");
1939 if (*file == NULL) {
1940 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1943 ce->ce_file = mh_xstrdup(*file);
1947 /* sbeck@cise.ufl.edu -- handle suffixes */
1949 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1950 invo_name, ci->ci_type, ci->ci_subtype);
1951 cp = context_find(buffer);
1952 if (cp == NULL || *cp == '\0') {
1953 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1955 cp = context_find(buffer);
1957 if (cp != NULL && *cp != '\0') {
1958 if (ce->ce_unlink) {
1960 ** Temporary file already exists, so we rename to
1961 ** version with extension.
1963 char *file_org = mh_xstrdup(ce->ce_file);
1964 ce->ce_file = add(cp, ce->ce_file);
1965 if (rename(file_org, ce->ce_file)) {
1966 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1969 mh_free0(&file_org);
1972 ce->ce_file = add(cp, ce->ce_file);
1976 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1977 content_error(ce->ce_file, ct,
1978 "unable to fopen for reading/writing");
1982 if (ct->c_type == CT_MULTIPART) {
1984 CI ci = &ct->c_ctinfo;
1987 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1989 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1990 strlen(ci->ci_subtype);
1991 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1992 putc(';', ce->ce_fp);
1995 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1998 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1999 fputs("\n\t", ce->ce_fp);
2002 putc(' ', ce->ce_fp);
2005 fprintf(ce->ce_fp, "%s", buffer);
2009 if (ci->ci_comment) {
2010 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2012 fputs("\n\t", ce->ce_fp);
2015 putc(' ', ce->ce_fp);
2018 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2021 fprintf(ce->ce_fp, "\n");
2023 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2025 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2027 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2028 fprintf(ce->ce_fp, "\n");
2031 if ((len = ct->c_end - ct->c_begin) < 0)
2032 adios(EX_SOFTWARE, NULL, "internal error(3)");
2035 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2036 content_error(ct->c_file, ct,
2037 "unable to open for reading");
2043 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2045 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2047 content_error(ct->c_file, ct, "error reading from");
2051 content_error(NULL, ct, "premature eof");
2059 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2060 if (ferror(ce->ce_fp)) {
2061 content_error(ce->ce_file, ct,
2062 "error writing to");
2067 fseek(ct->c_fp, 0L, SEEK_SET);
2069 if (fflush(ce->ce_fp)) {
2070 content_error(ce->ce_file, ct, "error writing to");
2074 fseek(ce->ce_fp, 0L, SEEK_SET);
2077 *file = ce->ce_file;
2082 return fileno(ce->ce_fp);
2085 free_encoding(ct, 0);