2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Lookup table pairing each recognized text subtype name with its
** TEXT_* code.  Scans in this file walk the table until kv_key is NULL
** and then take kv_value from the entry they stopped on, so the final
** NULL-keyed entry supplies TEXT_UNKNOWN for unrecognized names.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Lookup table pairing character set names with CHARSET_* codes.
** The NULL-keyed entry ends the table and carries CHARSET_UNKNOWN.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Lookup table pairing multipart subtype names with MULTI_* codes.
** The NULL-keyed entry ends the table and carries MULTI_UNKNOWN.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Lookup table pairing message subtype names with MESSAGE_* codes.
** The NULL-keyed entry ends the table and carries MESSAGE_UNKNOWN.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Lookup table pairing application subtype names with APPLICATION_*
** codes.  The NULL-keyed entry ends the table and carries
** APPLICATION_UNKNOWN.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/* Non-static routines; their definitions are not visible in this part of the file. */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
/* File-local parsing helpers: header reader and RFC 822 comment skipper. */
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
/* Per-content-type initializers; referenced from the str2cts table. */
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
/* Transfer-encoding setup and decoders; the Init* ones are referenced from str2ces. */
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Dispatch table from a top-level content type name to its CT_* code
** and initializer.  get_content() scans it until si_key is NULL and
** copies si_val into c_type and si_init into c_ctinitfnx.
** NOTE(review): the two NULL-keyed entries appear to let an "X-" type
** stop on CT_EXTENSION while any other unknown type moves on to
** CT_UNKNOWN; the statement performing that step is not visible here.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table from a Content-Transfer-Encoding name to its CE_* code
** and initializer.  get_content() scans it until si_key is NULL, stores
** si_val in c_encoding and calls si_init when it is non-NULL.
** 8bit, 7bit and binary all share Init7Bit.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** Open `file' (or a temporary copy of standard input when `file' is
** "-"), read its headers with get_content(), and run the content-type
** initializer that get_content() selected.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
/* "-" means stdin: it is read line by line and a temp file from m_mktemp2() takes its place as `file' */
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
/* rewind the temp file before handing it to get_content() */
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
/* third argument 1 == top level of the message */
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
/* c_end is still 0: use the offset of end-of-file instead */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
/* run the type-specific initializer, if one was selected */
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** Allocate a CT for the content starting at the current position of
** `in', read its header fields with m_getfld() into the CT's header
** list, then walk that list to fill in the MIME-related members
** (version, type, transfer encoding, id, description, disposition).
** `file' is duplicated into c_file.  The meaning of `toplevel' is given
** in the comment that precedes this function.
*/
232 get_content(FILE *in, char *file, int toplevel)
235 char buf[BUFSIZ], name[NAMESZ];
240 /* allocate the content structure */
241 ct = mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = mh_xstrdup(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD;;) {
252 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
257 /* get copies of the buffers */
258 np = mh_xstrdup(name);
259 vp = mh_xstrdup(buf);
261 /* if necessary, get rest of field */
262 while (state == FLDPLUS) {
263 state = m_getfld(state, name, buf,
265 vp = add(buf, vp); /* add to previous value */
268 /* Now add the header data to the list */
269 add_header(ct, np, vp);
/*
** c_begin is recomputed from the stream position in three different
** ways below; the case labels selecting between them are not visible
** here.
*/
271 ct->c_begin = ftell(in) + 1;
275 ct->c_begin = ftell(in) - strlen(buf);
279 ct->c_begin = ftell(in);
284 adios(EX_DATAERR, NULL, "message format error in component #%d",
288 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
291 /* break out of the loop */
296 ** Read the content headers. We will parse the
297 ** MIME related header fields into their various
298 ** structures and set internal flags related to
299 ** content type/subtype, etc.
302 hp = ct->c_first_hf; /* start at first header field */
304 /* Get MIME-Version field */
305 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
308 unsigned char *cp, *dp;
311 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
314 ct->c_vrsn = mh_xstrdup(hp->value);
316 /* Now, cleanup this field */
321 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
323 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
328 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
/* a leading "(" starts a comment; get_comment() advances cp past it */
330 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
333 for (dp = cp; istoken(*dp); dp++)
/* case-insensitive comparison of the version token against VRSN_VALUE */
337 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
340 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
343 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
344 /* Get Content-Type field */
345 struct str2init *s2i;
346 CI ci = &ct->c_ctinfo;
348 /* Check if we've already seen a Content-Type header */
350 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
354 /* Parse the Content-Type field */
355 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
359 ** Set the Init function and the internal
360 ** flag for this content type.
/* look the parsed type up in str2cts, ignoring case */
362 for (s2i = str2cts; s2i->si_key; s2i++)
363 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
/* no match and the type does not start with "X-" */
365 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
367 ct->c_type = s2i->si_val;
368 ct->c_ctinitfnx = s2i->si_init;
370 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
371 /* Get Content-Transfer-Encoding field */
373 unsigned char *cp, *dp;
374 struct str2init *s2i;
377 ** Check if we've already seen the
378 ** Content-Transfer-Encoding field
381 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
385 /* get copy of this field */
386 ct->c_celine = cp = mh_xstrdup(hp->value);
390 for (dp = cp; istoken(*dp); dp++)
396 ** Find the internal flag and Init function
397 ** for this transfer encoding.
399 for (s2i = str2ces; s2i->si_key; s2i++)
400 if (!mh_strcasecmp(cp, s2i->si_key))
402 if (!s2i->si_key && !uprf(cp, "X-"))
405 ct->c_encoding = s2i->si_val;
407 /* Call the Init function for this encoding */
408 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
411 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
412 /* Get Content-ID field */
/* add() is used the same way for id, description and disposition: the header value is combined with whatever the member already holds */
413 ct->c_id = add(hp->value, ct->c_id);
415 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
416 /* Get Content-Description field */
417 ct->c_descr = add(hp->value, ct->c_descr);
419 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
420 /* Get Content-Disposition field */
421 ct->c_dispo = add(hp->value, ct->c_dispo);
425 hp = hp->next; /* next header field */
429 ** Check if we saw a Content-Type field.
430 ** If not, then assign a default value for
431 ** it, and the Init function.
435 ** If we are inside a multipart/digest message,
436 ** so default type is message/rfc822
439 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
441 ct->c_type = CT_MESSAGE;
442 ct->c_ctinitfnx = InitMessage;
445 ** Else default type is text/plain
447 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
449 ct->c_type = CT_TEXT;
450 ct->c_ctinitfnx = InitText;
454 /* Use default Transfer-Encoding, if necessary */
456 ct->c_encoding = CE_7BIT;
469 ** small routine to add header field to list
/*
** Append a header field holding `name' and `value' to the header list
** of `ct' (c_first_hf / c_last_hf).
*/
473 add_header(CT ct, char *name, char *value)
477 /* allocate header field structure */
478 hp = mh_xcalloc(1, sizeof(*hp));
480 /* link data into header structure */
485 /* link header structure into the list */
/* an empty list has no first field yet */
486 if (ct->c_first_hf == NULL) {
487 ct->c_first_hf = hp; /* this is the first */
490 ct->c_last_hf->next = hp; /* add it to the end */
499 ** Make sure that buf contains at least one appearance of name,
500 ** followed by =. If not, insert both name and value, just after
501 ** first semicolon, if any. Note that name should not contain a
502 ** trailing =. And quotes will be added around the value. Typical
503 ** usage: make sure that a Content-Disposition header contains
504 ** filename="foo". If it doesn't and value does, use value from
/*
** If `buf' does not already contain "name=", build a new string with
** `; name="value"' inserted at the first semicolon of `buf', or
** appended when there is none.
*/
508 incl_name_value(unsigned char *buf, char *name, char *value) {
511 /* Assume that name is non-null. */
/* the search key is the name immediately followed by '=' */
513 char *name_plus_equal = concat(name, "=", NULL);
515 if (!strstr(buf, name_plus_equal)) {
518 char *prefix, *suffix;
520 /* Trim trailing space, esp. newline. */
521 for (cp = &buf[strlen(buf) - 1];
522 cp >= buf && isspace(*cp); --cp) {
/* text to insert: ; name="value" */
526 insertion = concat("; ", name, "=", "\"", value, "\"",
530 ** Insert at first semicolon, if any.
531 ** If none, append to end.
533 prefix = mh_xstrdup(buf);
534 if ((cp = strchr(prefix, ';'))) {
/* suffix is a copy of everything from the semicolon onwards */
535 suffix = concat(cp, NULL);
537 newbuf = concat(prefix, insertion, suffix,
/* no semicolon: append the insertion and a newline */
542 newbuf = concat(buf, insertion, "\n", NULL);
546 mh_free0(&insertion);
550 mh_free0(&name_plus_equal);
557 ** Extract just name_suffix="foo", if any, from value. If there isn't
558 ** one, return the entire value. Note that, for example, a name_suffix
559 ** of name will match filename="foo", and return foo.
/*
** Look for name_suffix="..." inside `value'.  When it is found, return
** a newly allocated copy of the text between the two double quotes;
** otherwise return `value' itself (not a copy).
*/
562 extract_name_value(char *name_suffix, char *value) {
563 char *extracted_name_value = value;
/* the search key is name_suffix followed by =" */
564 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
565 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
568 mh_free0(&name_suffix_plus_quote);
569 if (name_suffix_equals) {
570 char *name_suffix_begin;
/* advance to the opening quote; the strstr() match guarantees one exists */
573 for (cp = name_suffix_equals; *cp != '"'; ++cp)
575 name_suffix_begin = ++cp;
576 /* Find second \". */
/* NOTE(review): no test for '\0' is visible in this loop, so a value without a closing quote looks like it would be scanned past its end - confirm */
577 for (; *cp != '"'; ++cp)
/* one extra byte for the terminating NUL */
580 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
581 memcpy(extracted_name_value, name_suffix_begin,
582 cp - name_suffix_begin);
583 extracted_name_value[cp - name_suffix_begin] = '\0';
586 return extracted_name_value;
590 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
591 ** directives. Fills in the information of the CTinfo structure.
/*
** Parse the Content-Type text in `cp' into the CTinfo of `ct': type,
** subtype, attribute/value parameters and comments.  When `magic' is
** non-zero the <id>, [description] and {disposition} directive forms
** are accepted as well.  A copy of the line is kept in c_ctline and all
** parsing is done on that copy.
*/
594 get_ctinfo(unsigned char *cp, CT ct, int magic)
/* i is used below as the width of the blank padding in continuation lines of error messages */
603 i = strlen(invo_name) + 2;
605 /* store copy of Content-Type line */
606 cp = ct->c_ctline = mh_xstrdup(cp);
608 while (isspace(*cp)) /* trim leading spaces */
611 /* change newlines to spaces */
612 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
615 /* trim trailing spaces */
616 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
622 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
/* a "(" at any of the points below starts a comment, which get_comment() consumes */
624 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
/* the type is the leading run of token characters */
627 for (dp = cp; istoken(*dp); dp++)
630 ci->ci_type = mh_xstrdup(cp); /* store content type */
634 advise(NULL, "invalid %s: field in message %s (empty type)",
635 TYPE_FIELD, ct->c_file);
639 /* down case the content type string */
640 for (dp = ci->ci_type; *dp; dp++)
641 if (isalpha(*dp) && isupper(*dp))
647 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
/* an empty string stands in for a subtype that was not given */
652 ci->ci_subtype = mh_xstrdup("");
660 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
663 for (dp = cp; istoken(*dp); dp++)
666 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
669 if (!*ci->ci_subtype) {
670 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
674 /* down case the content subtype string */
675 for (dp = ci->ci_subtype; *dp; dp++)
676 if (isalpha(*dp) && isupper(*dp))
683 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
687 ** Parse attribute/value pairs given with Content-Type
/* ap walks ci_attrs; ep marks the slot NPARMS entries further on */
689 ep = (ap = ci->ci_attrs) + NPARMS;
695 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
703 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
707 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
711 /* down case the attribute name */
712 for (dp = cp; istoken(*dp); dp++)
713 if (isalpha(*dp) && isupper(*dp))
/* up remembers where the attribute name ended, before any white space */
716 for (up = dp; isspace(*dp);)
/* reject an empty attribute name or a missing '=' */
718 if (dp == cp || *dp != '=') {
719 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
/* *ap is a fresh copy of the parameter text; vp points into that copy at the end of the name */
723 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
725 for (dp++; isspace(*dp);)
728 /* now add the attribute value */
/* the value is stored inside the same copy as the attribute name */
729 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
/* quoted-string value: cp reads from the source, dp writes into the copy */
732 for (cp = ++dp, dp = vp;;) {
736 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
741 if ((c = *cp++) == '\0')
/* unquoted value: a run of token characters */
756 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
761 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
/* blank out both the value and the attribute name of the bad parameter */
762 *ci->ci_values[ap - ci->ci_attrs] = '\0';
763 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
771 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
776 ** Get any <Content-Id> given in buffer
778 if (magic && *cp == '<') {
780 mh_free0(&(ct->c_id));
782 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
783 advise(NULL, "invalid ID in message %s", ct->c_file);
/* rebuild the id with its angle brackets and a trailing newline */
789 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
800 ** Get any [Content-Description] given in buffer.
802 if (magic && *cp == '[') {
/* scan backwards from the end of the string */
804 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
808 advise(NULL, "invalid description in message %s",
817 ct->c_descr = concat(ct->c_descr, "\n", NULL);
828 ** Get any {Content-Disposition} given in buffer.
830 if (magic && *cp == '{') {
832 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
836 advise(NULL, "invalid disposition in message %s",
845 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
856 ** Check if anything is left over
860 ci->ci_magic = mh_xstrdup(cp);
863 ** If there is a Content-Disposition header and
864 ** it doesn't have a *filename=, extract it from
865 ** the magic contents. The mhbasename call skips
866 ** any leading directory components.
869 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
871 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** Consume a parenthesized comment at *ap, collecting its text in a
** local buffer and adding that text to ci_comment.  `istype' only
** selects which field name (TYPE_FIELD or VRSN_FIELD) appears in the
** error message.
*/
879 get_comment(CT ct, unsigned char **ap, int istype)
884 char c, buffer[BUFSIZ], *dp;
896 advise(NULL, "invalid comment in message %s's %s: field",
897 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
902 if ((c = *cp++) == '\0')
/* an earlier comment exists: join old and new text with a single space */
925 if ((dp = ci->ci_comment)) {
926 ci->ci_comment = concat(dp, " ", buffer, NULL);
929 ci->ci_comment = mh_xstrdup(buffer);
944 ** Handles content types audio, image, and video.
945 ** There's not much to do right here.
951 return OK; /* not much to do here */
/*
** Text initializer body (the function header is not visible here;
** presumably InitText): defaults a missing subtype to "plain", maps the
** subtype through SubText, allocates the text parameter structure and
** records the charset parameter.
*/
965 CI ci = &ct->c_ctinfo;
967 /* check for missing subtype */
968 if (!*ci->ci_subtype)
969 ci->ci_subtype = add("plain", ci->ci_subtype);
/* translate the subtype name into its TEXT_* code */
972 for (kv = SubText; kv->kv_key; kv++)
973 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
975 ct->c_subtype = kv->kv_value;
977 /* allocate text character set structure */
978 t = mh_xcalloc(1, sizeof(*t));
979 ct->c_ctparams = (void *) t;
981 /* scan for charset parameter */
/* ap and ep advance together: attribute names and their values are parallel arrays */
982 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
983 if (!mh_strcasecmp(*ap, "charset"))
986 /* check if content specified a character set */
/* c_charset keeps a copy of the name as returned by norm_charmap() */
989 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
990 /* match character set or set to CHARSET_UNKNOWN */
991 for (kv = Charset; kv->kv_key; kv++) {
992 if (!mh_strcasecmp(*ep, kv->kv_key)) {
996 t->tx_charset = kv->kv_value;
/* assigned on the other branch; presumably when no charset parameter was given */
998 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** Initializer for multipart content: checks the transfer encoding,
** maps the subtype through SubMultiPart, requires a "boundary"
** parameter, then scans the body line by line for boundary lines,
** calling get_content() for each part and finally running each part's
** own initializer.
*/
1010 InitMultiPart(CT ct)
1014 unsigned char *cp, *dp;
1016 char *bp, buffer[BUFSIZ];
1017 struct multipart *m;
1019 struct part *part, **next;
1020 CI ci = &ct->c_ctinfo;
1025 ** The encoding for multipart messages must be either
1026 ** 7bit, 8bit, or binary (per RFC2045).
1028 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1029 && ct->c_encoding != CE_BINARY) {
1030 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
/* after the warning the content is treated as 7bit */
1031 ct->c_encoding = CE_7BIT;
/* translate the subtype name into its MULTI_* code */
1035 for (kv = SubMultiPart; kv->kv_key; kv++)
1036 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1038 ct->c_subtype = kv->kv_value;
1041 ** Check for "boundary" parameter, which is
1042 ** required for multipart messages.
1045 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1046 if (!mh_strcasecmp(*ap, "boundary")) {
1052 /* complain if boundary parameter is missing */
1054 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1058 /* allocate primary structure for multipart info */
1059 m = mh_xcalloc(1, sizeof(*m));
1060 ct->c_ctparams = (void *) m;
1062 /* check if boundary parameter contains only whitespace characters */
1063 for (cp = bp; isspace(*cp); cp++)
1066 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1070 /* remove trailing whitespace from boundary parameter */
1071 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1076 /* record boundary separators */
/* both are stored without the leading "--"; the scan below compares them against buffer + 2 */
1077 m->mp_start = concat(bp, "\n", NULL);
1078 m->mp_stop = concat(bp, "--\n", NULL);
1080 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1081 advise(ct->c_file, "unable to open for reading");
/* start scanning at the beginning of this content's body */
1085 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1087 next = &m->mp_parts;
1091 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
/* pos tracks the file offset just past the line that was read */
1095 pos += strlen(buffer);
/* only lines starting with "--" can be boundaries */
1096 if (buffer[0] != '-' || buffer[1] != '-')
1099 if (strcmp(buffer + 2, m->mp_start)!=0)
1102 part = mh_xcalloc(1, sizeof(*part));
1104 next = &part->mp_next;
/* parts of a multipart/digest are parsed with toplevel == -1, all others with 0 */
1106 if (!(p = get_content(fp, ct->c_file,
1107 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1114 fseek(fp, pos, SEEK_SET);
1117 if (strcmp(buffer + 2, m->mp_start) == 0) {
/* the part ends before the boundary line that was just read */
1121 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1122 if (p->c_end < p->c_begin)
1123 p->c_begin = p->c_end;
1128 if (strcmp(buffer + 2, m->mp_stop) == 0)
1134 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1135 if (!inout && part) {
1137 p->c_end = ct->c_end;
/* a part with no bytes in it: locate the link that points at it */
1139 if (p->c_begin >= p->c_end) {
1140 for (next = &m->mp_parts; *next != part;
1141 next = &((*next)->mp_next))
1150 /* reverse the order of the parts for multipart/alternative */
1151 if (ct->c_subtype == MULTI_ALTERNATE)
1155 ** label all subparts with part number, and
1156 ** then initialize the content of the subpart.
1161 char partnam[BUFSIZ];
1164 snprintf(partnam, sizeof(partnam), "%s.",
/* pp points at the end of the prefix; each part number is written there */
1166 pp = partnam + strlen(partnam);
1171 for (part = m->mp_parts, partnum = 1; part;
1172 part = part->mp_next, partnum++) {
1175 sprintf(pp, "%d", partnum);
1176 p->c_partno = mh_xstrdup(partnam);
1178 /* initialize the content of the subparts */
1179 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1194 ** reverse the order of the parts of a multipart
/*
** Reverse the linked list of parts of a multipart content.  The part
** pointers are gathered in a temporary array and the list is relinked
** by walking that array backwards.
*/
1198 reverse_parts(CT ct)
1201 struct multipart *m;
1202 struct part **base, **bmp, **next, *part;
1204 m = (struct multipart *) ct->c_ctparams;
1206 /* if only one part, just return */
/* this test also covers an empty list */
1207 if (!m->mp_parts || !m->mp_parts->mp_next)
1210 /* count number of parts */
1212 for (part = m->mp_parts; part; part = part->mp_next)
1215 /* allocate array of pointers to the parts */
1216 base = mh_xcalloc(i + 1, sizeof(*base));
1219 /* point at all the parts */
1220 for (part = m->mp_parts; part; part = part->mp_next)
1224 /* reverse the order of the parts */
1225 next = &m->mp_parts;
/* walk the array from its last filled slot down to the first */
1226 for (bmp--; bmp >= base; bmp--) {
1229 next = &part->mp_next;
1233 /* free array of pointers */
/*
** Message initializer body (the function header is not visible here;
** presumably InitMessage): warns about encodings other than 7bit/8bit,
** defaults a missing subtype to "rfc822", maps the subtype through
** SubMessage and then handles each MESSAGE_* subtype in turn.
*/
1246 CI ci = &ct->c_ctinfo;
1248 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1249 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1253 /* check for missing subtype */
1254 if (!*ci->ci_subtype)
1255 ci->ci_subtype = add("rfc822", ci->ci_subtype);
/* translate the subtype name into its MESSAGE_* code */
1258 for (kv = SubMessage; kv->kv_key; kv++)
1259 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1261 ct->c_subtype = kv->kv_value;
1263 switch (ct->c_subtype) {
1264 case MESSAGE_RFC822:
1267 case MESSAGE_PARTIAL:
/* message/partial: allocate the structure that holds id, part number and total */
1272 p = mh_xcalloc(1, sizeof(*p));
1273 ct->c_ctparams = (void *) p;
1276 ** scan for parameters "id", "number",
1279 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1280 if (!mh_strcasecmp(*ap, "id")) {
1281 p->pm_partid = mh_xstrdup(*ep);
1284 if (!mh_strcasecmp(*ap, "number")) {
/* the part number must parse as an integer and be at least 1 */
1285 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1287 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1292 if (!mh_strcasecmp(*ap, "total")) {
1293 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
/* id and number are required; when a total is present the number may not exceed it */
1300 if (!p->pm_partid || !p->pm_partno
1301 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1302 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1308 case MESSAGE_EXTERNAL:
1313 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1314 advise(ct->c_file, "unable to open for reading");
/* the body of this content is parsed as a nested set of headers */
1318 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1320 if (!(p = get_content(fp, ct->c_file, 0))) {
/* the nested content is given a zero-length body */
1326 p->c_end = p->c_begin;
1331 switch (p->c_type) {
1336 if (p->c_subtype != MESSAGE_RFC822)
/* run the nested content's initializer; its return value is not used */
1341 (*p->c_ctinitfnx) (p);
/*
** Initializer for application content: maps the subtype name through
** SubApplication and stores the resulting code in c_subtype.
*/
1360 InitApplication(CT ct)
1363 CI ci = &ct->c_ctinfo;
/* case-insensitive match; the value of the entry the scan stops on is used */
1366 for (kv = SubApplication; kv->kv_key; kv++)
1367 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1369 ct->c_subtype = kv->kv_value;
1376 ** TRANSFER ENCODINGS
/*
** Common setup for all transfer encodings: allocates the encoding
** structure and installs the open, close and size callbacks on `ct'.
** `openfnx' is the decoder specific to the encoding.
*/
1380 init_encoding(CT ct, OpenCEFunc openfnx)
1384 ce = mh_xcalloc(1, sizeof(*ce));
1387 ct->c_ceopenfnx = openfnx;
/* close and size handling are shared by every encoding */
1388 ct->c_ceclosefnx = close_encoding;
1389 ct->c_cesizefnx = size_encoding;
/*
** Close callback installed by init_encoding().  Only the test for a
** missing c_cefile is visible here.
*/
1396 close_encoding(CT ct)
1400 if (!(ce = ct->c_cefile))
/*
** Size callback installed by init_encoding().  Returns the size of the
** decoded content when it can be determined with fstat()/stat() on the
** decoded file, decoding through c_ceopenfnx if needed; several paths
** fall back to the encoded length c_end - c_begin.
*/
1410 static unsigned long
1411 size_encoding(CT ct)
/* no encoding structure at all: report the raw length */
1419 if (!(ce = ct->c_cefile))
1420 return (ct->c_end - ct->c_begin);
/* decoded file already open: ask the descriptor */
1422 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1423 return (long) st.st_size;
/* otherwise try the decoded file by name */
1426 if (stat(ce->ce_file, &st) != NOTOK)
1427 return (long) st.st_size;
1432 if (ct->c_encoding == CE_EXTERNAL)
1433 return (ct->c_end - ct->c_begin);
/* decode now, measure the result, then close it again */
1436 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1437 return (ct->c_end - ct->c_begin);
1439 if (fstat(fd, &st) != NOTOK)
1440 size = (long) st.st_size;
1444 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table indexed by a 7-bit character code.  'A'-'Z'
** map to 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to 0x34-0x3d, '+' to
** 0x3e and '/' to 0x3f.  Every other character, including '=', maps to
** 0xff; openBase64() does not treat a value above 0x3f as a digit.
*/
1453 static unsigned char b642nib[0x80] = {
1454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1457 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1459 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1460 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1461 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1462 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1463 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1464 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1465 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1467 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1468 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1469 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1476 return init_encoding(ct, openBase64);
/*
** Open callback for base64 content: decodes the bytes between c_begin
** and c_end of the source file into ce_file and returns the descriptor
** of the decoded file.  `*file' may name the output file; when it is
** NULL a temp name from m_mktemp() is used.  On success the name of
** the decoded file is stored back into `*file'.
*/
1481 openBase64(CT ct, char **file)
1484 int fd, len, skip, own_ct_fp = 0;
1486 unsigned char value, *b, *b1, *b2, *b3;
1487 unsigned char *cp, *ep;
1488 char buffer[BUFSIZ];
1489 /* sbeck -- handle suffixes */
/* b1..b3 address single bytes of `bits'; which byte each one selects depends on `endian' */
1493 b = (unsigned char *) &bits;
1494 b1 = &b[endian > 0 ? 1 : 2];
1495 b2 = &b[endian > 0 ? 2 : 1];
1496 b3 = &b[endian > 0 ? 3 : 0];
/* an already open decoded file is rewound */
1500 fseek(ce->ce_fp, 0L, SEEK_SET);
/* an existing decoded file is opened for reading */
1505 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1506 content_error(ce->ce_file, ct,
1507 "unable to fopen for reading");
/* choose the output name: a temp file unless the caller supplied one */
1513 if (*file == NULL) {
1514 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1517 ce->ce_file = mh_xstrdup(*file);
1521 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look up "<invo_name>-suffix-<type>/<subtype>" first, then a second, shorter key */
1523 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1524 invo_name, ci->ci_type, ci->ci_subtype);
1525 cp = context_find(buffer);
1526 if (cp == NULL || *cp == '\0') {
1527 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1529 cp = context_find(buffer);
1531 if (cp != NULL && *cp != '\0') {
1532 if (ce->ce_unlink) {
1534 ** Temporary file already exists, so we rename to
1535 ** version with extension.
1537 char *file_org = mh_xstrdup(ce->ce_file);
1538 ce->ce_file = add(cp, ce->ce_file);
1539 if (rename(file_org, ce->ce_file)) {
1540 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1543 mh_free0(&file_org);
1546 ce->ce_file = add(cp, ce->ce_file);
1550 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1551 content_error(ce->ce_file, ct,
1552 "unable to fopen for reading/writing");
/* len is the number of encoded bytes to consume */
1556 if ((len = ct->c_end - ct->c_begin) < 0)
1557 adios(EX_SOFTWARE, NULL, "internal error(1)");
1560 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1561 content_error(ct->c_file, ct,
1562 "unable to open for reading");
/* the encoded data is read with read() on the descriptor underlying c_fp */
1572 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1574 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1576 content_error(ct->c_file, ct, "error reading from");
1580 content_error(NULL, ct, "premature eof");
1588 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
/* reject bytes with the high bit set and characters b642nib does not map to a 6-bit value */
1593 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1595 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1597 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
/* accumulate 6 bits at a time; bitno goes 18, 12, 6, 0 */
1601 bits |= value << bitno;
/* four digits collected: emit decoded bytes */
1603 if ((bitno -= 6) < 0) {
1604 putc((char) *b1, ce->ce_fp);
1606 putc((char) *b2, ce->ce_fp);
1608 putc((char) *b3, ce->ce_fp);
1612 if (ferror(ce->ce_fp)) {
1613 content_error(ce->ce_file, ct,
1614 "error writing to");
/* reset the accumulator for the next group */
1617 bitno = 18, bits = 0L, skip = 0;
1623 goto self_delimiting;
1632 fprintf(stderr, "premature ending (bitno %d)\n",
1635 content_error(NULL, ct, "invalid BASE64 encoding");
1640 fseek(ct->c_fp, 0L, SEEK_SET);
1642 if (fflush(ce->ce_fp)) {
1643 content_error(ce->ce_file, ct, "error writing to");
/* rewind the decoded file before returning its descriptor */
1647 fseek(ce->ce_fp, 0L, SEEK_SET);
1650 *file = ce->ce_file;
1655 return fileno(ce->ce_fp);
/* error path: discard the partially built encoding state */
1658 free_encoding(ct, 0);
/*
** Hex digit decoding table indexed by a 7-bit character code.  '0'-'9'
** map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other
** character maps to 0.
*/
1671 static char hex2nib[0x80] = {
1672 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1677 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1678 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1679 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1680 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1694 return init_encoding(ct, openQuoted);
/*
** Open callback for quoted-printable content: decodes the lines
** between c_begin and c_end of the source file into ce_file and
** returns the descriptor of the decoded file.  `*file' may name the
** output file; when it is NULL a temp name from m_mktemp() is used.
** On success the name of the decoded file is stored back into `*file'.
*/
1699 openQuoted(CT ct, char **file)
1701 int cc, len, quoted, own_ct_fp = 0;
1702 unsigned char *cp, *ep;
1703 char buffer[BUFSIZ];
1704 unsigned char mask = 0;
1706 /* sbeck -- handle suffixes */
/* an already open decoded file is rewound */
1711 fseek(ce->ce_fp, 0L, SEEK_SET);
/* an existing decoded file is opened for reading */
1716 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1717 content_error(ce->ce_file, ct,
1718 "unable to fopen for reading");
/* choose the output name: a temp file unless the caller supplied one */
1724 if (*file == NULL) {
1725 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1728 ce->ce_file = mh_xstrdup(*file);
1732 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look up "<invo_name>-suffix-<type>/<subtype>" first, then a second, shorter key */
1734 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1735 invo_name, ci->ci_type, ci->ci_subtype);
1736 cp = context_find(buffer);
1737 if (cp == NULL || *cp == '\0') {
1738 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1740 cp = context_find(buffer);
1742 if (cp != NULL && *cp != '\0') {
1743 if (ce->ce_unlink) {
1745 ** Temporary file already exists, so we rename to
1746 ** version with extension.
1748 char *file_org = mh_xstrdup(ce->ce_file);
1749 ce->ce_file = add(cp, ce->ce_file);
1750 if (rename(file_org, ce->ce_file)) {
1751 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1754 mh_free0(&file_org);
1757 ce->ce_file = add(cp, ce->ce_file);
1761 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1762 content_error(ce->ce_file, ct,
1763 "unable to fopen for reading/writing");
/* len is the number of encoded bytes to consume */
1767 if ((len = ct->c_end - ct->c_begin) < 0)
1768 adios(EX_SOFTWARE, NULL, "internal error(2)");
1771 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1772 content_error(ct->c_file, ct,
1773 "unable to open for reading");
/* unlike the other two decoders this one reads line by line through stdio */
1781 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1783 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1784 content_error(NULL, ct, "premature eof");
1788 if ((cc = strlen(buffer)) > len)
/* ep scans backwards from the last character of the line */
1792 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1797 for (; cp < ep; cp++) {
1799 /* in an escape sequence */
1801 /* at byte 1 of an escape sequence */
1802 mask = hex2nib[*cp & 0x7f];
1803 /* next is byte 2 */
1806 /* at byte 2 of an escape sequence */
/* combine the second hex digit and emit the decoded byte */
1808 mask |= hex2nib[*cp & 0x7f];
1809 putc(mask, ce->ce_fp);
1810 if (ferror(ce->ce_fp)) {
1811 content_error(ce->ce_file, ct, "error writing to");
1815 ** finished escape sequence; next may
1816 ** be literal or a new escape sequence
1820 /* on to next byte */
1824 /* not in an escape sequence */
1827 ** starting an escape sequence,
1830 if (cp + 1 < ep && cp[1] == '\n') {
1831 /* "=\n" soft line break, eat the \n */
1835 if (cp + 1 >= ep || cp + 2 >= ep) {
1837 ** We don't have 2 bytes left,
1838 ** so this is an invalid escape
1839 ** sequence; just show the raw bytes
1842 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1844 ** Next 2 bytes are hex digits,
1845 ** making this a valid escape
1846 ** sequence; let's decode it (above).
1852 ** One or both of the next 2 is
1853 ** out of range, making this an
1854 ** invalid escape sequence; just
1855 ** show the raw bytes (below).
1860 /* Just show the raw byte. */
1861 putc(*cp, ce->ce_fp);
1862 if (ferror(ce->ce_fp)) {
1863 content_error(ce->ce_file, ct,
1864 "error writing to");
1870 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1874 fseek(ct->c_fp, 0L, SEEK_SET);
1876 if (fflush(ce->ce_fp)) {
1877 content_error(ce->ce_file, ct, "error writing to");
/* rewind the decoded file before returning its descriptor */
1881 fseek(ce->ce_fp, 0L, SEEK_SET);
1884 *file = ce->ce_file;
1889 return fileno(ce->ce_fp);
/* error path: discard the partially built encoding state */
1892 free_encoding(ct, 0);
1908 if (init_encoding(ct, open7Bit) == NOTOK)
1911 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** Open callback for 7bit, 8bit and binary content: copies the bytes
** between c_begin and c_end of the source file unchanged into ce_file
** and returns the descriptor of that file.  For multipart content a
** Content-Type header (plus id/description/disposition lines) is
** written in front of the body.  `*file' may name the output file;
** when it is NULL a temp name from m_mktemp() is used.
*/
1917 open7Bit(CT ct, char **file)
1919 int cc, fd, len, own_ct_fp = 0;
1920 char buffer[BUFSIZ];
1921 /* sbeck -- handle suffixes */
/* an already open output file is rewound */
1928 fseek(ce->ce_fp, 0L, SEEK_SET);
/* an existing output file is opened for reading */
1933 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1934 content_error(ce->ce_file, ct,
1935 "unable to fopen for reading");
/* choose the output name: a temp file unless the caller supplied one */
1941 if (*file == NULL) {
1942 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1945 ce->ce_file = mh_xstrdup(*file);
1949 /* sbeck@cise.ufl.edu -- handle suffixes */
/* look up "<invo_name>-suffix-<type>/<subtype>" first, then a second, shorter key */
1951 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1952 invo_name, ci->ci_type, ci->ci_subtype);
1953 cp = context_find(buffer);
1954 if (cp == NULL || *cp == '\0') {
1955 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1957 cp = context_find(buffer);
1959 if (cp != NULL && *cp != '\0') {
1960 if (ce->ce_unlink) {
1962 ** Temporary file already exists, so we rename to
1963 ** version with extension.
1965 char *file_org = mh_xstrdup(ce->ce_file);
1966 ce->ce_file = add(cp, ce->ce_file);
1967 if (rename(file_org, ce->ce_file)) {
1968 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1971 mh_free0(&file_org);
1974 ce->ce_file = add(cp, ce->ce_file);
1978 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1979 content_error(ce->ce_file, ct,
1980 "unable to fopen for reading/writing");
/* multipart content gets its Content-Type header written out first */
1984 if (ct->c_type == CT_MULTIPART) {
1986 CI ci = &ct->c_ctinfo;
1989 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
/* len tracks the width of the current output line so it can be folded at CPERLIN */
1991 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1992 strlen(ci->ci_subtype);
1993 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1994 putc(';', ce->ce_fp);
1997 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
/* the parameter would reach CPERLIN: continue on a new line starting with a tab */
2000 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2001 fputs("\n\t", ce->ce_fp);
2004 putc(' ', ce->ce_fp);
2007 fprintf(ce->ce_fp, "%s", buffer);
2011 if (ci->ci_comment) {
/* the 2 accounts for the parentheses around the comment */
2012 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2014 fputs("\n\t", ce->ce_fp);
2017 putc(' ', ce->ce_fp);
2020 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2023 fprintf(ce->ce_fp, "\n");
/* no newline in these formats; get_ctinfo() stores these values with a trailing "\n" */
2025 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2027 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2029 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
/* blank line ending the header */
2030 fprintf(ce->ce_fp, "\n");
/* len is now the number of body bytes to copy */
2033 if ((len = ct->c_end - ct->c_begin) < 0)
2034 adios(EX_SOFTWARE, NULL, "internal error(3)");
2037 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2038 content_error(ct->c_file, ct,
2039 "unable to open for reading");
/* the body is read with read() on the descriptor underlying c_fp */
2045 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2047 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2049 content_error(ct->c_file, ct, "error reading from");
2053 content_error(NULL, ct, "premature eof");
/* copy the chunk through unchanged */
2061 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2062 if (ferror(ce->ce_fp)) {
2063 content_error(ce->ce_file, ct,
2064 "error writing to");
2069 fseek(ct->c_fp, 0L, SEEK_SET);
2071 if (fflush(ce->ce_fp)) {
2072 content_error(ce->ce_file, ct, "error writing to");
/* rewind the output file before returning its descriptor */
2076 fseek(ce->ce_fp, 0L, SEEK_SET);
2079 *file = ce->ce_file;
2084 return fileno(ce->ce_fp);
/* error path: discard the partially built encoding state */
2087 free_encoding(ct, 0);