2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 ct = (CT) mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = getcpy(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD2;;) {
252 switch (state = m_getfld2(state, &f, in)) {
256 /* add the header data to the list */
257 add_header(ct, getcpy(f.name), getcpy(f.value));
259 ct->c_begin = ftell(in) + 1;
263 ct->c_begin = ftell(in) - strlen(f.value);
267 ct->c_begin = ftell(in);
273 adios(EX_DATAERR, NULL, "message format error in component #%d",
277 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
283 ** Read the content headers. We will parse the
284 ** MIME related header fields into their various
285 ** structures and set internal flags related to
286 ** content type/subtype, etc.
289 hp = ct->c_first_hf; /* start at first header field */
291 /* Get MIME-Version field */
292 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
295 unsigned char *cp, *dp;
298 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
301 ct->c_vrsn = getcpy(hp->value);
303 /* Now, cleanup this field */
308 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
310 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
315 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
317 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
320 for (dp = cp; istoken(*dp); dp++)
324 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
327 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
330 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
331 /* Get Content-Type field */
332 struct str2init *s2i;
333 CI ci = &ct->c_ctinfo;
335 /* Check if we've already seen a Content-Type header */
337 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
341 /* Parse the Content-Type field */
342 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
346 ** Set the Init function and the internal
347 ** flag for this content type.
349 for (s2i = str2cts; s2i->si_key; s2i++)
350 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
352 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
354 ct->c_type = s2i->si_val;
355 ct->c_ctinitfnx = s2i->si_init;
357 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
358 /* Get Content-Transfer-Encoding field */
360 unsigned char *cp, *dp;
361 struct str2init *s2i;
364 ** Check if we've already seen the
365 ** Content-Transfer-Encoding field
368 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
372 /* get copy of this field */
373 ct->c_celine = cp = getcpy(hp->value);
377 for (dp = cp; istoken(*dp); dp++)
383 ** Find the internal flag and Init function
384 ** for this transfer encoding.
386 for (s2i = str2ces; s2i->si_key; s2i++)
387 if (!mh_strcasecmp(cp, s2i->si_key))
389 if (!s2i->si_key && !uprf(cp, "X-"))
392 ct->c_encoding = s2i->si_val;
394 /* Call the Init function for this encoding */
395 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
398 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
399 /* Get Content-ID field */
400 ct->c_id = add(hp->value, ct->c_id);
402 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
403 /* Get Content-Description field */
404 ct->c_descr = add(hp->value, ct->c_descr);
406 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
407 /* Get Content-Disposition field */
408 ct->c_dispo = add(hp->value, ct->c_dispo);
412 hp = hp->next; /* next header field */
416 ** Check if we saw a Content-Type field.
417 ** If not, then assign a default value for
418 ** it, and the Init function.
422 ** If we are inside a multipart/digest message,
423 ** the default type is message/rfc822
426 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
428 ct->c_type = CT_MESSAGE;
429 ct->c_ctinitfnx = InitMessage;
432 ** Else default type is text/plain
434 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
436 ct->c_type = CT_TEXT;
437 ct->c_ctinitfnx = InitText;
441 /* Use default Transfer-Encoding, if necessary */
443 ct->c_encoding = CE_7BIT;
456 ** small routine to add header field to list
460 add_header(CT ct, char *name, char *value)
464 /* allocate header field structure */
465 hp = mh_xmalloc(sizeof(*hp));
467 /* link data into header structure */
472 /* link header structure into the list */
473 if (ct->c_first_hf == NULL) {
474 ct->c_first_hf = hp; /* this is the first */
477 ct->c_last_hf->next = hp; /* add it to the end */
486 ** Make sure that buf contains at least one appearance of name,
487 ** followed by =. If not, insert both name and value, just after
488 ** first semicolon, if any. Note that name should not contain a
489 ** trailing =. And quotes will be added around the value. Typical
490 ** usage: make sure that a Content-Disposition header contains
491 ** filename="foo". If it doesn't and value does, use value from
495 incl_name_value(unsigned char *buf, char *name, char *value) {
498 /* Assume that name is non-null. */
500 char *name_plus_equal = concat(name, "=", NULL);
502 if (!strstr(buf, name_plus_equal)) {
505 char *prefix, *suffix;
507 /* Trim trailing space, esp. newline. */
508 for (cp = &buf[strlen(buf) - 1];
509 cp >= buf && isspace(*cp); --cp) {
513 insertion = concat("; ", name, "=", "\"", value, "\"",
517 ** Insert at first semicolon, if any.
518 ** If none, append to end.
520 prefix = getcpy(buf);
521 if ((cp = strchr(prefix, ';'))) {
522 suffix = concat(cp, NULL);
524 newbuf = concat(prefix, insertion, suffix,
529 newbuf = concat(buf, insertion, "\n", NULL);
537 free(name_plus_equal);
544 ** Extract just name_suffix="foo", if any, from value. If there isn't
545 ** one, return the entire value. Note that, for example, a name_suffix
546 ** of name will match filename="foo", and return foo.
549 extract_name_value(char *name_suffix, char *value) {
550 char *extracted_name_value = value;
551 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
552 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
555 free(name_suffix_plus_quote);
556 if (name_suffix_equals) {
557 char *name_suffix_begin;
560 for (cp = name_suffix_equals; *cp != '"'; ++cp)
562 name_suffix_begin = ++cp;
563 /* Find second \". */
564 for (; *cp != '"'; ++cp)
567 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
568 memcpy(extracted_name_value, name_suffix_begin,
569 cp - name_suffix_begin);
570 extracted_name_value[cp - name_suffix_begin] = '\0';
573 return extracted_name_value;
577 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
578 ** directives. Fills in the information of the CTinfo structure.
581 get_ctinfo(unsigned char *cp, CT ct, int magic)
590 i = strlen(invo_name) + 2;
592 /* store copy of Content-Type line */
593 cp = ct->c_ctline = getcpy(cp);
595 while (isspace(*cp)) /* trim leading spaces */
598 /* change newlines to spaces */
599 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
602 /* trim trailing spaces */
603 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
609 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
611 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
614 for (dp = cp; istoken(*dp); dp++)
617 ci->ci_type = getcpy(cp); /* store content type */
621 advise(NULL, "invalid %s: field in message %s (empty type)",
622 TYPE_FIELD, ct->c_file);
626 /* down case the content type string */
627 for (dp = ci->ci_type; *dp; dp++)
628 if (isalpha(*dp) && isupper(*dp))
634 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
639 ci->ci_subtype = getcpy("");
647 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
650 for (dp = cp; istoken(*dp); dp++)
653 ci->ci_subtype = getcpy(cp); /* store the content subtype */
656 if (!*ci->ci_subtype) {
657 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
661 /* down case the content subtype string */
662 for (dp = ci->ci_subtype; *dp; dp++)
663 if (isalpha(*dp) && isupper(*dp))
670 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
674 ** Parse attribute/value pairs given with Content-Type
676 ep = (ap = ci->ci_attrs) + NPARMS;
682 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
690 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
694 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
698 /* down case the attribute name */
699 for (dp = cp; istoken(*dp); dp++)
700 if (isalpha(*dp) && isupper(*dp))
703 for (up = dp; isspace(*dp);)
705 if (dp == cp || *dp != '=') {
706 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
710 vp = (*ap = getcpy(cp)) + (up - cp);
712 for (dp++; isspace(*dp);)
715 /* now add the attribute value */
716 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
719 for (cp = ++dp, dp = vp;;) {
723 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
728 if ((c = *cp++) == '\0')
743 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
748 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
749 *ci->ci_values[ap - ci->ci_attrs] = '\0';
750 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
758 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
763 ** Get any <Content-Id> given in buffer
765 if (magic && *cp == '<') {
770 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
771 advise(NULL, "invalid ID in message %s", ct->c_file);
777 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
788 ** Get any [Content-Description] given in buffer.
790 if (magic && *cp == '[') {
792 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
796 advise(NULL, "invalid description in message %s",
805 ct->c_descr = concat(ct->c_descr, "\n", NULL);
816 ** Get any {Content-Disposition} given in buffer.
818 if (magic && *cp == '{') {
820 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
824 advise(NULL, "invalid disposition in message %s",
833 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
844 ** Check if anything is left over
848 ci->ci_magic = getcpy(cp);
851 ** If there is a Content-Disposition header and
852 ** it doesn't have a *filename=, extract it from
853 ** the magic contents. The mhbasename call skips
854 ** any leading directory components.
857 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
859 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
867 get_comment(CT ct, unsigned char **ap, int istype)
872 char c, buffer[BUFSIZ], *dp;
884 advise(NULL, "invalid comment in message %s's %s: field",
885 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
890 if ((c = *cp++) == '\0')
913 if ((dp = ci->ci_comment)) {
914 ci->ci_comment = concat(dp, " ", buffer, NULL);
917 ci->ci_comment = getcpy(buffer);
932 ** Handles content types audio, image, and video.
933 ** There's not much to do right here.
939 return OK; /* not much to do here */
953 CI ci = &ct->c_ctinfo;
955 /* check for missing subtype */
956 if (!*ci->ci_subtype)
957 ci->ci_subtype = add("plain", ci->ci_subtype);
960 for (kv = SubText; kv->kv_key; kv++)
961 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
963 ct->c_subtype = kv->kv_value;
965 /* allocate text character set structure */
966 t = (struct text *) mh_xcalloc(1, sizeof(*t));
967 ct->c_ctparams = (void *) t;
969 /* scan for charset parameter */
970 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
971 if (!mh_strcasecmp(*ap, "charset"))
974 /* check if content specified a character set */
977 ct->c_charset = getcpy(norm_charmap(*ep));
978 /* match character set or set to CHARSET_UNKNOWN */
979 for (kv = Charset; kv->kv_key; kv++) {
980 if (!mh_strcasecmp(*ep, kv->kv_key)) {
984 t->tx_charset = kv->kv_value;
986 t->tx_charset = CHARSET_UNSPECIFIED;
1002 unsigned char *cp, *dp;
1004 char *bp, buffer[BUFSIZ];
1005 struct multipart *m;
1007 struct part *part, **next;
1008 CI ci = &ct->c_ctinfo;
1013 ** The encoding for multipart messages must be either
1014 ** 7bit, 8bit, or binary (per RFC2045).
1016 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1017 && ct->c_encoding != CE_BINARY) {
1018 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1019 ct->c_encoding = CE_7BIT;
1023 for (kv = SubMultiPart; kv->kv_key; kv++)
1024 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1026 ct->c_subtype = kv->kv_value;
1029 ** Check for "boundary" parameter, which is
1030 ** required for multipart messages.
1033 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1034 if (!mh_strcasecmp(*ap, "boundary")) {
1040 /* complain if boundary parameter is missing */
1042 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1046 /* allocate primary structure for multipart info */
1047 m = (struct multipart *) mh_xcalloc(1, sizeof(*m));
1048 ct->c_ctparams = (void *) m;
1050 /* check if boundary parameter contains only whitespace characters */
1051 for (cp = bp; isspace(*cp); cp++)
1054 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1058 /* remove trailing whitespace from boundary parameter */
1059 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1064 /* record boundary separators */
1065 m->mp_start = concat(bp, "\n", NULL);
1066 m->mp_stop = concat(bp, "--\n", NULL);
1068 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1069 advise(ct->c_file, "unable to open for reading");
1073 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1075 next = &m->mp_parts;
1079 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1083 pos += strlen(buffer);
1084 if (buffer[0] != '-' || buffer[1] != '-')
1087 if (strcmp(buffer + 2, m->mp_start)!=0)
1090 part = (struct part *) mh_xcalloc(1, sizeof(*part));
1092 next = &part->mp_next;
1094 if (!(p = get_content(fp, ct->c_file,
1095 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1102 fseek(fp, pos, SEEK_SET);
1105 if (strcmp(buffer + 2, m->mp_start) == 0) {
1109 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1110 if (p->c_end < p->c_begin)
1111 p->c_begin = p->c_end;
1116 if (strcmp(buffer + 2, m->mp_stop) == 0)
1122 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1123 if (!inout && part) {
1125 p->c_end = ct->c_end;
1127 if (p->c_begin >= p->c_end) {
1128 for (next = &m->mp_parts; *next != part;
1129 next = &((*next)->mp_next))
1133 free((char *) part);
1138 /* reverse the order of the parts for multipart/alternative */
1139 if (ct->c_subtype == MULTI_ALTERNATE)
1143 ** label all subparts with part number, and
1144 ** then initialize the content of the subpart.
1149 char partnam[BUFSIZ];
1152 snprintf(partnam, sizeof(partnam), "%s.",
1154 pp = partnam + strlen(partnam);
1159 for (part = m->mp_parts, partnum = 1; part;
1160 part = part->mp_next, partnum++) {
1163 sprintf(pp, "%d", partnum);
1164 p->c_partno = getcpy(partnam);
1166 /* initialize the content of the subparts */
1167 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1182 ** reverse the order of the parts of a multipart
1186 reverse_parts(CT ct)
1189 struct multipart *m;
1190 struct part **base, **bmp, **next, *part;
1192 m = (struct multipart *) ct->c_ctparams;
1194 /* if only one part, just return */
1195 if (!m->mp_parts || !m->mp_parts->mp_next)
1198 /* count number of parts */
1200 for (part = m->mp_parts; part; part = part->mp_next)
1203 /* allocate array of pointers to the parts */
1204 base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base));
1207 /* point at all the parts */
1208 for (part = m->mp_parts; part; part = part->mp_next)
1212 /* reverse the order of the parts */
1213 next = &m->mp_parts;
1214 for (bmp--; bmp >= base; bmp--) {
1217 next = &part->mp_next;
1221 /* free array of pointers */
1222 free((char *) base);
1234 CI ci = &ct->c_ctinfo;
1236 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1237 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1241 /* check for missing subtype */
1242 if (!*ci->ci_subtype)
1243 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1246 for (kv = SubMessage; kv->kv_key; kv++)
1247 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1249 ct->c_subtype = kv->kv_value;
1251 switch (ct->c_subtype) {
1252 case MESSAGE_RFC822:
1255 case MESSAGE_PARTIAL:
1260 p = (struct partial *) mh_xcalloc(1, sizeof(*p));
1261 ct->c_ctparams = (void *) p;
1264 ** scan for parameters "id", "number",
1267 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1268 if (!mh_strcasecmp(*ap, "id")) {
1269 p->pm_partid = getcpy(*ep);
1272 if (!mh_strcasecmp(*ap, "number")) {
1273 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1275 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1280 if (!mh_strcasecmp(*ap, "total")) {
1281 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1288 if (!p->pm_partid || !p->pm_partno
1289 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1290 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1296 case MESSAGE_EXTERNAL:
1301 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1302 advise(ct->c_file, "unable to open for reading");
1306 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1308 if (!(p = get_content(fp, ct->c_file, 0))) {
1314 p->c_end = p->c_begin;
1319 switch (p->c_type) {
1324 if (p->c_subtype != MESSAGE_RFC822)
1329 (*p->c_ctinitfnx) (p);
1348 InitApplication(CT ct)
1351 CI ci = &ct->c_ctinfo;
1354 for (kv = SubApplication; kv->kv_key; kv++)
1355 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1357 ct->c_subtype = kv->kv_value;
1364 ** TRANSFER ENCODINGS
1368 init_encoding(CT ct, OpenCEFunc openfnx)
1372 ce = (CE) mh_xcalloc(1, sizeof(*ce));
1375 ct->c_ceopenfnx = openfnx;
1376 ct->c_ceclosefnx = close_encoding;
1377 ct->c_cesizefnx = size_encoding;
1384 close_encoding(CT ct)
1388 if (!(ce = ct->c_cefile))
1398 static unsigned long
1399 size_encoding(CT ct)
1407 if (!(ce = ct->c_cefile))
1408 return (ct->c_end - ct->c_begin);
1410 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1411 return (long) st.st_size;
1414 if (stat(ce->ce_file, &st) != NOTOK)
1415 return (long) st.st_size;
1420 if (ct->c_encoding == CE_EXTERNAL)
1421 return (ct->c_end - ct->c_begin);
1424 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1425 return (ct->c_end - ct->c_begin);
1427 if (fstat(fd, &st) != NOTOK)
1428 size = (long) st.st_size;
1432 (*ct->c_ceclosefnx) (ct);
1441 static unsigned char b642nib[0x80] = {
1442 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1444 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1445 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1446 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1447 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1448 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1449 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1450 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1451 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1452 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1453 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1454 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1455 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1456 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1457 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1464 return init_encoding(ct, openBase64);
1469 openBase64(CT ct, char **file)
1472 int fd, len, skip, own_ct_fp = 0;
1474 unsigned char value, *b, *b1, *b2, *b3;
1475 unsigned char *cp, *ep;
1476 char buffer[BUFSIZ];
1477 /* sbeck -- handle suffixes */
1481 b = (unsigned char *) &bits;
1482 b1 = &b[endian > 0 ? 1 : 2];
1483 b2 = &b[endian > 0 ? 2 : 1];
1484 b3 = &b[endian > 0 ? 3 : 0];
1488 fseek(ce->ce_fp, 0L, SEEK_SET);
1493 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1494 content_error(ce->ce_file, ct,
1495 "unable to fopen for reading");
1501 if (*file == NULL) {
1502 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1505 ce->ce_file = getcpy(*file);
1509 /* sbeck@cise.ufl.edu -- handle suffixes */
1511 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1512 invo_name, ci->ci_type, ci->ci_subtype);
1513 cp = context_find(buffer);
1514 if (cp == NULL || *cp == '\0') {
1515 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1517 cp = context_find(buffer);
1519 if (cp != NULL && *cp != '\0') {
1520 if (ce->ce_unlink) {
1522 ** Temporary file already exists, so we rename it to
1523 ** a version with the extension.
1525 char *file_org = strdup(ce->ce_file);
1526 ce->ce_file = add(cp, ce->ce_file);
1527 if (rename(file_org, ce->ce_file)) {
1528 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1534 ce->ce_file = add(cp, ce->ce_file);
1538 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1539 content_error(ce->ce_file, ct,
1540 "unable to fopen for reading/writing");
1544 if ((len = ct->c_end - ct->c_begin) < 0)
1545 adios(EX_SOFTWARE, NULL, "internal error(1)");
1548 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1549 content_error(ct->c_file, ct,
1550 "unable to open for reading");
1560 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1562 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1564 content_error(ct->c_file, ct, "error reading from");
1568 content_error(NULL, ct, "premature eof");
1576 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1581 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1583 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1585 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1589 bits |= value << bitno;
1591 if ((bitno -= 6) < 0) {
1592 putc((char) *b1, ce->ce_fp);
1594 putc((char) *b2, ce->ce_fp);
1596 putc((char) *b3, ce->ce_fp);
1600 if (ferror(ce->ce_fp)) {
1601 content_error(ce->ce_file, ct,
1602 "error writing to");
1605 bitno = 18, bits = 0L, skip = 0;
1611 goto self_delimiting;
1620 fprintf(stderr, "premature ending (bitno %d)\n",
1623 content_error(NULL, ct, "invalid BASE64 encoding");
1628 fseek(ct->c_fp, 0L, SEEK_SET);
1630 if (fflush(ce->ce_fp)) {
1631 content_error(ce->ce_file, ct, "error writing to");
1635 fseek(ce->ce_fp, 0L, SEEK_SET);
1638 *file = ce->ce_file;
1643 return fileno(ce->ce_fp);
1646 free_encoding(ct, 0);
1659 static char hex2nib[0x80] = {
1660 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1661 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1662 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1663 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1664 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1665 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1666 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1667 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1668 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1671 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1672 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1682 return init_encoding(ct, openQuoted);
1687 openQuoted(CT ct, char **file)
1689 int cc, len, quoted, own_ct_fp = 0;
1690 unsigned char *cp, *ep;
1691 char buffer[BUFSIZ];
1692 unsigned char mask = 0;
1694 /* sbeck -- handle suffixes */
1699 fseek(ce->ce_fp, 0L, SEEK_SET);
1704 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1705 content_error(ce->ce_file, ct,
1706 "unable to fopen for reading");
1712 if (*file == NULL) {
1713 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1716 ce->ce_file = getcpy(*file);
1720 /* sbeck@cise.ufl.edu -- handle suffixes */
1722 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1723 invo_name, ci->ci_type, ci->ci_subtype);
1724 cp = context_find(buffer);
1725 if (cp == NULL || *cp == '\0') {
1726 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1728 cp = context_find(buffer);
1730 if (cp != NULL && *cp != '\0') {
1731 if (ce->ce_unlink) {
1733 ** Temporary file already exists, so we rename it to
1734 ** a version with the extension.
1736 char *file_org = strdup(ce->ce_file);
1737 ce->ce_file = add(cp, ce->ce_file);
1738 if (rename(file_org, ce->ce_file)) {
1739 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1745 ce->ce_file = add(cp, ce->ce_file);
1749 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1750 content_error(ce->ce_file, ct,
1751 "unable to fopen for reading/writing");
1755 if ((len = ct->c_end - ct->c_begin) < 0)
1756 adios(EX_SOFTWARE, NULL, "internal error(2)");
1759 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1760 content_error(ct->c_file, ct,
1761 "unable to open for reading");
1769 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1771 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1772 content_error(NULL, ct, "premature eof");
1776 if ((cc = strlen(buffer)) > len)
1780 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1785 for (; cp < ep; cp++) {
1787 /* in an escape sequence */
1789 /* at byte 1 of an escape sequence */
1790 mask = hex2nib[*cp & 0x7f];
1791 /* next is byte 2 */
1794 /* at byte 2 of an escape sequence */
1796 mask |= hex2nib[*cp & 0x7f];
1797 putc(mask, ce->ce_fp);
1798 if (ferror(ce->ce_fp)) {
1799 content_error(ce->ce_file, ct, "error writing to");
1803 ** finished escape sequence; next may
1804 ** be literal or a new escape sequence
1808 /* on to next byte */
1812 /* not in an escape sequence */
1815 ** starting an escape sequence,
1818 if (cp + 1 < ep && cp[1] == '\n') {
1819 /* "=\n" soft line break, eat the \n */
1823 if (cp + 1 >= ep || cp + 2 >= ep) {
1825 ** We don't have 2 bytes left,
1826 ** so this is an invalid escape
1827 ** sequence; just show the raw bytes
1830 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1832 ** Next 2 bytes are hex digits,
1833 ** making this a valid escape
1834 ** sequence; let's decode it (above).
1840 ** One or both of the next 2 is
1841 ** out of range, making this an
1842 ** invalid escape sequence; just
1843 ** show the raw bytes (below).
1848 /* Just show the raw byte. */
1849 putc(*cp, ce->ce_fp);
1850 if (ferror(ce->ce_fp)) {
1851 content_error(ce->ce_file, ct,
1852 "error writing to");
1858 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1862 fseek(ct->c_fp, 0L, SEEK_SET);
1864 if (fflush(ce->ce_fp)) {
1865 content_error(ce->ce_file, ct, "error writing to");
1869 fseek(ce->ce_fp, 0L, SEEK_SET);
1872 *file = ce->ce_file;
1877 return fileno(ce->ce_fp);
1880 free_encoding(ct, 0);
1896 if (init_encoding(ct, open7Bit) == NOTOK)
1899 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1905 open7Bit(CT ct, char **file)
1907 int cc, fd, len, own_ct_fp = 0;
1908 char buffer[BUFSIZ];
1909 /* sbeck -- handle suffixes */
1916 fseek(ce->ce_fp, 0L, SEEK_SET);
1921 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1922 content_error(ce->ce_file, ct,
1923 "unable to fopen for reading");
1929 if (*file == NULL) {
1930 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1933 ce->ce_file = getcpy(*file);
1937 /* sbeck@cise.ufl.edu -- handle suffixes */
1939 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1940 invo_name, ci->ci_type, ci->ci_subtype);
1941 cp = context_find(buffer);
1942 if (cp == NULL || *cp == '\0') {
1943 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1945 cp = context_find(buffer);
1947 if (cp != NULL && *cp != '\0') {
1948 if (ce->ce_unlink) {
1950 ** Temporary file already exists, so we rename it to
1951 ** a version with the extension.
1953 char *file_org = strdup(ce->ce_file);
1954 ce->ce_file = add(cp, ce->ce_file);
1955 if (rename(file_org, ce->ce_file)) {
1956 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1962 ce->ce_file = add(cp, ce->ce_file);
1966 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1967 content_error(ce->ce_file, ct,
1968 "unable to fopen for reading/writing");
1972 if (ct->c_type == CT_MULTIPART) {
1974 CI ci = &ct->c_ctinfo;
1977 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1979 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1980 strlen(ci->ci_subtype);
1981 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1982 putc(';', ce->ce_fp);
1985 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1988 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1989 fputs("\n\t", ce->ce_fp);
1992 putc(' ', ce->ce_fp);
1995 fprintf(ce->ce_fp, "%s", buffer);
1999 if (ci->ci_comment) {
2000 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2002 fputs("\n\t", ce->ce_fp);
2005 putc(' ', ce->ce_fp);
2008 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2011 fprintf(ce->ce_fp, "\n");
2013 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2015 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2017 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2018 fprintf(ce->ce_fp, "\n");
2021 if ((len = ct->c_end - ct->c_begin) < 0)
2022 adios(EX_SOFTWARE, NULL, "internal error(3)");
2025 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2026 content_error(ct->c_file, ct,
2027 "unable to open for reading");
2033 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2035 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2037 content_error(ct->c_file, ct, "error reading from");
2041 content_error(NULL, ct, "premature eof");
2049 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2050 if (ferror(ce->ce_fp)) {
2051 content_error(ce->ce_file, ct,
2052 "error writing to");
2057 fseek(ct->c_fp, 0L, SEEK_SET);
2059 if (fflush(ce->ce_fp)) {
2060 content_error(ce->ce_file, ct, "error writing to");
2064 fseek(ce->ce_fp, 0L, SEEK_SET);
2067 *file = ce->ce_file;
2072 return fileno(ce->ce_fp);
2075 free_encoding(ct, 0);