2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Subtype names recognised for text content, paired with their
** internal subtype codes.  The text subtype lookup in this file walks
** the entries until it reaches the NULL key, comparing ci_subtype with
** each kv_key via mh_strcasecmp, and stores kv_value in c_subtype.
** An unmatched subtype therefore appears to end up with the sentinel's
** TEXT_UNKNOWN (the loop's exit statement is not visible here --
** confirm).
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Character set names with a dedicated internal code.  The value of a
** text part's "charset" parameter is compared against each kv_key with
** mh_strcasecmp, and kv_value is stored in tx_charset; the NULL-key
** sentinel carries CHARSET_UNKNOWN.  CHARSET_UNSPECIFIED is assigned
** by the caller separately and is not part of this table.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Subtype names recognised for multipart content.  InitMultiPart
** compares ci_subtype against each kv_key with mh_strcasecmp and
** stores kv_value in c_subtype.  Two of the codes change how parts
** are handled later in that routine: MULTI_DIGEST makes subparts get
** parsed with toplevel = -1, and MULTI_ALTERNATE triggers reversing
** the order of the parts.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Subtype names recognised for message content.  The message
** initializer compares ci_subtype against each kv_key with
** mh_strcasecmp, stores kv_value in c_subtype, and then switches on
** that code (MESSAGE_RFC822, MESSAGE_PARTIAL, MESSAGE_EXTERNAL).
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Subtype names recognised for application content.  InitApplication
** compares ci_subtype against each kv_key with mh_strcasecmp and
** stores kv_value in c_subtype; the NULL-key sentinel carries
** APPLICATION_UNKNOWN.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Maps the major type name of a Content-Type field to its internal
** type code and to the Init function for that type.  The Content-Type
** header handling compares ci_type against each si_key with
** mh_strcasecmp, then copies si_val into c_type and si_init into
** c_ctinitfnx.  audio, image and video share InitGeneric.
**
** The table ends with two NULL-key entries.  The lookup tests whether
** an unmatched type starts with "X-"; presumably such types keep the
** first sentinel (CT_EXTENSION) and all others move on to the second
** (CT_UNKNOWN) -- the statement that does this is not visible here,
** confirm.  Both sentinels have no Init function.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Maps a Content-Transfer-Encoding name to its internal encoding code
** and to the Init function that sets up decoding.  The
** Content-Transfer-Encoding header handling compares the field value
** against each si_key with mh_strcasecmp, stores si_val in c_encoding,
** and calls si_init when it is non-NULL.  8bit, 7bit and binary all
** use Init7Bit.
**
** As with str2cts, there are two NULL-key sentinels and the lookup
** tests for an "X-" prefix on unmatched names; presumably that selects
** between CE_EXTENSION and CE_UNKNOWN -- the selecting statement is
** not visible here, confirm.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = mh_xstrdup(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
241 /* allocate the content structure */
242 ct = mh_xcalloc(1, sizeof(*ct));
245 ct->c_file = mh_xstrdup(file);
246 ct->c_begin = ftell(ct->c_fp) + 1;
249 ** Parse the header fields for this
250 ** content into a linked list.
252 for (compnum = 1, state = FLD2;;) {
253 switch (state = m_getfld2(state, &f, in)) {
259 ct->crlf = f.value[f.valuelen-2] == '\r';
264 buf = mh_xcalloc(sizeof(char *), f.valuelen);
265 if (!decode_rfc2047(f.value, buf, f.valuelen)) {
267 buf = mh_xstrdup(f.value);
270 /* add the header data to the list */
271 add_header(ct, mh_xstrdup(f.name), buf);
275 ct->c_begin = ftell(in) + 1;
279 ct->c_begin = ftell(in) - strlen(f.value);
283 ct->c_begin = ftell(in);
287 advise(NULL, "message format error in component #%d", compnum);
292 adios(EX_IOERR, "m_getfld2", "io error");
295 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
301 ** Read the content headers. We will parse the
302 ** MIME related header fields into their various
303 ** structures and set internal flags related to
304 ** content type/subtype, etc.
307 hp = ct->c_first_hf; /* start at first header field */
309 /* Get MIME-Version field */
310 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
313 unsigned char *cp, *dp;
316 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
319 ct->c_vrsn = mh_xstrdup(hp->value);
321 /* Now, cleanup this field */
326 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
328 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
333 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
335 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
338 for (dp = cp; istoken(*dp); dp++)
342 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
345 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
348 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349 /* Get Content-Type field */
350 struct str2init *s2i;
351 CI ci = &ct->c_ctinfo;
353 /* Check if we've already seen a Content-Type header */
355 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
359 /* Parse the Content-Type field */
360 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
364 ** Set the Init function and the internal
365 ** flag for this content type.
367 for (s2i = str2cts; s2i->si_key; s2i++)
368 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
370 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
372 ct->c_type = s2i->si_val;
373 ct->c_ctinitfnx = s2i->si_init;
375 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376 /* Get Content-Transfer-Encoding field */
378 unsigned char *cp, *dp;
379 struct str2init *s2i;
382 ** Check if we've already seen the
383 ** Content-Transfer-Encoding field
386 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
390 /* get copy of this field */
391 ct->c_celine = cp = mh_xstrdup(hp->value);
395 for (dp = cp; istoken(*dp); dp++)
401 ** Find the internal flag and Init function
402 ** for this transfer encoding.
404 for (s2i = str2ces; s2i->si_key; s2i++)
405 if (!mh_strcasecmp(cp, s2i->si_key))
407 if (!s2i->si_key && !uprf(cp, "X-"))
410 ct->c_encoding = s2i->si_val;
412 /* Call the Init function for this encoding */
413 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
416 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417 /* Get Content-ID field */
418 ct->c_id = add(hp->value, ct->c_id);
420 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421 /* Get Content-Description field */
422 ct->c_descr = add(hp->value, ct->c_descr);
424 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425 /* Get Content-Disposition field */
426 ct->c_dispo = add(hp->value, ct->c_dispo);
430 hp = hp->next; /* next header field */
434 ** Check if we saw a Content-Type field.
435 ** If not, then assign a default value for
436 ** it, and the Init function.
440 ** We are inside a multipart/digest message,
441 ** so the default type is message/rfc822
444 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
446 ct->c_type = CT_MESSAGE;
447 ct->c_ctinitfnx = InitMessage;
450 ** Else default type is text/plain
452 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
454 ct->c_type = CT_TEXT;
455 ct->c_ctinitfnx = InitText;
459 /* Use default Transfer-Encoding, if necessary */
461 ct->c_encoding = CE_7BIT;
474 ** small routine to add header field to list
478 add_header(CT ct, char *name, char *value)
482 /* allocate header field structure */
483 hp = mh_xcalloc(1, sizeof(*hp));
485 /* link data into header structure */
490 /* link header structure into the list */
491 if (ct->c_first_hf == NULL) {
492 ct->c_first_hf = hp; /* this is the first */
495 ct->c_last_hf->next = hp; /* add it to the end */
504 ** Make sure that buf contains at least one appearance of name,
505 ** followed by =. If not, insert both name and value, just after
506 ** first semicolon, if any. Note that name should not contain a
507 ** trailing =. And quotes will be added around the value. Typical
508 ** usage: make sure that a Content-Disposition header contains
509 ** filename="foo". If it doesn't and value does, use value from
513 incl_name_value(unsigned char *buf, char *name, char *value) {
516 /* Assume that name is non-null. */
518 char *name_plus_equal = concat(name, "=", NULL);
520 if (!strstr(buf, name_plus_equal)) {
523 char *prefix, *suffix;
525 /* Trim trailing space, esp. newline. */
526 for (cp = &buf[strlen(buf) - 1];
527 cp >= buf && isspace(*cp); --cp) {
531 insertion = concat("; ", name, "=", "\"", value, "\"",
535 ** Insert at first semicolon, if any.
536 ** If none, append to end.
538 prefix = mh_xstrdup(buf);
539 if ((cp = strchr(prefix, ';'))) {
540 suffix = concat(cp, NULL);
542 newbuf = concat(prefix, insertion, suffix,
547 newbuf = concat(buf, insertion, "\n", NULL);
551 mh_free0(&insertion);
555 mh_free0(&name_plus_equal);
562 ** Extract just name_suffix="foo", if any, from value. If there isn't
563 ** one, return the entire value. Note that, for example, a name_suffix
564 ** of name will match filename="foo", and return foo.
567 extract_name_value(char *name_suffix, char *value) {
568 char *extracted_name_value;
569 char *name_suffix_plus_quote;
570 char *name_suffix_equals;
576 extracted_name_value = value;
577 name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
578 name_suffix_equals = strstr(value, name_suffix_plus_quote);
579 mh_free0(&name_suffix_plus_quote);
580 if (name_suffix_equals) {
581 char *name_suffix_begin;
584 for (cp = name_suffix_equals; *cp != '"'; ++cp)
586 name_suffix_begin = ++cp;
587 /* Find second \". */
588 for (; *cp != '"'; ++cp)
591 extracted_name_value = mh_xcalloc(cp - name_suffix_begin + 1, sizeof(char));
592 memcpy(extracted_name_value, name_suffix_begin,
593 cp - name_suffix_begin);
594 extracted_name_value[cp - name_suffix_begin] = '\0';
597 return extracted_name_value;
601 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
602 ** directives. Fills in the information of the CTinfo structure.
605 get_ctinfo(unsigned char *cp, CT ct, int magic)
614 i = strlen(invo_name) + 2;
616 /* store copy of Content-Type line */
617 cp = ct->c_ctline = mh_xstrdup(cp);
619 while (isspace(*cp)) /* trim leading spaces */
622 /* change newlines to spaces */
623 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
626 /* trim trailing spaces */
627 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
633 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
635 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
638 for (dp = cp; istoken(*dp); dp++)
641 ci->ci_type = mh_xstrdup(cp); /* store content type */
645 advise(NULL, "invalid %s: field in message %s (empty type)",
646 TYPE_FIELD, ct->c_file);
650 /* down case the content type string */
651 for (dp = ci->ci_type; *dp; dp++)
652 if (isalpha(*dp) && isupper(*dp))
658 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
663 ci->ci_subtype = mh_xstrdup("");
671 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
674 for (dp = cp; istoken(*dp); dp++)
677 ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */
680 if (!*ci->ci_subtype) {
681 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
685 /* down case the content subtype string */
686 for (dp = ci->ci_subtype; *dp; dp++)
687 if (isalpha(*dp) && isupper(*dp))
694 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
698 ** Parse attribute/value pairs given with Content-Type
700 ep = (ap = ci->ci_attrs) + NPARMS;
706 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
714 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
718 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
722 /* down case the attribute name */
723 for (dp = cp; istoken(*dp); dp++)
724 if (isalpha(*dp) && isupper(*dp))
727 for (up = dp; isspace(*dp);)
729 if (dp == cp || *dp != '=') {
730 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
734 vp = (*ap = mh_xstrdup(cp)) + (up - cp);
736 for (dp++; isspace(*dp);)
739 /* now add the attribute value */
740 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
743 for (cp = ++dp, dp = vp;;) {
747 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
752 if ((c = *cp++) == '\0')
767 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
772 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
773 *ci->ci_values[ap - ci->ci_attrs] = '\0';
774 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
782 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
787 ** Get any <Content-Id> given in buffer
789 if (magic && *cp == '<') {
791 mh_free0(&(ct->c_id));
793 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
794 advise(NULL, "invalid ID in message %s", ct->c_file);
800 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
811 ** Get any [Content-Description] given in buffer.
813 if (magic && *cp == '[') {
815 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
819 advise(NULL, "invalid description in message %s",
828 ct->c_descr = concat(ct->c_descr, "\n", NULL);
839 ** Get any {Content-Disposition} given in buffer.
841 if (magic && *cp == '{') {
843 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
847 advise(NULL, "invalid disposition in message %s",
856 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
867 ** Check if anything is left over
871 ci->ci_magic = mh_xstrdup(cp);
874 ** If there is a Content-Disposition header and
875 ** it doesn't have a *filename=, extract it from
876 ** the magic contents. The mhbasename call skips
877 ** any leading directory components.
880 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
882 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
890 get_comment(CT ct, unsigned char **ap, int istype)
895 char c, buffer[BUFSIZ], *dp;
907 advise(NULL, "invalid comment in message %s's %s: field",
908 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
913 if ((c = *cp++) == '\0')
936 if ((dp = ci->ci_comment)) {
937 ci->ci_comment = concat(dp, " ", buffer, NULL);
940 ci->ci_comment = mh_xstrdup(buffer);
955 ** Handles content types audio, image, and video.
956 ** There's not much to do right here.
962 return OK; /* not much to do here */
976 CI ci = &ct->c_ctinfo;
978 /* check for missing subtype */
979 if (!*ci->ci_subtype)
980 ci->ci_subtype = add("plain", ci->ci_subtype);
983 for (kv = SubText; kv->kv_key; kv++)
984 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
986 ct->c_subtype = kv->kv_value;
988 /* allocate text character set structure */
989 t = mh_xcalloc(1, sizeof(*t));
990 ct->c_ctparams = (void *) t;
992 /* scan for charset parameter */
993 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
994 if (!mh_strcasecmp(*ap, "charset"))
997 /* check if content specified a character set */
1000 ct->c_charset = mh_xstrdup(norm_charmap(*ep));
1001 /* match character set or set to CHARSET_UNKNOWN */
1002 for (kv = Charset; kv->kv_key; kv++) {
1003 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1007 t->tx_charset = kv->kv_value;
1009 t->tx_charset = CHARSET_UNSPECIFIED;
1021 InitMultiPart(CT ct)
1025 unsigned char *cp, *dp;
1027 char *bp, buffer[BUFSIZ];
1028 struct multipart *m;
1030 struct part *part, **next;
1031 CI ci = &ct->c_ctinfo;
1036 ** The encoding for multipart messages must be either
1037 ** 7bit, 8bit, or binary (per RFC2045).
1039 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1040 && ct->c_encoding != CE_BINARY) {
1041 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1042 ct->c_encoding = CE_7BIT;
1046 for (kv = SubMultiPart; kv->kv_key; kv++)
1047 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1049 ct->c_subtype = kv->kv_value;
1052 ** Check for "boundary" parameter, which is
1053 ** required for multipart messages.
1056 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1057 if (!mh_strcasecmp(*ap, "boundary")) {
1063 /* complain if boundary parameter is missing */
1065 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1069 /* allocate primary structure for multipart info */
1070 m = mh_xcalloc(1, sizeof(*m));
1071 ct->c_ctparams = (void *) m;
1073 /* check if boundary parameter contains only whitespace characters */
1074 for (cp = bp; isspace(*cp); cp++)
1077 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1081 /* remove trailing whitespace from boundary parameter */
1082 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1087 /* record boundary separators */
1089 m->mp_start = concat(bp, "\n", NULL);
1090 m->mp_stop = concat(bp, "--\n", NULL);
1092 m->mp_start = concat(bp, "\r\n", NULL);
1093 m->mp_stop = concat(bp, "--\r\n", NULL);
1097 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1098 advise(ct->c_file, "unable to open for reading");
1102 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1104 next = &m->mp_parts;
1108 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1112 pos += strlen(buffer);
1113 if (buffer[0] != '-' || buffer[1] != '-')
1116 if (strcmp(buffer + 2, m->mp_start)!=0)
1119 part = mh_xcalloc(1, sizeof(*part));
1121 next = &part->mp_next;
1123 if (!(p = get_content(fp, ct->c_file,
1124 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1131 fseek(fp, pos, SEEK_SET);
1134 if (strcmp(buffer + 2, m->mp_start) == 0) {
1138 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1139 if (p->c_end < p->c_begin)
1140 p->c_begin = p->c_end;
1145 if (strcmp(buffer + 2, m->mp_stop) == 0)
1151 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1152 if (!inout && part) {
1154 p->c_end = ct->c_end;
1156 if (p->c_begin >= p->c_end) {
1157 for (next = &m->mp_parts; *next != part;
1158 next = &((*next)->mp_next))
1167 /* reverse the order of the parts for multipart/alternative */
1168 if (ct->c_subtype == MULTI_ALTERNATE)
1172 ** label all subparts with part number, and
1173 ** then initialize the content of the subpart.
1178 char partnam[BUFSIZ];
1181 snprintf(partnam, sizeof(partnam), "%s.",
1183 pp = partnam + strlen(partnam);
1188 for (part = m->mp_parts, partnum = 1; part;
1189 part = part->mp_next, partnum++) {
1192 sprintf(pp, "%d", partnum);
1193 p->c_partno = mh_xstrdup(partnam);
1195 /* initialize the content of the subparts */
1196 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1211 ** reverse the order of the parts of a multipart
1215 reverse_parts(CT ct)
1218 struct multipart *m;
1219 struct part **base, **bmp, **next, *part;
1221 m = (struct multipart *) ct->c_ctparams;
1223 /* if only one part, just return */
1224 if (!m->mp_parts || !m->mp_parts->mp_next)
1227 /* count number of parts */
1229 for (part = m->mp_parts; part; part = part->mp_next)
1232 /* allocate array of pointers to the parts */
1233 base = mh_xcalloc(i + 1, sizeof(*base));
1236 /* point at all the parts */
1237 for (part = m->mp_parts; part; part = part->mp_next)
1241 /* reverse the order of the parts */
1242 next = &m->mp_parts;
1243 for (bmp--; bmp >= base; bmp--) {
1246 next = &part->mp_next;
1250 /* free array of pointers */
1263 CI ci = &ct->c_ctinfo;
1265 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT) && (ct->c_encoding != CE_BINARY)) {
1266 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1267 ct->c_encoding = CE_7BIT;
1270 /* check for missing subtype */
1271 if (!*ci->ci_subtype)
1272 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1275 for (kv = SubMessage; kv->kv_key; kv++)
1276 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1278 ct->c_subtype = kv->kv_value;
1280 switch (ct->c_subtype) {
1281 case MESSAGE_RFC822:
1284 case MESSAGE_PARTIAL:
1289 p = mh_xcalloc(1, sizeof(*p));
1290 ct->c_ctparams = (void *) p;
1293 ** scan for parameters "id", "number",
1296 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1297 if (!mh_strcasecmp(*ap, "id")) {
1298 p->pm_partid = mh_xstrdup(*ep);
1301 if (!mh_strcasecmp(*ap, "number")) {
1302 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1304 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1309 if (!mh_strcasecmp(*ap, "total")) {
1310 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1317 if (!p->pm_partid || !p->pm_partno
1318 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1319 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1325 case MESSAGE_EXTERNAL:
1330 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1331 advise(ct->c_file, "unable to open for reading");
1335 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1337 if (!(p = get_content(fp, ct->c_file, 0))) {
1343 p->c_end = p->c_begin;
1348 switch (p->c_type) {
1353 if (p->c_subtype != MESSAGE_RFC822)
1358 (*p->c_ctinitfnx) (p);
1377 InitApplication(CT ct)
1380 CI ci = &ct->c_ctinfo;
1383 for (kv = SubApplication; kv->kv_key; kv++)
1384 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1386 ct->c_subtype = kv->kv_value;
1393 ** TRANSFER ENCODINGS
1397 init_encoding(CT ct, OpenCEFunc openfnx)
1401 ce = mh_xcalloc(1, sizeof(*ce));
1404 ct->c_ceopenfnx = openfnx;
1405 ct->c_ceclosefnx = close_encoding;
1406 ct->c_cesizefnx = size_encoding;
1413 close_encoding(CT ct)
1417 if (!(ce = ct->c_cefile))
1427 static unsigned long
1428 size_encoding(CT ct)
1436 if (!(ce = ct->c_cefile))
1437 return (ct->c_end - ct->c_begin);
1439 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1440 return (long) st.st_size;
1443 if (stat(ce->ce_file, &st) != NOTOK)
1444 return (long) st.st_size;
1449 if (ct->c_encoding == CE_EXTERNAL)
1450 return (ct->c_end - ct->c_begin);
1453 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1454 return (ct->c_end - ct->c_begin);
1456 if (fstat(fd, &st) != NOTOK)
1457 size = (long) st.st_size;
1461 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table, indexed by a 7-bit character code.  Each
** entry is the 6-bit value (0x00-0x3f) of that character in the base64
** alphabet, or 0xff for any character outside the alphabet (the
** padding character '=' is among those).  openBase64 indexes it with
** (*cp & 0x7f) and treats every result greater than 0x3f as invalid.
*/
1470 static unsigned char b642nib[0x80] = {
1471 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1472 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1473 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1474 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1475 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1476 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, /* 0x28-0x2f: '+' and '/' */
1477 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, /* 0x30-0x37: '0'-'7' */
1478 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f: '8', '9' */
1479 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* 0x40-0x47: 'A'-'G' */
1480 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, /* 0x48-0x4f: 'H'-'O' */
1481 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, /* 0x50-0x57: 'P'-'W' */
1482 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f: 'X'-'Z' */
1483 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, /* 0x60-0x67: 'a'-'g' */
1484 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, /* 0x68-0x6f: 'h'-'o' */
1485 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, /* 0x70-0x77: 'p'-'w' */
1486 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff /* 0x78-0x7f: 'x'-'z' */
1493 return init_encoding(ct, openBase64);
1498 openBase64(CT ct, char **file)
1501 int fd, len, skip, own_ct_fp = 0;
1503 unsigned char value, *b, *b1, *b2, *b3;
1504 unsigned char *cp, *ep;
1505 char buffer[BUFSIZ];
1506 /* sbeck -- handle suffixes */
1510 b = (unsigned char *) &bits;
1511 b1 = &b[endian > 0 ? 1 : 2];
1512 b2 = &b[endian > 0 ? 2 : 1];
1513 b3 = &b[endian > 0 ? 3 : 0];
1517 fseek(ce->ce_fp, 0L, SEEK_SET);
1522 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1523 content_error(ce->ce_file, ct,
1524 "unable to fopen for reading");
1530 if (*file == NULL) {
1531 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1534 ce->ce_file = mh_xstrdup(*file);
1538 /* sbeck@cise.ufl.edu -- handle suffixes */
1540 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1541 invo_name, ci->ci_type, ci->ci_subtype);
1542 cp = context_find(buffer);
1543 if (cp == NULL || *cp == '\0') {
1544 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1546 cp = context_find(buffer);
1548 if (cp != NULL && *cp != '\0') {
1549 if (ce->ce_unlink) {
1551 ** Temporary file already exists, so we rename to
1552 ** version with extension.
1554 char *file_org = mh_xstrdup(ce->ce_file);
1555 ce->ce_file = add(cp, ce->ce_file);
1556 if (rename(file_org, ce->ce_file)) {
1557 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1560 mh_free0(&file_org);
1563 ce->ce_file = add(cp, ce->ce_file);
1567 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1568 content_error(ce->ce_file, ct,
1569 "unable to fopen for reading/writing");
1573 if ((len = ct->c_end - ct->c_begin) < 0)
1574 adios(EX_SOFTWARE, NULL, "internal error(1)");
1577 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1578 content_error(ct->c_file, ct,
1579 "unable to open for reading");
1589 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1591 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1593 content_error(ct->c_file, ct, "error reading from");
1597 content_error(NULL, ct, "premature eof");
1605 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1610 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1612 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1614 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1618 bits |= value << bitno;
1620 if ((bitno -= 6) < 0) {
1621 putc((char) *b1, ce->ce_fp);
1623 putc((char) *b2, ce->ce_fp);
1625 putc((char) *b3, ce->ce_fp);
1629 if (ferror(ce->ce_fp)) {
1630 content_error(ce->ce_file, ct,
1631 "error writing to");
1634 bitno = 18, bits = 0L, skip = 0;
1640 goto self_delimiting;
1649 fprintf(stderr, "premature ending (bitno %d)\n",
1652 content_error(NULL, ct, "invalid BASE64 encoding");
1657 fseek(ct->c_fp, 0L, SEEK_SET);
1659 if (fflush(ce->ce_fp)) {
1660 content_error(ce->ce_file, ct, "error writing to");
1664 fseek(ce->ce_fp, 0L, SEEK_SET);
1667 *file = ce->ce_file;
1672 return fileno(ce->ce_fp);
1675 free_encoding(ct, 0);
/*
** Hexadecimal digit table, indexed by a 7-bit character code.  Entries
** for '0'-'9', 'A'-'F' and 'a'-'f' hold the digit's value (0-15);
** every other entry is 0.  openQuoted reads hex2nib[*cp & 0x7f] for
** byte 1 and byte 2 of a quoted-printable escape sequence.
**
** Because non-hex characters also map to 0, the table alone cannot
** tell '0' from an invalid character; openQuoted checks isxdigit() on
** both bytes before it starts decoding an escape.
*/
1688 static char hex2nib[0x80] = {
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37: '0'-'7' */
1696 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f: '8', '9' */
1697 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, /* 0x40-0x47: 'A'-'F' */
1698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1699 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1700 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1701 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, /* 0x60-0x67: 'a'-'f' */
1702 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1703 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1704 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1711 return init_encoding(ct, openQuoted);
1716 openQuoted(CT ct, char **file)
1718 int cc, len, quoted, own_ct_fp = 0;
1719 unsigned char *cp, *ep;
1720 char buffer[BUFSIZ];
1721 unsigned char mask = 0;
1723 /* sbeck -- handle suffixes */
1728 fseek(ce->ce_fp, 0L, SEEK_SET);
1733 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1734 content_error(ce->ce_file, ct,
1735 "unable to fopen for reading");
1741 if (*file == NULL) {
1742 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1745 ce->ce_file = mh_xstrdup(*file);
1749 /* sbeck@cise.ufl.edu -- handle suffixes */
1751 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1752 invo_name, ci->ci_type, ci->ci_subtype);
1753 cp = context_find(buffer);
1754 if (cp == NULL || *cp == '\0') {
1755 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1757 cp = context_find(buffer);
1759 if (cp != NULL && *cp != '\0') {
1760 if (ce->ce_unlink) {
1762 ** Temporary file already exists, so we rename to
1763 ** version with extension.
1765 char *file_org = mh_xstrdup(ce->ce_file);
1766 ce->ce_file = add(cp, ce->ce_file);
1767 if (rename(file_org, ce->ce_file)) {
1768 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1771 mh_free0(&file_org);
1774 ce->ce_file = add(cp, ce->ce_file);
1778 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1779 content_error(ce->ce_file, ct,
1780 "unable to fopen for reading/writing");
1784 if ((len = ct->c_end - ct->c_begin) < 0)
1785 adios(EX_SOFTWARE, NULL, "internal error(2)");
1788 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1789 content_error(ct->c_file, ct,
1790 "unable to open for reading");
1798 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1800 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1801 content_error(NULL, ct, "premature eof");
1805 if ((cc = strlen(buffer)) > len)
1809 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1814 for (; cp < ep; cp++) {
1816 /* in an escape sequence */
1818 /* at byte 1 of an escape sequence */
1819 mask = hex2nib[*cp & 0x7f];
1820 /* next is byte 2 */
1823 /* at byte 2 of an escape sequence */
1825 mask |= hex2nib[*cp & 0x7f];
1826 putc(mask, ce->ce_fp);
1827 if (ferror(ce->ce_fp)) {
1828 content_error(ce->ce_file, ct, "error writing to");
1832 ** finished escape sequence; next may
1833 ** be literal or a new escape sequence
1837 /* on to next byte */
1841 /* not in an escape sequence */
1844 ** starting an escape sequence,
1847 if (cp + 1 < ep && cp[1] == '\n') {
1848 /* "=\n" soft line break, eat the \n */
1852 if (cp + 1 >= ep || cp + 2 >= ep) {
1854 ** We don't have 2 bytes left,
1855 ** so this is an invalid escape
1856 ** sequence; just show the raw bytes
1859 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1861 ** Next 2 bytes are hex digits,
1862 ** making this a valid escape
1863 ** sequence; let's decode it (above).
1869 ** One or both of the next 2 is
1870 ** out of range, making this an
1871 ** invalid escape sequence; just
1872 ** show the raw bytes (below).
1877 /* Just show the raw byte. */
1878 putc(*cp, ce->ce_fp);
1879 if (ferror(ce->ce_fp)) {
1880 content_error(ce->ce_file, ct,
1881 "error writing to");
1887 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1891 fseek(ct->c_fp, 0L, SEEK_SET);
1893 if (fflush(ce->ce_fp)) {
1894 content_error(ce->ce_file, ct, "error writing to");
1898 fseek(ce->ce_fp, 0L, SEEK_SET);
1901 *file = ce->ce_file;
1906 return fileno(ce->ce_fp);
1909 free_encoding(ct, 0);
1925 if (init_encoding(ct, open7Bit) == NOTOK)
1928 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1934 open7Bit(CT ct, char **file)
1936 int cc, fd, len, own_ct_fp = 0;
1937 char buffer[BUFSIZ];
1938 /* sbeck -- handle suffixes */
1945 fseek(ce->ce_fp, 0L, SEEK_SET);
1950 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1951 content_error(ce->ce_file, ct,
1952 "unable to fopen for reading");
1958 if (*file == NULL) {
1959 ce->ce_file = mh_xstrdup(m_mktemp(tmp, NULL, NULL));
1962 ce->ce_file = mh_xstrdup(*file);
1966 /* sbeck@cise.ufl.edu -- handle suffixes */
1968 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1969 invo_name, ci->ci_type, ci->ci_subtype);
1970 cp = context_find(buffer);
1971 if (cp == NULL || *cp == '\0') {
1972 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1974 cp = context_find(buffer);
1976 if (cp != NULL && *cp != '\0') {
1977 if (ce->ce_unlink) {
1979 ** Temporary file already exists, so we rename to
1980 ** version with extension.
1982 char *file_org = mh_xstrdup(ce->ce_file);
1983 ce->ce_file = add(cp, ce->ce_file);
1984 if (rename(file_org, ce->ce_file)) {
1985 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1988 mh_free0(&file_org);
1991 ce->ce_file = add(cp, ce->ce_file);
1995 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1996 content_error(ce->ce_file, ct,
1997 "unable to fopen for reading/writing");
2001 if (ct->c_type == CT_MULTIPART) {
2003 CI ci = &ct->c_ctinfo;
2006 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2008 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2009 strlen(ci->ci_subtype);
2010 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2011 putc(';', ce->ce_fp);
2014 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2017 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2018 fputs("\n\t", ce->ce_fp);
2021 putc(' ', ce->ce_fp);
2024 fprintf(ce->ce_fp, "%s", buffer);
2028 if (ci->ci_comment) {
2029 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2031 fputs("\n\t", ce->ce_fp);
2034 putc(' ', ce->ce_fp);
2037 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2040 fprintf(ce->ce_fp, "\n");
2042 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2044 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2046 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2047 fprintf(ce->ce_fp, "\n");
2050 if ((len = ct->c_end - ct->c_begin) < 0)
2051 adios(EX_SOFTWARE, NULL, "internal error(3)");
2054 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2055 content_error(ct->c_file, ct,
2056 "unable to open for reading");
2062 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2064 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2066 content_error(ct->c_file, ct, "error reading from");
2070 content_error(NULL, ct, "premature eof");
2078 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2079 if (ferror(ce->ce_fp)) {
2080 content_error(ce->ce_file, ct,
2081 "error writing to");
2086 fseek(ct->c_fp, 0L, SEEK_SET);
2088 if (fflush(ce->ce_fp)) {
2089 content_error(ce->ce_file, ct, "error writing to");
2093 fseek(ce->ce_fp, 0L, SEEK_SET);
2096 *file = ce->ce_file;
2101 return fileno(ce->ce_fp);
2104 free_encoding(ct, 0);