2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 char buf[BUFSIZ], name[NAMESZ];
240 /* allocate the content structure */
241 ct = (CT) mh_xcalloc(1, sizeof(*ct));
244 ct->c_file = getcpy(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD;;) {
252 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
257 /* get copies of the buffers */
261 /* if necessary, get rest of field */
262 while (state == FLDPLUS) {
263 state = m_getfld(state, name, buf,
265 vp = add(buf, vp); /* add to previous value */
268 /* Now add the header data to the list */
269 add_header(ct, np, vp);
271 ct->c_begin = ftell(in) + 1;
275 ct->c_begin = ftell(in) - strlen(buf);
279 ct->c_begin = ftell(in);
284 adios(EX_DATAERR, NULL, "message format error in component #%d",
288 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
291 /* break out of the loop */
296 ** Read the content headers. We will parse the
297 ** MIME related header fields into their various
298 ** structures and set internal flags related to
299 ** content type/subtype, etc.
302 hp = ct->c_first_hf; /* start at first header field */
304 /* Get MIME-Version field */
305 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
308 unsigned char *cp, *dp;
311 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
314 ct->c_vrsn = getcpy(hp->value);
316 /* Now, cleanup this field */
321 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
323 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
328 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
330 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
333 for (dp = cp; istoken(*dp); dp++)
337 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
340 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
343 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
344 /* Get Content-Type field */
345 struct str2init *s2i;
346 CI ci = &ct->c_ctinfo;
348 /* Check if we've already seen a Content-Type header */
350 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
354 /* Parse the Content-Type field */
355 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
359 ** Set the Init function and the internal
360 ** flag for this content type.
362 for (s2i = str2cts; s2i->si_key; s2i++)
363 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
365 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
367 ct->c_type = s2i->si_val;
368 ct->c_ctinitfnx = s2i->si_init;
370 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
371 /* Get Content-Transfer-Encoding field */
373 unsigned char *cp, *dp;
374 struct str2init *s2i;
377 ** Check if we've already seen the
378 ** Content-Transfer-Encoding field
381 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
385 /* get copy of this field */
386 ct->c_celine = cp = getcpy(hp->value);
390 for (dp = cp; istoken(*dp); dp++)
396 ** Find the internal flag and Init function
397 ** for this transfer encoding.
399 for (s2i = str2ces; s2i->si_key; s2i++)
400 if (!mh_strcasecmp(cp, s2i->si_key))
402 if (!s2i->si_key && !uprf(cp, "X-"))
405 ct->c_encoding = s2i->si_val;
407 /* Call the Init function for this encoding */
408 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
411 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
412 /* Get Content-ID field */
413 ct->c_id = add(hp->value, ct->c_id);
415 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
416 /* Get Content-Description field */
417 ct->c_descr = add(hp->value, ct->c_descr);
419 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
420 /* Get Content-Disposition field */
421 ct->c_dispo = add(hp->value, ct->c_dispo);
425 hp = hp->next; /* next header field */
429 ** Check if we saw a Content-Type field.
430 ** If not, then assign a default value for
431 ** it, and the Init function.
435 ** If we are inside a multipart/digest message,
436 ** the default type is message/rfc822
439 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
441 ct->c_type = CT_MESSAGE;
442 ct->c_ctinitfnx = InitMessage;
445 ** Else default type is text/plain
447 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
449 ct->c_type = CT_TEXT;
450 ct->c_ctinitfnx = InitText;
454 /* Use default Transfer-Encoding, if necessary */
456 ct->c_encoding = CE_7BIT;
469 ** small routine to add header field to list
473 add_header(CT ct, char *name, char *value)
477 /* allocate header field structure */
478 hp = mh_xmalloc(sizeof(*hp));
480 /* link data into header structure */
485 /* link header structure into the list */
486 if (ct->c_first_hf == NULL) {
487 ct->c_first_hf = hp; /* this is the first */
490 ct->c_last_hf->next = hp; /* add it to the end */
499 ** Make sure that buf contains at least one appearance of name,
500 ** followed by =. If not, insert both name and value, just after
501 ** first semicolon, if any. Note that name should not contain a
502 ** trailing =. And quotes will be added around the value. Typical
503 ** usage: make sure that a Content-Disposition header contains
504 ** filename="foo". If it doesn't and value does, use value from
508 incl_name_value(unsigned char *buf, char *name, char *value) {
511 /* Assume that name is non-null. */
513 char *name_plus_equal = concat(name, "=", NULL);
515 if (!strstr(buf, name_plus_equal)) {
518 char *prefix, *suffix;
520 /* Trim trailing space, esp. newline. */
521 for (cp = &buf[strlen(buf) - 1];
522 cp >= buf && isspace(*cp); --cp) {
526 insertion = concat("; ", name, "=", "\"", value, "\"",
530 ** Insert at first semicolon, if any.
531 ** If none, append to end.
533 prefix = getcpy(buf);
534 if ((cp = strchr(prefix, ';'))) {
535 suffix = concat(cp, NULL);
537 newbuf = concat(prefix, insertion, suffix,
542 newbuf = concat(buf, insertion, "\n", NULL);
550 free(name_plus_equal);
557 ** Extract just name_suffix="foo", if any, from value. If there isn't
558 ** one, return the entire value. Note that, for example, a name_suffix
559 ** of name will match filename="foo", and return foo.
562 extract_name_value(char *name_suffix, char *value) {
563 char *extracted_name_value = value;
564 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
565 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
568 free(name_suffix_plus_quote);
569 if (name_suffix_equals) {
570 char *name_suffix_begin;
573 for (cp = name_suffix_equals; *cp != '"'; ++cp)
575 name_suffix_begin = ++cp;
576 /* Find second \". */
577 for (; *cp != '"'; ++cp)
580 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
581 memcpy(extracted_name_value, name_suffix_begin,
582 cp - name_suffix_begin);
583 extracted_name_value[cp - name_suffix_begin] = '\0';
586 return extracted_name_value;
590 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
591 ** directives. Fills in the information of the CTinfo structure.
594 get_ctinfo(unsigned char *cp, CT ct, int magic)
603 i = strlen(invo_name) + 2;
605 /* store copy of Content-Type line */
606 cp = ct->c_ctline = getcpy(cp);
608 while (isspace(*cp)) /* trim leading spaces */
611 /* change newlines to spaces */
612 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
615 /* trim trailing spaces */
616 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
622 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
624 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
627 for (dp = cp; istoken(*dp); dp++)
630 ci->ci_type = getcpy(cp); /* store content type */
634 advise(NULL, "invalid %s: field in message %s (empty type)",
635 TYPE_FIELD, ct->c_file);
639 /* down case the content type string */
640 for (dp = ci->ci_type; *dp; dp++)
641 if (isalpha(*dp) && isupper(*dp))
647 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
652 ci->ci_subtype = getcpy("");
660 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
663 for (dp = cp; istoken(*dp); dp++)
666 ci->ci_subtype = getcpy(cp); /* store the content subtype */
669 if (!*ci->ci_subtype) {
670 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
674 /* down case the content subtype string */
675 for (dp = ci->ci_subtype; *dp; dp++)
676 if (isalpha(*dp) && isupper(*dp))
683 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
687 ** Parse attribute/value pairs given with Content-Type
689 ep = (ap = ci->ci_attrs) + NPARMS;
695 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
703 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
707 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
711 /* down case the attribute name */
712 for (dp = cp; istoken(*dp); dp++)
713 if (isalpha(*dp) && isupper(*dp))
716 for (up = dp; isspace(*dp);)
718 if (dp == cp || *dp != '=') {
719 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
723 vp = (*ap = getcpy(cp)) + (up - cp);
725 for (dp++; isspace(*dp);)
728 /* now add the attribute value */
729 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
732 for (cp = ++dp, dp = vp;;) {
736 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
741 if ((c = *cp++) == '\0')
756 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
761 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
762 *ci->ci_values[ap - ci->ci_attrs] = '\0';
763 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
771 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
776 ** Get any <Content-Id> given in buffer
778 if (magic && *cp == '<') {
783 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784 advise(NULL, "invalid ID in message %s", ct->c_file);
790 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
801 ** Get any [Content-Description] given in buffer.
803 if (magic && *cp == '[') {
805 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
809 advise(NULL, "invalid description in message %s",
818 ct->c_descr = concat(ct->c_descr, "\n", NULL);
829 ** Get any {Content-Disposition} given in buffer.
831 if (magic && *cp == '{') {
833 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
837 advise(NULL, "invalid disposition in message %s",
846 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
857 ** Check if anything is left over
861 ci->ci_magic = getcpy(cp);
864 ** If there is a Content-Disposition header and
865 ** it doesn't have a *filename=, extract it from
866 ** the magic contents. The mhbasename call skips
867 ** any leading directory components.
870 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
872 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
880 get_comment(CT ct, unsigned char **ap, int istype)
885 char c, buffer[BUFSIZ], *dp;
897 advise(NULL, "invalid comment in message %s's %s: field",
898 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
903 if ((c = *cp++) == '\0')
926 if ((dp = ci->ci_comment)) {
927 ci->ci_comment = concat(dp, " ", buffer, NULL);
930 ci->ci_comment = getcpy(buffer);
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
952 return OK; /* not much to do here */
966 CI ci = &ct->c_ctinfo;
968 /* check for missing subtype */
969 if (!*ci->ci_subtype)
970 ci->ci_subtype = add("plain", ci->ci_subtype);
973 for (kv = SubText; kv->kv_key; kv++)
974 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
976 ct->c_subtype = kv->kv_value;
978 /* allocate text character set structure */
979 t = (struct text *) mh_xcalloc(1, sizeof(*t));
980 ct->c_ctparams = (void *) t;
982 /* scan for charset parameter */
983 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
984 if (!mh_strcasecmp(*ap, "charset"))
987 /* check if content specified a character set */
990 ct->c_charset = getcpy(norm_charmap(*ep));
991 /* match character set or set to CHARSET_UNKNOWN */
992 for (kv = Charset; kv->kv_key; kv++) {
993 if (!mh_strcasecmp(*ep, kv->kv_key)) {
997 t->tx_charset = kv->kv_value;
999 t->tx_charset = CHARSET_UNSPECIFIED;
1011 InitMultiPart(CT ct)
1015 unsigned char *cp, *dp;
1017 char *bp, buffer[BUFSIZ];
1018 struct multipart *m;
1020 struct part *part, **next;
1021 CI ci = &ct->c_ctinfo;
1026 ** The encoding for multipart messages must be either
1027 ** 7bit, 8bit, or binary (per RFC2045).
1029 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1030 && ct->c_encoding != CE_BINARY) {
1031 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1032 ct->c_encoding = CE_7BIT;
1036 for (kv = SubMultiPart; kv->kv_key; kv++)
1037 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1039 ct->c_subtype = kv->kv_value;
1042 ** Check for "boundary" parameter, which is
1043 ** required for multipart messages.
1046 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1047 if (!mh_strcasecmp(*ap, "boundary")) {
1053 /* complain if boundary parameter is missing */
1055 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1059 /* allocate primary structure for multipart info */
1060 m = (struct multipart *) mh_xcalloc(1, sizeof(*m));
1061 ct->c_ctparams = (void *) m;
1063 /* check if boundary parameter contains only whitespace characters */
1064 for (cp = bp; isspace(*cp); cp++)
1067 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1071 /* remove trailing whitespace from boundary parameter */
1072 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1077 /* record boundary separators */
1078 m->mp_start = concat(bp, "\n", NULL);
1079 m->mp_stop = concat(bp, "--\n", NULL);
1081 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1082 advise(ct->c_file, "unable to open for reading");
1086 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1088 next = &m->mp_parts;
1092 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1096 pos += strlen(buffer);
1097 if (buffer[0] != '-' || buffer[1] != '-')
1100 if (strcmp(buffer + 2, m->mp_start)!=0)
1103 part = (struct part *) mh_xcalloc(1, sizeof(*part));
1105 next = &part->mp_next;
1107 if (!(p = get_content(fp, ct->c_file,
1108 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1115 fseek(fp, pos, SEEK_SET);
1118 if (strcmp(buffer + 2, m->mp_start) == 0) {
1122 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1123 if (p->c_end < p->c_begin)
1124 p->c_begin = p->c_end;
1129 if (strcmp(buffer + 2, m->mp_stop) == 0)
1135 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1136 if (!inout && part) {
1138 p->c_end = ct->c_end;
1140 if (p->c_begin >= p->c_end) {
1141 for (next = &m->mp_parts; *next != part;
1142 next = &((*next)->mp_next))
1146 free((char *) part);
1151 /* reverse the order of the parts for multipart/alternative */
1152 if (ct->c_subtype == MULTI_ALTERNATE)
1156 ** label all subparts with part number, and
1157 ** then initialize the content of the subpart.
1162 char partnam[BUFSIZ];
1165 snprintf(partnam, sizeof(partnam), "%s.",
1167 pp = partnam + strlen(partnam);
1172 for (part = m->mp_parts, partnum = 1; part;
1173 part = part->mp_next, partnum++) {
1176 sprintf(pp, "%d", partnum);
1177 p->c_partno = getcpy(partnam);
1179 /* initialize the content of the subparts */
1180 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1195 ** reverse the order of the parts of a multipart
1199 reverse_parts(CT ct)
1202 struct multipart *m;
1203 struct part **base, **bmp, **next, *part;
1205 m = (struct multipart *) ct->c_ctparams;
1207 /* if only one part, just return */
1208 if (!m->mp_parts || !m->mp_parts->mp_next)
1211 /* count number of parts */
1213 for (part = m->mp_parts; part; part = part->mp_next)
1216 /* allocate array of pointers to the parts */
1217 base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base));
1220 /* point at all the parts */
1221 for (part = m->mp_parts; part; part = part->mp_next)
1225 /* reverse the order of the parts */
1226 next = &m->mp_parts;
1227 for (bmp--; bmp >= base; bmp--) {
1230 next = &part->mp_next;
1234 /* free array of pointers */
1235 free((char *) base);
1247 CI ci = &ct->c_ctinfo;
1249 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1250 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1254 /* check for missing subtype */
1255 if (!*ci->ci_subtype)
1256 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1259 for (kv = SubMessage; kv->kv_key; kv++)
1260 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1262 ct->c_subtype = kv->kv_value;
1264 switch (ct->c_subtype) {
1265 case MESSAGE_RFC822:
1268 case MESSAGE_PARTIAL:
1273 p = (struct partial *) mh_xcalloc(1, sizeof(*p));
1274 ct->c_ctparams = (void *) p;
1277 ** scan for parameters "id", "number",
1280 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1281 if (!mh_strcasecmp(*ap, "id")) {
1282 p->pm_partid = getcpy(*ep);
1285 if (!mh_strcasecmp(*ap, "number")) {
1286 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1288 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1293 if (!mh_strcasecmp(*ap, "total")) {
1294 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1301 if (!p->pm_partid || !p->pm_partno
1302 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1303 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1309 case MESSAGE_EXTERNAL:
1314 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1315 advise(ct->c_file, "unable to open for reading");
1319 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1321 if (!(p = get_content(fp, ct->c_file, 0))) {
1327 p->c_end = p->c_begin;
1332 switch (p->c_type) {
1337 if (p->c_subtype != MESSAGE_RFC822)
1342 (*p->c_ctinitfnx) (p);
1361 InitApplication(CT ct)
1364 CI ci = &ct->c_ctinfo;
1367 for (kv = SubApplication; kv->kv_key; kv++)
1368 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1370 ct->c_subtype = kv->kv_value;
1377 ** TRANSFER ENCODINGS
1381 init_encoding(CT ct, OpenCEFunc openfnx)
1385 ce = (CE) mh_xcalloc(1, sizeof(*ce));
1388 ct->c_ceopenfnx = openfnx;
1389 ct->c_ceclosefnx = close_encoding;
1390 ct->c_cesizefnx = size_encoding;
1397 close_encoding(CT ct)
1401 if (!(ce = ct->c_cefile))
1411 static unsigned long
1412 size_encoding(CT ct)
1420 if (!(ce = ct->c_cefile))
1421 return (ct->c_end - ct->c_begin);
1423 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1424 return (long) st.st_size;
1427 if (stat(ce->ce_file, &st) != NOTOK)
1428 return (long) st.st_size;
1433 if (ct->c_encoding == CE_EXTERNAL)
1434 return (ct->c_end - ct->c_begin);
1437 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1438 return (ct->c_end - ct->c_begin);
1440 if (fstat(fd, &st) != NOTOK)
1441 size = (long) st.st_size;
1445 (*ct->c_ceclosefnx) (ct);
1454 static unsigned char b642nib[0x80] = {
1455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1457 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1459 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1460 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1461 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1462 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1463 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1464 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1465 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1466 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1468 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1469 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1470 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1477 return init_encoding(ct, openBase64);
1482 openBase64(CT ct, char **file)
1485 int fd, len, skip, own_ct_fp = 0;
1487 unsigned char value, *b, *b1, *b2, *b3;
1488 unsigned char *cp, *ep;
1489 char buffer[BUFSIZ];
1490 /* sbeck -- handle suffixes */
1494 b = (unsigned char *) &bits;
1495 b1 = &b[endian > 0 ? 1 : 2];
1496 b2 = &b[endian > 0 ? 2 : 1];
1497 b3 = &b[endian > 0 ? 3 : 0];
1501 fseek(ce->ce_fp, 0L, SEEK_SET);
1506 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1507 content_error(ce->ce_file, ct,
1508 "unable to fopen for reading");
1514 if (*file == NULL) {
1515 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1518 ce->ce_file = getcpy(*file);
1522 /* sbeck@cise.ufl.edu -- handle suffixes */
1524 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1525 invo_name, ci->ci_type, ci->ci_subtype);
1526 cp = context_find(buffer);
1527 if (cp == NULL || *cp == '\0') {
1528 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1530 cp = context_find(buffer);
1532 if (cp != NULL && *cp != '\0') {
1533 if (ce->ce_unlink) {
1535 ** Temporary file already exists, so we rename it to
1536 ** the version with the extension.
1538 char *file_org = strdup(ce->ce_file);
1539 ce->ce_file = add(cp, ce->ce_file);
1540 if (rename(file_org, ce->ce_file)) {
1541 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1547 ce->ce_file = add(cp, ce->ce_file);
1551 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1552 content_error(ce->ce_file, ct,
1553 "unable to fopen for reading/writing");
1557 if ((len = ct->c_end - ct->c_begin) < 0)
1558 adios(EX_SOFTWARE, NULL, "internal error(1)");
1561 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1562 content_error(ct->c_file, ct,
1563 "unable to open for reading");
1573 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1575 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1577 content_error(ct->c_file, ct, "error reading from");
1581 content_error(NULL, ct, "premature eof");
1589 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1594 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1596 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1598 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1602 bits |= value << bitno;
1604 if ((bitno -= 6) < 0) {
1605 putc((char) *b1, ce->ce_fp);
1607 putc((char) *b2, ce->ce_fp);
1609 putc((char) *b3, ce->ce_fp);
1613 if (ferror(ce->ce_fp)) {
1614 content_error(ce->ce_file, ct,
1615 "error writing to");
1618 bitno = 18, bits = 0L, skip = 0;
1624 goto self_delimiting;
1633 fprintf(stderr, "premature ending (bitno %d)\n",
1636 content_error(NULL, ct, "invalid BASE64 encoding");
1641 fseek(ct->c_fp, 0L, SEEK_SET);
1643 if (fflush(ce->ce_fp)) {
1644 content_error(ce->ce_file, ct, "error writing to");
1648 fseek(ce->ce_fp, 0L, SEEK_SET);
1651 *file = ce->ce_file;
1656 return fileno(ce->ce_fp);
1659 free_encoding(ct, 0);
1672 static char hex2nib[0x80] = {
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1675 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1677 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1678 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1679 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1680 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1681 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1695 return init_encoding(ct, openQuoted);
1700 openQuoted(CT ct, char **file)
1702 int cc, len, quoted, own_ct_fp = 0;
1703 unsigned char *cp, *ep;
1704 char buffer[BUFSIZ];
1705 unsigned char mask = 0;
1707 /* sbeck -- handle suffixes */
1712 fseek(ce->ce_fp, 0L, SEEK_SET);
1717 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1718 content_error(ce->ce_file, ct,
1719 "unable to fopen for reading");
1725 if (*file == NULL) {
1726 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1729 ce->ce_file = getcpy(*file);
1733 /* sbeck@cise.ufl.edu -- handle suffixes */
1735 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1736 invo_name, ci->ci_type, ci->ci_subtype);
1737 cp = context_find(buffer);
1738 if (cp == NULL || *cp == '\0') {
1739 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1741 cp = context_find(buffer);
1743 if (cp != NULL && *cp != '\0') {
1744 if (ce->ce_unlink) {
1746 ** Temporary file already exists, so we rename it to
1747 ** the version with the extension.
1749 char *file_org = strdup(ce->ce_file);
1750 ce->ce_file = add(cp, ce->ce_file);
1751 if (rename(file_org, ce->ce_file)) {
1752 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1758 ce->ce_file = add(cp, ce->ce_file);
1762 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1763 content_error(ce->ce_file, ct,
1764 "unable to fopen for reading/writing");
1768 if ((len = ct->c_end - ct->c_begin) < 0)
1769 adios(EX_SOFTWARE, NULL, "internal error(2)");
1772 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1773 content_error(ct->c_file, ct,
1774 "unable to open for reading");
1782 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1784 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1785 content_error(NULL, ct, "premature eof");
1789 if ((cc = strlen(buffer)) > len)
1793 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1798 for (; cp < ep; cp++) {
1800 /* in an escape sequence */
1802 /* at byte 1 of an escape sequence */
1803 mask = hex2nib[*cp & 0x7f];
1804 /* next is byte 2 */
1807 /* at byte 2 of an escape sequence */
1809 mask |= hex2nib[*cp & 0x7f];
1810 putc(mask, ce->ce_fp);
1811 if (ferror(ce->ce_fp)) {
1812 content_error(ce->ce_file, ct, "error writing to");
1816 ** finished escape sequence; next may
1817 ** be literal or a new escape sequence
1821 /* on to next byte */
1825 /* not in an escape sequence */
1828 ** starting an escape sequence,
1831 if (cp + 1 < ep && cp[1] == '\n') {
1832 /* "=\n" soft line break, eat the \n */
1836 if (cp + 1 >= ep || cp + 2 >= ep) {
1838 ** We don't have 2 bytes left,
1839 ** so this is an invalid escape
1840 ** sequence; just show the raw bytes
1843 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1845 ** Next 2 bytes are hex digits,
1846 ** making this a valid escape
1847 ** sequence; let's decode it (above).
1853 ** One or both of the next 2 is
1854 ** out of range, making this an
1855 ** invalid escape sequence; just
1856 ** show the raw bytes (below).
1861 /* Just show the raw byte. */
1862 putc(*cp, ce->ce_fp);
1863 if (ferror(ce->ce_fp)) {
1864 content_error(ce->ce_file, ct,
1865 "error writing to");
1871 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1875 fseek(ct->c_fp, 0L, SEEK_SET);
1877 if (fflush(ce->ce_fp)) {
1878 content_error(ce->ce_file, ct, "error writing to");
1882 fseek(ce->ce_fp, 0L, SEEK_SET);
1885 *file = ce->ce_file;
1890 return fileno(ce->ce_fp);
1893 free_encoding(ct, 0);
1909 if (init_encoding(ct, open7Bit) == NOTOK)
1912 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1918 open7Bit(CT ct, char **file)
1920 int cc, fd, len, own_ct_fp = 0;
1921 char buffer[BUFSIZ];
1922 /* sbeck -- handle suffixes */
1929 fseek(ce->ce_fp, 0L, SEEK_SET);
1934 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1935 content_error(ce->ce_file, ct,
1936 "unable to fopen for reading");
1942 if (*file == NULL) {
1943 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1946 ce->ce_file = getcpy(*file);
1950 /* sbeck@cise.ufl.edu -- handle suffixes */
1952 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1953 invo_name, ci->ci_type, ci->ci_subtype);
1954 cp = context_find(buffer);
1955 if (cp == NULL || *cp == '\0') {
1956 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1958 cp = context_find(buffer);
1960 if (cp != NULL && *cp != '\0') {
1961 if (ce->ce_unlink) {
1963 ** Temporary file already exists, so we rename it to
1964 ** the version with the extension.
1966 char *file_org = strdup(ce->ce_file);
1967 ce->ce_file = add(cp, ce->ce_file);
1968 if (rename(file_org, ce->ce_file)) {
1969 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1975 ce->ce_file = add(cp, ce->ce_file);
1979 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1980 content_error(ce->ce_file, ct,
1981 "unable to fopen for reading/writing");
1985 if (ct->c_type == CT_MULTIPART) {
1987 CI ci = &ct->c_ctinfo;
1990 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1992 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1993 strlen(ci->ci_subtype);
1994 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1995 putc(';', ce->ce_fp);
1998 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2001 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2002 fputs("\n\t", ce->ce_fp);
2005 putc(' ', ce->ce_fp);
2008 fprintf(ce->ce_fp, "%s", buffer);
2012 if (ci->ci_comment) {
2013 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2015 fputs("\n\t", ce->ce_fp);
2018 putc(' ', ce->ce_fp);
2021 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2024 fprintf(ce->ce_fp, "\n");
2026 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2028 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2030 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2031 fprintf(ce->ce_fp, "\n");
2034 if ((len = ct->c_end - ct->c_begin) < 0)
2035 adios(EX_SOFTWARE, NULL, "internal error(3)");
2038 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2039 content_error(ct->c_file, ct,
2040 "unable to open for reading");
2046 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2048 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2050 content_error(ct->c_file, ct, "error reading from");
2054 content_error(NULL, ct, "premature eof");
2062 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2063 if (ferror(ce->ce_fp)) {
2064 content_error(ce->ce_file, ct,
2065 "error writing to");
2070 fseek(ct->c_fp, 0L, SEEK_SET);
2072 if (fflush(ce->ce_fp)) {
2073 content_error(ce->ce_file, ct, "error writing to");
2077 fseek(ce->ce_fp, 0L, SEEK_SET);
2080 *file = ce->ce_file;
2085 return fileno(ce->ce_fp);
2088 free_encoding(ct, 0);