2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
14 #include <h/mhparse.h>
19 extern int endian; /* mhmisc.c */
21 extern pid_t xpid; /* mhshowsbr.c */
24 ** Directory to place temp files. This must
25 ** be set before these routines are called.
30 ** Structures for TEXT messages
/*
** Lookup table pairing each recognised text subtype name with its
** TEXT_* code.  The scan over this table elsewhere in this file runs
** while kv_key is non-NULL and afterwards reads kv_value from the entry
** it stopped on, so the NULL-keyed row is both the terminator and the
** TEXT_UNKNOWN fallback.
*/
32 struct k2v SubText[] = {
33 { "plain", TEXT_PLAIN },
34 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
35 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
36 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Lookup table from charset parameter value to CHARSET_* code.  Only
** two names are recognised; the text initialisation code compares the
** parameter against kv_key with mh_strcasecmp() and stores kv_value of
** the entry it ends on, so the NULL-keyed row supplies CHARSET_UNKNOWN.
*/
39 struct k2v Charset[] = {
40 { "us-ascii", CHARSET_USASCII },
41 { "iso-8859-1", CHARSET_LATIN },
42 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
46 ** Structures for MULTIPART messages
/*
** Lookup table from multipart subtype name to MULTI_* code, scanned by
** InitMultiPart().  The NULL-keyed row terminates the scan and carries
** the MULTI_UNKNOWN fallback value.
*/
48 struct k2v SubMultiPart[] = {
49 { "mixed", MULTI_MIXED },
50 { "alternative", MULTI_ALTERNATE },
51 { "digest", MULTI_DIGEST },
52 { "parallel", MULTI_PARALLEL },
53 { NULL, MULTI_UNKNOWN } /* this one must be last! */
57 ** Structures for MESSAGE messages
/*
** Lookup table from message subtype name to MESSAGE_* code.  The
** NULL-keyed row terminates the scan and carries the MESSAGE_UNKNOWN
** fallback value.
*/
59 struct k2v SubMessage[] = {
60 { "rfc822", MESSAGE_RFC822 },
61 { "partial", MESSAGE_PARTIAL },
62 { "external-body", MESSAGE_EXTERNAL },
63 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
67 ** Structure for APPLICATION messages
/*
** Lookup table from application subtype name to APPLICATION_* code,
** scanned by InitApplication().  The NULL-keyed row terminates the scan
** and carries the APPLICATION_UNKNOWN fallback value.
*/
69 struct k2v SubApplication[] = {
70 { "octet-stream", APPLICATION_OCTETS },
71 { "postscript", APPLICATION_POSTSCRIPT },
72 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.  The first four are declared without static,
** so they have external linkage; the remaining ones are static helpers
** private to this file (header parsing, per-type initialisers and the
** transfer-encoding openers).
** NOTE(review): where the non-static routines are defined is not
** visible in this listing -- confirm against the other source files.
*/
79 int make_intermediates(char *);
80 void content_error(char *, CT, char *, ...);
83 void free_content(CT);
84 void free_encoding(CT, int);
89 static CT get_content(FILE *, char *, int);
90 static int get_comment(CT, unsigned char **, int);
92 static int InitGeneric(CT);
93 static int InitText(CT);
94 static int InitMultiPart(CT);
95 static void reverse_parts(CT);
96 static int InitMessage(CT);
97 static int InitApplication(CT);
98 static int init_encoding(CT, OpenCEFunc);
99 static unsigned long size_encoding(CT);
100 static int InitBase64(CT);
101 static int openBase64(CT, char **);
102 static int InitQuoted(CT);
103 static int openQuoted(CT, char **);
104 static int Init7Bit(CT);
/*
** Dispatch table for the primary content type: name, CT_* code and the
** initialiser stored into c_ctinitfnx by get_content().
**
** get_content() scans while si_key is non-NULL, so an unmatched type
** ends on the first NULL-keyed row (CT_EXTENSION).  It then has a
** separate test for names that do not begin with X-.
** NOTE(review): the statement guarded by that test is not visible in
** this listing; presumably it advances to the CT_UNKNOWN row, which is
** why both NULL rows must stay last and in this order -- confirm.
*/
106 struct str2init str2cts[] = {
107 { "application", CT_APPLICATION, InitApplication },
108 { "audio", CT_AUDIO, InitGeneric },
109 { "image", CT_IMAGE, InitGeneric },
110 { "message", CT_MESSAGE, InitMessage },
111 { "multipart", CT_MULTIPART, InitMultiPart },
112 { "text", CT_TEXT, InitText },
113 { "video", CT_VIDEO, InitGeneric },
114 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
115 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table for Content-Transfer-Encoding: name, CE_* code and the
** initialiser that get_content() calls when si_init is non-NULL.
** 8bit, 7bit and binary all share Init7Bit.
**
** As with the content type table, the scan stops on the first
** NULL-keyed row (CE_EXTENSION) and a separate test handles names that
** do not begin with X-.
** NOTE(review): the statement guarded by that test is not visible in
** this listing; presumably it advances to the CE_UNKNOWN row -- confirm.
*/
118 struct str2init str2ces[] = {
119 { "base64", CE_BASE64, InitBase64 },
120 { "quoted-printable", CE_QUOTED, InitQuoted },
121 { "8bit", CE_8BIT, Init7Bit },
122 { "7bit", CE_7BIT, Init7Bit },
123 { "binary", CE_BINARY, Init7Bit },
124 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
125 { NULL, CE_UNKNOWN, NULL },
132 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
143 ** Main entry point for parsing a MIME message or file.
144 ** It returns the Content structure for the top level
145 ** entity in the file.
/*
** parse_mime -- open a message and parse its top-level MIME entity.
**
** file: path of the message, or a single dash for standard input.
**
** Visible behaviour:
**  - For standard input, a temporary file is created with m_mktemp2(),
**    its name is copied with getcpy() into file, stdin is read line by
**    line with fgets(), and the temporary file is rewound.  Read and
**    write problems are reported through advise().
**  - Otherwise the named file is opened read-only; failure is reported
**    through advise().
**  - The headers are parsed with get_content(fp, file, 1), i.e. as a
**    top-level entity.
**  - c_unlink is set so that the temporary file is removed later.
**  - If get_content() left c_end at 0, it is set to the file size
**    obtained with fseek()/ftell().
**  - The type-specific initialiser c_ctinitfnx, when set, is run and
**    its result is compared against NOTOK.
**
** NOTE(review): the return statements, the cleanup on the error paths
** and the condition guarding the c_unlink assignment are not visible in
** this listing; presumably NULL is returned on failure and c_unlink is
** only set for the stdin case -- confirm.
*/
148 parse_mime(char *file)
156 ** Check if file is actually standard input
158 if ((is_stdin = (strcmp(file, "-")==0))) {
159 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
161 advise("mhparse", "unable to create temporary file");
164 file = getcpy(tfile);
167 while (fgets(buffer, sizeof(buffer), stdin))
173 advise("stdin", "error reading");
178 advise(file, "error writing");
181 fseek(fp, 0L, SEEK_SET);
182 } else if ((fp = fopen(file, "r")) == NULL) {
183 advise(file, "unable to read");
187 if (!(ct = get_content(fp, file, 1))) {
190 advise(NULL, "unable to decode %s", file);
195 ct->c_unlink = 1; /* temp file to remove */
199 if (ct->c_end == 0L) {
200 fseek(fp, 0L, SEEK_END);
201 ct->c_end = ftell(fp);
204 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
216 ** Main routine for reading/parsing the headers
217 ** of a message content.
219 ** toplevel = 1 # we are at the top level of the message
220 ** toplevel = 0 # we are inside message type or multipart type
221 ** # other than multipart/digest
222 ** toplevel = -1 # we are inside multipart/digest
223 ** NB: on failure we will fclose(in)!
/*
** get_content -- read the header fields of one entity and build its
** content structure.
**
** in:       stream positioned at the first header field of the entity
** file:     name of the underlying file, copied into ct->c_file
** toplevel: 1 at the top level of the message, 0 inside a message or
**           multipart other than digest, -1 inside multipart/digest
**
** Visible steps:
**  1. A zeroed content structure is allocated with calloc(); running
**     out of memory is fatal (adios).
**  2. m_getfld() is called in a loop.  Each field, including any
**     continuation chunks signalled by FLDPLUS, is appended to the
**     header list with add_header(), and ct->c_begin is advanced from
**     ftell(in) so that it ends up marking the start of the body.
**     Format errors and unexpected states are fatal (adios).
**  3. The collected header list is walked once:
**       - MIME-Version: copied to c_vrsn, cleaned up and compared with
**         VRSN_VALUE; an unknown value only produces a warning.
**       - Content-Type: parsed by get_ctinfo(); the primary type is
**         looked up in str2cts to set c_type and c_ctinitfnx.
**       - Content-Transfer-Encoding: copied to c_celine, looked up in
**         str2ces to set c_encoding, and the matching init function,
**         if any, is called.
**       - Content-ID, Content-Description and Content-Disposition are
**         accumulated into c_id, c_descr and c_dispo with add().
**     A repeated MIME-Version, Content-Type or
**     Content-Transfer-Encoding field is reported through advise().
**  4. Defaults: without a Content-Type field the entity becomes
**     message/rfc822 (InitMessage) inside a digest and text/plain
**     (InitText) otherwise; the default encoding is CE_7BIT.
**
** NOTE(review): the case labels of the m_getfld() switch, the
** conditions guarding the defaults, and every return or cleanup
** statement are not visible in this listing.  The note preceding this
** function states that the stream is closed on failure -- confirm the
** exact error paths against the complete source.
*/
227 get_content(FILE *in, char *file, int toplevel)
230 char buf[BUFSIZ], name[NAMESZ];
235 /* allocate the content structure */
236 if (!(ct = (CT) calloc(1, sizeof(*ct))))
237 adios(NULL, "out of memory");
240 ct->c_file = getcpy(file);
241 ct->c_begin = ftell(ct->c_fp) + 1;
244 ** Parse the header fields for this
245 ** content into a linked list.
247 for (compnum = 1, state = FLD;;) {
248 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
254 /* get copies of the buffers */
258 /* if necessary, get rest of field */
259 while (state == FLDPLUS) {
260 state = m_getfld(state, name, buf,
262 vp = add(buf, vp); /* add to previous value */
265 /* Now add the header data to the list */
266 add_header(ct, np, vp);
268 /* continue, if this isn't the last header field */
269 if (state != FLDEOF) {
270 ct->c_begin = ftell(in) + 1;
277 ct->c_begin = ftell(in) - strlen(buf);
281 ct->c_begin = ftell(in);
286 adios(NULL, "message format error in component #%d",
290 adios(NULL, "getfld() returned %d", state);
293 /* break out of the loop */
298 ** Read the content headers. We will parse the
299 ** MIME related header fields into their various
300 ** structures and set internal flags related to
301 ** content type/subtype, etc.
304 hp = ct->c_first_hf; /* start at first header field */
306 /* Get MIME-Version field */
307 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
310 unsigned char *cp, *dp;
313 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
316 ct->c_vrsn = getcpy(hp->value);
318 /* Now, cleanup this field */
323 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
325 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
330 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
332 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
335 for (dp = cp; istoken(*dp); dp++)
339 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
342 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
345 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
346 /* Get Content-Type field */
347 struct str2init *s2i;
348 CI ci = &ct->c_ctinfo;
350 /* Check if we've already seen a Content-Type header */
352 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
356 /* Parse the Content-Type field */
357 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
361 ** Set the Init function and the internal
362 ** flag for this content type.
364 for (s2i = str2cts; s2i->si_key; s2i++)
365 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
367 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
369 ct->c_type = s2i->si_val;
370 ct->c_ctinitfnx = s2i->si_init;
372 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
373 /* Get Content-Transfer-Encoding field */
375 unsigned char *cp, *dp;
376 struct str2init *s2i;
379 ** Check if we've already seen the
380 ** Content-Transfer-Encoding field
383 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
387 /* get copy of this field */
388 ct->c_celine = cp = getcpy(hp->value);
392 for (dp = cp; istoken(*dp); dp++)
398 ** Find the internal flag and Init function
399 ** for this transfer encoding.
401 for (s2i = str2ces; s2i->si_key; s2i++)
402 if (!mh_strcasecmp(cp, s2i->si_key))
404 if (!s2i->si_key && !uprf(cp, "X-"))
407 ct->c_encoding = s2i->si_val;
409 /* Call the Init function for this encoding */
410 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
413 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
414 /* Get Content-ID field */
415 ct->c_id = add(hp->value, ct->c_id);
417 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
418 /* Get Content-Description field */
419 ct->c_descr = add(hp->value, ct->c_descr);
421 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
422 /* Get Content-Disposition field */
423 ct->c_dispo = add(hp->value, ct->c_dispo);
427 hp = hp->next; /* next header field */
431 ** Check if we saw a Content-Type field.
432 ** If not, then assign a default value for
433 ** it, and the Init function.
437 ** If we are inside a multipart/digest message,
438 ** so default type is message/rfc822
441 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
443 ct->c_type = CT_MESSAGE;
444 ct->c_ctinitfnx = InitMessage;
447 ** Else default type is text/plain
449 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
451 ct->c_type = CT_TEXT;
452 ct->c_ctinitfnx = InitText;
456 /* Use default Transfer-Encoding, if necessary */
458 ct->c_encoding = CE_7BIT;
471 ** small routine to add header field to list
/*
** add_header -- append one header field to the header list of ct.
**
** A new list node is allocated with mh_xmalloc().  If the list is
** empty (c_first_hf is NULL) the node becomes the first element,
** otherwise it is linked after the current last element c_last_hf.
**
** NOTE(review): the statements that store name and value in the node
** and that update c_last_hf are not visible in this listing; presumably
** the node keeps the passed pointers rather than copies -- confirm.
*/
475 add_header(CT ct, char *name, char *value)
479 /* allocate header field structure */
480 hp = mh_xmalloc(sizeof(*hp));
482 /* link data into header structure */
487 /* link header structure into the list */
488 if (ct->c_first_hf == NULL) {
489 ct->c_first_hf = hp; /* this is the first */
492 ct->c_last_hf->next = hp; /* add it to the end */
501 ** Make sure that buf contains at least one appearance of name,
502 ** followed by =. If not, insert both name and value, just after
503 ** first semicolon, if any. Note that name should not contain a
504 ** trailing =. And quotes will be added around the value. Typical
505 ** usage: make sure that a Content-Disposition header contains
506 ** filename="foo". If it doesn't and value does, use value from
/*
** incl_name_value -- make sure a header value carries a given parameter.
**
** buf:   header value to inspect
** name:  parameter name without the trailing equals sign
** value: parameter value; it is wrapped in double quotes on insertion
**
** If buf does not contain name followed by an equals sign, a fragment
** consisting of a semicolon, a space, the name, an equals sign and the
** quoted value is built with concat().  When buf contains a semicolon
** the fragment is spliced in at the first one, otherwise it is appended
** to buf followed by a newline.
**
** NOTE(review): the presence test is a plain strstr() on name plus an
** equals sign, so a longer parameter name ending in the same characters
** also counts as present.
** NOTE(review): the trim loop starts at buf[strlen(buf) - 1]; for an
** empty buf that address lies before the start of the buffer.
** NOTE(review): the return statement and the release of the
** intermediate strings other than name_plus_equal are not visible in
** this listing -- confirm ownership of buf and of the result.
*/
510 incl_name_value(unsigned char *buf, char *name, char *value) {
513 /* Assume that name is non-null. */
515 char *name_plus_equal = concat(name, "=", NULL);
517 if (!strstr(buf, name_plus_equal)) {
520 char *prefix, *suffix;
522 /* Trim trailing space, esp. newline. */
523 for (cp = &buf[strlen(buf) - 1];
524 cp >= buf && isspace(*cp); --cp) {
528 insertion = concat("; ", name, "=", "\"", value, "\"",
532 ** Insert at first semicolon, if any.
533 ** If none, append to end.
535 prefix = getcpy(buf);
536 if ((cp = strchr(prefix, ';'))) {
537 suffix = concat(cp, NULL);
539 newbuf = concat(prefix, insertion, suffix,
544 newbuf = concat(buf, insertion, "\n", NULL);
552 free(name_plus_equal);
559 ** Extract just name_suffix="foo", if any, from value. If there isn't
560 ** one, return the entire value. Note that, for example, a name_suffix
561 ** of name will match filename="foo", and return foo.
/*
** extract_name_value -- pull the quoted value of a parameter out of a
** header value.
**
** name_suffix: tail of the parameter name to look for
** value:       text to search
**
** The function searches value for name_suffix followed by an equals
** sign and a double quote.  On a match it copies the characters between
** that quote and the next double quote into a fresh mh_xmalloc() buffer
** and returns it.  Without a match it returns value itself.
**
** NOTE(review): the caller cannot tell from the result alone whether it
** received a new allocation or the original pointer.
** NOTE(review): both scanning loops test only for a double quote; the
** second one has no visible check for the end of the string, so an
** unterminated quoted value would be scanned past its terminator --
** confirm against the complete source.
*/
564 extract_name_value(char *name_suffix, char *value) {
565 char *extracted_name_value = value;
566 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
567 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
570 free(name_suffix_plus_quote);
571 if (name_suffix_equals) {
572 char *name_suffix_begin;
575 for (cp = name_suffix_equals; *cp != '"'; ++cp)
577 name_suffix_begin = ++cp;
578 /* Find second \". */
579 for (; *cp != '"'; ++cp)
582 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
583 memcpy(extracted_name_value, name_suffix_begin,
584 cp - name_suffix_begin);
585 extracted_name_value[cp - name_suffix_begin] = '\0';
588 return extracted_name_value;
592 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
593 ** directives. Fills in the information of the CTinfo structure.
/*
** get_ctinfo -- parse a Content-Type value into the c_ctinfo member of ct.
**
** cp:    text of the Content-Type field (or of a composition directive)
** ct:    content structure to fill in
** magic: non-zero to also accept the composition extensions handled at
**        the end of this function
**
** Visible steps:
**  - A private copy of cp is stored in ct->c_ctline and parsed in place:
**    leading blanks are skipped, newlines become spaces and trailing
**    blanks are trimmed.
**  - Parenthesised comments are consumed by get_comment() wherever one
**    may appear.
**  - The primary type is copied into ci_type and lower-cased; an empty
**    type is reported as invalid.  The subtype is handled the same way
**    and is set to the empty string when none is given.
**  - Parameters are stored in ci_attrs and ci_values, at most NPARMS of
**    them.  Each attribute name is lower-cased; the value pointer
**    points into the same allocation as the name.  Quoted-string values
**    are copied character by character; an unterminated one is
**    reported as invalid.
**  - With magic set, an identifier in angle brackets goes to c_id, a
**    description in square brackets to c_descr and a disposition in
**    curly braces to c_dispo.
**  - Remaining text is either kept as ci_magic, in which case a
**    filename parameter for c_dispo is derived from it with
**    extract_name_value(), mhbasename() and incl_name_value(), or it
**    is reported as extraneous information.
**
** The local i is strlen(invo_name) + 2 and serves as the indentation
** width for the second line of the multi-line diagnostics.
**
** NOTE(review): one diagnostic prints the pointer difference dp - cp
** with a %d conversion; the difference is not of type int.
** NOTE(review): return statements and several guarding conditions are
** not visible in this listing; callers in this file compare the result
** with NOTOK.
*/
596 get_ctinfo(unsigned char *cp, CT ct, int magic)
605 i = strlen(invo_name) + 2;
607 /* store copy of Content-Type line */
608 cp = ct->c_ctline = getcpy(cp);
610 while (isspace(*cp)) /* trim leading spaces */
613 /* change newlines to spaces */
614 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
617 /* trim trailing spaces */
618 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
624 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
626 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
629 for (dp = cp; istoken(*dp); dp++)
632 ci->ci_type = getcpy(cp); /* store content type */
636 advise(NULL, "invalid %s: field in message %s (empty type)",
637 TYPE_FIELD, ct->c_file);
641 /* down case the content type string */
642 for (dp = ci->ci_type; *dp; dp++)
643 if (isalpha(*dp) && isupper(*dp))
649 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
654 ci->ci_subtype = getcpy("");
662 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
665 for (dp = cp; istoken(*dp); dp++)
668 ci->ci_subtype = getcpy(cp); /* store the content subtype */
671 if (!*ci->ci_subtype) {
672 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
676 /* down case the content subtype string */
677 for (dp = ci->ci_subtype; *dp; dp++)
678 if (isalpha(*dp) && isupper(*dp))
685 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
689 ** Parse attribute/value pairs given with Content-Type
691 ep = (ap = ci->ci_attrs) + NPARMS;
697 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
705 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
709 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
713 /* down case the attribute name */
714 for (dp = cp; istoken(*dp); dp++)
715 if (isalpha(*dp) && isupper(*dp))
718 for (up = dp; isspace(*dp);)
720 if (dp == cp || *dp != '=') {
721 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
725 vp = (*ap = getcpy(cp)) + (up - cp);
727 for (dp++; isspace(*dp);)
730 /* now add the attribute value */
731 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
734 for (cp = ++dp, dp = vp;;) {
738 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
743 if ((c = *cp++) == '\0')
758 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
763 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
771 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
776 ** Get any <Content-Id> given in buffer
778 if (magic && *cp == '<') {
783 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
784 advise(NULL, "invalid ID in message %s", ct->c_file);
790 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
801 ** Get any [Content-Description] given in buffer.
803 if (magic && *cp == '[') {
805 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
809 advise(NULL, "invalid description in message %s",
818 ct->c_descr = concat(ct->c_descr, "\n", NULL);
829 ** Get any {Content-Disposition} given in buffer.
831 if (magic && *cp == '{') {
833 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
837 advise(NULL, "invalid disposition in message %s",
846 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
857 ** Check if anything is left over
861 ci->ci_magic = getcpy(cp);
864 ** If there is a Content-Disposition header and
865 ** it doesn't have a *filename=, extract it from
866 ** the magic contents. The mhbasename call skips
867 ** any leading directory components.
870 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
872 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** get_comment -- consume a parenthesised comment from a header value.
**
** ct:     content structure whose ci_comment receives the text
** ap:     address of the parse pointer; callers invoke this function
**         when the pointer is at an opening parenthesis
** istype: non-zero when parsing a Content-Type field, zero for
**         MIME-Version; only selects the field name used in the
**         diagnostic
**
** The comment text is gathered in a local buffer of BUFSIZ bytes.  If
** ci_comment already holds text, the new text is appended after a
** single space using concat(); otherwise it is stored with getcpy().
** Reaching the end of the string inside the comment is reported as an
** invalid comment.
**
** NOTE(review): the loop that fills the buffer is not visible in this
** listing, so whether writes to it are bounds-checked cannot be
** determined here -- confirm.  The return statements are not visible
** either; callers compare the result with NOTOK.
*/
880 get_comment(CT ct, unsigned char **ap, int istype)
885 char c, buffer[BUFSIZ], *dp;
897 advise(NULL, "invalid comment in message %s's %s: field",
898 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
903 if ((c = *cp++) == '\0')
926 if ((dp = ci->ci_comment)) {
927 ci->ci_comment = concat(dp, " ", buffer, NULL);
930 ci->ci_comment = getcpy(buffer);
945 ** Handles content types audio, image, and video.
946 ** There's not much to do right here.
952 return OK; /* not much to do here */
/*
** NOTE(review): the signature line of this function is not visible in
** this listing.  The body scans SubText and Charset and fills a struct
** text, so it is presumably InitText, the initialiser registered for
** the text content type -- confirm.
**
** Visible steps:
**  - An empty subtype is replaced by plain.
**  - The subtype is looked up in SubText and the resulting code is
**    stored in c_subtype.
**  - A zeroed struct text is allocated (out of memory is fatal) and
**    attached as c_ctparams.
**  - The parameter list is searched for a charset attribute.  When one
**    is found, a normalised copy made with norm_charmap() is stored in
**    c_charset and tx_charset is set from the Charset table; otherwise
**    tx_charset becomes CHARSET_UNSPECIFIED.
*/
966 CI ci = &ct->c_ctinfo;
968 /* check for missing subtype */
969 if (!*ci->ci_subtype)
970 ci->ci_subtype = add("plain", ci->ci_subtype);
973 for (kv = SubText; kv->kv_key; kv++)
974 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
976 ct->c_subtype = kv->kv_value;
978 /* allocate text character set structure */
979 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
980 adios(NULL, "out of memory");
981 ct->c_ctparams = (void *) t;
983 /* scan for charset parameter */
984 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
985 if (!mh_strcasecmp(*ap, "charset"))
988 /* check if content specified a character set */
991 ct->c_charset = getcpy(norm_charmap(*ep));
992 /* match character set or set to CHARSET_UNKNOWN */
993 for (kv = Charset; kv->kv_key; kv++) {
994 if (!mh_strcasecmp(*ep, kv->kv_key)) {
998 t->tx_charset = kv->kv_value;
1000 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** InitMultiPart -- split a multipart entity into its parts.
**
** Visible steps:
**  - An encoding other than 7bit, 8bit or binary is reported through
**    admonish().
**  - The subtype is looked up in SubMultiPart and stored in c_subtype.
**  - The boundary parameter is located; a missing boundary, or one made
**    only of whitespace, is reported through advise().  Trailing
**    whitespace is removed from it.
**  - A zeroed struct multipart is allocated and attached as c_ctparams.
**    mp_start is the boundary followed by a newline and mp_stop is the
**    boundary followed by two dashes and a newline.  Body lines are
**    compared against these after their two leading dashes.
**  - The file is opened if necessary and read line by line from
**    c_begin.  Each start delimiter allocates a new struct part and
**    parses its headers with get_content(), passing -1 inside a digest
**    and 0 otherwise.  The c_end of the previous part is derived from
**    the stream position minus the delimiter line, and c_begin is
**    pulled back if it would lie beyond c_end.
**  - Running out of input before the stop delimiter is reported as
**    bogus multipart content; the last part then ends at ct->c_end and
**    is dropped again if it turns out to be empty.
**  - For multipart/alternative the part order is reversed.
**  - Every part gets a part number string made of a prefix, a dot and a
**    running number, after which its own initialiser is invoked.
**
** NOTE(review): the part number is written with sprintf() at the end of
** the prefix already stored in partnam, without a visible bound on the
** remaining space -- confirm that the prefix cannot fill the buffer.
** NOTE(review): return statements and the cleanup on error paths are
** not visible in this listing.
*/
1012 InitMultiPart(CT ct)
1016 unsigned char *cp, *dp;
1018 char *bp, buffer[BUFSIZ];
1019 struct multipart *m;
1021 struct part *part, **next;
1022 CI ci = &ct->c_ctinfo;
1027 ** The encoding for multipart messages must be either
1028 ** 7bit, 8bit, or binary (per RFC2045).
1030 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1031 && ct->c_encoding != CE_BINARY) {
1032 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1037 for (kv = SubMultiPart; kv->kv_key; kv++)
1038 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1040 ct->c_subtype = kv->kv_value;
1043 ** Check for "boundary" parameter, which is
1044 ** required for multipart messages.
1047 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1048 if (!mh_strcasecmp(*ap, "boundary")) {
1054 /* complain if boundary parameter is missing */
1056 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1060 /* allocate primary structure for multipart info */
1061 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1062 adios(NULL, "out of memory");
1063 ct->c_ctparams = (void *) m;
1065 /* check if boundary parameter contains only whitespace characters */
1066 for (cp = bp; isspace(*cp); cp++)
1069 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1073 /* remove trailing whitespace from boundary parameter */
1074 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1079 /* record boundary separators */
1080 m->mp_start = concat(bp, "\n", NULL);
1081 m->mp_stop = concat(bp, "--\n", NULL);
1083 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1084 advise(ct->c_file, "unable to open for reading");
1088 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1090 next = &m->mp_parts;
1094 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1098 pos += strlen(buffer);
1099 if (buffer[0] != '-' || buffer[1] != '-')
1102 if (strcmp(buffer + 2, m->mp_start)!=0)
1105 if ((part = (struct part *) calloc(1, sizeof(*part)))
1107 adios(NULL, "out of memory");
1109 next = &part->mp_next;
1111 if (!(p = get_content(fp, ct->c_file,
1112 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1119 fseek(fp, pos, SEEK_SET);
1122 if (strcmp(buffer + 2, m->mp_start) == 0) {
1126 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1127 if (p->c_end < p->c_begin)
1128 p->c_begin = p->c_end;
1133 if (strcmp(buffer + 2, m->mp_stop) == 0)
1139 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1140 if (!inout && part) {
1142 p->c_end = ct->c_end;
1144 if (p->c_begin >= p->c_end) {
1145 for (next = &m->mp_parts; *next != part;
1146 next = &((*next)->mp_next))
1150 free((char *) part);
1155 /* reverse the order of the parts for multipart/alternative */
1156 if (ct->c_subtype == MULTI_ALTERNATE)
1160 ** label all subparts with part number, and
1161 ** then initialize the content of the subpart.
1166 char partnam[BUFSIZ];
1169 snprintf(partnam, sizeof(partnam), "%s.",
1171 pp = partnam + strlen(partnam);
1176 for (part = m->mp_parts, partnum = 1; part;
1177 part = part->mp_next, partnum++) {
1180 sprintf(pp, "%d", partnum);
1181 p->c_partno = getcpy(partnam);
1183 /* initialize the content of the subparts */
1184 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1199 ** reverse the order of the parts of a multipart
/*
** reverse_parts -- reverse the linked list of parts of a multipart.
**
** The multipart data is taken from ct->c_ctparams.  Lists with no part
** or a single part are left alone.  Otherwise the parts are counted, a
** temporary array with one spare slot is allocated with calloc() (out
** of memory is fatal), filled with the part pointers in list order and
** then walked backwards to relink the list starting at mp_parts.  The
** array is freed before returning.
**
** NOTE(review): the statements inside the counting, filling and
** relinking loops are not visible in this listing -- confirm the
** termination of the new list against the complete source.
*/
1203 reverse_parts(CT ct)
1206 struct multipart *m;
1207 struct part **base, **bmp, **next, *part;
1209 m = (struct multipart *) ct->c_ctparams;
1211 /* if only one part, just return */
1212 if (!m->mp_parts || !m->mp_parts->mp_next)
1215 /* count number of parts */
1217 for (part = m->mp_parts; part; part = part->mp_next)
1220 /* allocate array of pointers to the parts */
1221 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1222 adios(NULL, "out of memory");
1225 /* point at all the parts */
1226 for (part = m->mp_parts; part; part = part->mp_next)
1230 /* reverse the order of the parts */
1231 next = &m->mp_parts;
1232 for (bmp--; bmp >= base; bmp--) {
1235 next = &part->mp_next;
1239 /* free array of pointers */
1240 free((char *) base);
/*
** NOTE(review): the signature line of this function is not visible in
** this listing.  The body scans SubMessage and switches on the
** MESSAGE_* codes, so it is presumably InitMessage, the initialiser
** registered for the message content type -- confirm.
**
** Visible steps:
**  - An encoding other than 7bit or 8bit is reported through admonish().
**  - An empty subtype is replaced by rfc822; the subtype is then looked
**    up in SubMessage and stored in c_subtype.
**  - MESSAGE_PARTIAL: a zeroed struct partial is allocated and attached
**    as c_ctparams.  The id parameter is copied into pm_partid; number
**    and total are read with sscanf() into pm_partno and pm_maxno.  A
**    missing id, a missing or non-positive number, or a number larger
**    than a given total is reported as invalid.
**  - Further down, the file is opened if necessary, the stream is
**    positioned at c_begin, an inner entity is parsed with
**    get_content() using toplevel 0, its c_end is set equal to its
**    c_begin, and its initialiser is called depending on its type.
**
** NOTE(review): several case labels, break statements and the return
** statements are not visible in this listing, so which subtype the
** inner-entity handling belongs to cannot be stated with certainty --
** confirm against the complete source.
*/
1252 CI ci = &ct->c_ctinfo;
1254 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1255 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1259 /* check for missing subtype */
1260 if (!*ci->ci_subtype)
1261 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1264 for (kv = SubMessage; kv->kv_key; kv++)
1265 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1267 ct->c_subtype = kv->kv_value;
1269 switch (ct->c_subtype) {
1270 case MESSAGE_RFC822:
1273 case MESSAGE_PARTIAL:
1278 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1279 adios(NULL, "out of memory");
1280 ct->c_ctparams = (void *) p;
1283 ** scan for parameters "id", "number",
1286 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1287 if (!mh_strcasecmp(*ap, "id")) {
1288 p->pm_partid = getcpy(*ep);
1291 if (!mh_strcasecmp(*ap, "number")) {
1292 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1294 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1299 if (!mh_strcasecmp(*ap, "total")) {
1300 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1307 if (!p->pm_partid || !p->pm_partno
1308 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1309 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1315 case MESSAGE_EXTERNAL:
1320 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1321 advise(ct->c_file, "unable to open for reading");
1325 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1327 if (!(p = get_content(fp, ct->c_file, 0))) {
1333 p->c_end = p->c_begin;
1338 switch (p->c_type) {
1343 if (p->c_subtype != MESSAGE_RFC822)
1348 (*p->c_ctinitfnx) (p);
/*
** InitApplication -- initialiser for the application content type.
**
** The subtype is looked up in SubApplication without regard to case
** and the code of the entry the scan ends on is stored in c_subtype.
** NOTE(review): the return statement is not visible in this listing.
*/
1367 InitApplication(CT ct)
1370 CI ci = &ct->c_ctinfo;
1373 for (kv = SubApplication; kv->kv_key; kv++)
1374 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1376 ct->c_subtype = kv->kv_value;
1383 ** TRANSFER ENCODINGS
/*
** init_encoding -- attach transfer-encoding state and callbacks to ct.
**
** openfnx: the opener that decodes the body on demand
**
** A zeroed encoding structure is allocated with calloc(); running out
** of memory is fatal.  The open callback is set to openfnx, the close
** callback to close_encoding and the size callback to size_encoding.
** NOTE(review): the statement that stores the new structure in ct and
** the return statement are not visible in this listing.
*/
1387 init_encoding(CT ct, OpenCEFunc openfnx)
1391 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1392 adios(NULL, "out of memory");
1395 ct->c_ceopenfnx = openfnx;
1396 ct->c_ceclosefnx = close_encoding;
1397 ct->c_cesizefnx = size_encoding;
/*
** close_encoding -- close callback installed by init_encoding().
**
** The encoding state is taken from ct->c_cefile; the visible test
** covers the case that none is attached.
** NOTE(review): the rest of the body is not visible in this listing.
*/
1404 close_encoding(CT ct)
1408 if (!(ce = ct->c_cefile))
/*
** size_encoding -- size callback installed by init_encoding(); reports
** the size of the decoded content.
**
** Visible strategy, in order:
**  - Without encoding state the encoded length c_end - c_begin is
**    returned.
**  - If a decoded stream is already open, its size comes from fstat().
**  - Otherwise stat() is tried on ce_file.
**  - For CE_EXTERNAL the encoded length is returned.
**  - As a last resort the content is decoded through the open
**    callback, measured with fstat() on the returned descriptor and
**    closed again through the close callback; if opening fails the
**    encoded length is returned.
**
** NOTE(review): the function returns unsigned long, while the stat
** sizes are cast to long first.
** NOTE(review): some guarding conditions and the final return are not
** visible in this listing.
*/
1418 static unsigned long
1419 size_encoding(CT ct)
1427 if (!(ce = ct->c_cefile))
1428 return (ct->c_end - ct->c_begin);
1430 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1431 return (long) st.st_size;
1434 if (stat(ce->ce_file, &st) != NOTOK)
1435 return (long) st.st_size;
1440 if (ct->c_encoding == CE_EXTERNAL)
1441 return (ct->c_end - ct->c_begin);
1444 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1445 return (ct->c_end - ct->c_begin);
1447 if (fstat(fd, &st) != NOTOK)
1448 size = (long) st.st_size;
1452 (*ct->c_ceclosefnx) (ct);
/*
** Decode table for base64, indexed by a 7-bit character code; the
** decoder masks the input byte with 0x7f before indexing.  Each row
** below covers eight consecutive codes.
**
** Entries in the range 0x00-0x3f are the 6-bit value of the character:
** upper-case letters give 0x00-0x19, lower-case letters 0x1a-0x33,
** digits 0x34-0x3d, plus gives 0x3e and slash gives 0x3f.  Every other
** code, including the equals sign used for padding, holds 0xff, which
** the decoder treats as not part of the alphabet because it is greater
** than 0x3f.
*/
1461 static unsigned char b642nib[0x80] = {
1462 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1463 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1468 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1469 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1470 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1471 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1472 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1473 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1474 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1475 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1476 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1477 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1484 return init_encoding(ct, openBase64);
/*
** openBase64 -- open callback for base64 content: decode the body of ct
** into a file and hand back a descriptor for it.
**
** file: in/out.  When it points to NULL a temporary name is generated
**       with m_mktemp(), otherwise the given name is used.  On success
**       it is set to the name of the decoded file.
**
** Visible steps:
**  - If a decoded stream already exists it is rewound; if only the
**    file name exists the file is reopened for reading.
**  - Otherwise the output name is chosen.  A suffix is looked up with
**    context_find(), first under the key built from the program name,
**    type and subtype and then under the key built from the program
**    name and type only.  A non-empty suffix is appended to the name;
**    a temporary file that already exists is renamed accordingly.
**  - The output file is opened for reading and writing.  A negative
**    body length is an internal error; the message file is opened if
**    it is not already open.
**  - The body is read in chunks with read() directly on the descriptor
**    of ct->c_fp, starting at c_begin.
**  - Each character is mapped through b642nib.  The 6-bit values are
**    shifted into bits starting at bit 18; once bitno drops below zero
**    the three bytes addressed by b1, b2 and b3 are written and the
**    accumulator is reset.  b1 to b3 point into the storage of bits
**    and are selected according to the global endian so that the bytes
**    come out in stream order.
**  - Characters that are not valid base64 digits trigger the
**    diagnostic about invalid encoding.
**  - Finally the message stream and the output stream are rewound, the
**    output is flushed, and the descriptor of the output stream is
**    returned.
**  - On failure free_encoding(ct, 0) is called.
**
** NOTE(review): the result of strdup() for the original temporary name
** is not checked in the visible lines, and no matching free is visible.
** NOTE(review): handling of the padding character, several guarding
** conditions and the failure return value are not visible in this
** listing -- confirm against the complete source.
*/
1489 openBase64(CT ct, char **file)
1492 int fd, len, skip, own_ct_fp = 0;
1494 unsigned char value, *b, *b1, *b2, *b3;
1495 unsigned char *cp, *ep;
1496 char buffer[BUFSIZ];
1497 /* sbeck -- handle suffixes */
1501 b = (unsigned char *) &bits;
1502 b1 = &b[endian > 0 ? 1 : 2];
1503 b2 = &b[endian > 0 ? 2 : 1];
1504 b3 = &b[endian > 0 ? 3 : 0];
1508 fseek(ce->ce_fp, 0L, SEEK_SET);
1513 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1514 content_error(ce->ce_file, ct,
1515 "unable to fopen for reading");
1521 if (*file == NULL) {
1522 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1525 ce->ce_file = getcpy(*file);
1529 /* sbeck@cise.ufl.edu -- handle suffixes */
1531 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1532 invo_name, ci->ci_type, ci->ci_subtype);
1533 cp = context_find(buffer);
1534 if (cp == NULL || *cp == '\0') {
1535 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1537 cp = context_find(buffer);
1539 if (cp != NULL && *cp != '\0') {
1540 if (ce->ce_unlink) {
1542 ** Temporary file already exists, so we rename to
1543 ** version with extension.
1545 char *file_org = strdup(ce->ce_file);
1546 ce->ce_file = add(cp, ce->ce_file);
1547 if (rename(file_org, ce->ce_file)) {
1548 adios(ce->ce_file, "unable to rename %s to ",
1554 ce->ce_file = add(cp, ce->ce_file);
1558 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1559 content_error(ce->ce_file, ct,
1560 "unable to fopen for reading/writing");
1564 if ((len = ct->c_end - ct->c_begin) < 0)
1565 adios(NULL, "internal error(1)");
1568 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1569 content_error(ct->c_file, ct,
1570 "unable to open for reading");
1580 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1582 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1584 content_error(ct->c_file, ct, "error reading from");
1588 content_error(NULL, ct, "premature eof");
1596 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1601 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1603 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1605 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1609 bits |= value << bitno;
1611 if ((bitno -= 6) < 0) {
1612 putc((char) *b1, ce->ce_fp);
1614 putc((char) *b2, ce->ce_fp);
1616 putc((char) *b3, ce->ce_fp);
1620 if (ferror(ce->ce_fp)) {
1621 content_error(ce->ce_file, ct,
1622 "error writing to");
1625 bitno = 18, bits = 0L, skip = 0;
1631 goto self_delimiting;
1640 fprintf(stderr, "premature ending (bitno %d)\n",
1643 content_error(NULL, ct, "invalid BASE64 encoding");
1648 fseek(ct->c_fp, 0L, SEEK_SET);
1650 if (fflush(ce->ce_fp)) {
1651 content_error(ce->ce_file, ct, "error writing to");
1655 fseek(ce->ce_fp, 0L, SEEK_SET);
1658 *file = ce->ce_file;
1663 return fileno(ce->ce_fp);
1666 free_encoding(ct, 0);
/*
** Hex digit table for quoted-printable decoding, indexed by a 7-bit
** character code; the decoder masks the input byte with 0x7f before
** indexing.  Each row below covers eight consecutive codes.
**
** The decimal digits give 0x00-0x09 and the letters A to F give
** 0x0a-0x0f in both upper and lower case.  Every other code holds 0x00,
** so the table by itself cannot tell an invalid character from the
** digit zero; the decoder checks both characters of an escape with
** isxdigit() before it starts decoding.
*/
1679 static char hex2nib[0x80] = {
1680 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1687 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1693 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1702 return init_encoding(ct, openQuoted);
/*
** openQuoted -- open callback for quoted-printable content: decode the
** body of ct into a file and hand back a descriptor for it.
**
** file: in/out.  When it points to NULL a temporary name is generated
**       with m_mktemp(), otherwise the given name is used.  On success
**       it is set to the name of the decoded file.
**
** Visible steps:
**  - Reuse of an existing decoded stream or file, choice of the output
**    name, the suffix lookup through context_find() and the opening of
**    the output file follow the same pattern as the other openers in
**    this file.
**  - The body is read line by line with fgets() starting at c_begin;
**    a line longer than the remaining length is cut to that length.
**  - Trailing characters are stripped from each line before decoding.
**  - An equals sign directly followed by a newline is a soft line
**    break and produces no output.
**  - An equals sign followed by two hex digits is decoded: the first
**    digit is looked up in hex2nib into mask, the second digit is
**    combined into it and the resulting byte is written.
**  - An equals sign without two following characters, or followed by
**    something other than hex digits, is written out unchanged.
**  - All other bytes are copied through unchanged.
**  - Ending the content in the middle of an escape is reported as
**    invalid encoding.
**  - Finally the message stream and the output stream are rewound, the
**    output is flushed, and the descriptor of the output stream is
**    returned.  On failure free_encoding(ct, 0) is called.
**
** NOTE(review): the shift applied to the first digit, the state
** variable updates and the failure return value are not visible in this
** listing -- confirm against the complete source.
** NOTE(review): the result of strdup() for the original temporary name
** is not checked in the visible lines.
*/
1707 openQuoted(CT ct, char **file)
1709 int cc, len, quoted, own_ct_fp = 0;
1710 unsigned char *cp, *ep;
1711 char buffer[BUFSIZ];
1712 unsigned char mask = 0;
1714 /* sbeck -- handle suffixes */
1719 fseek(ce->ce_fp, 0L, SEEK_SET);
1724 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1725 content_error(ce->ce_file, ct,
1726 "unable to fopen for reading");
1732 if (*file == NULL) {
1733 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1736 ce->ce_file = getcpy(*file);
1740 /* sbeck@cise.ufl.edu -- handle suffixes */
1742 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1743 invo_name, ci->ci_type, ci->ci_subtype);
1744 cp = context_find(buffer);
1745 if (cp == NULL || *cp == '\0') {
1746 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1748 cp = context_find(buffer);
1750 if (cp != NULL && *cp != '\0') {
1751 if (ce->ce_unlink) {
1753 ** Temporary file already exists, so we rename to
1754 ** version with extension.
1756 char *file_org = strdup(ce->ce_file);
1757 ce->ce_file = add(cp, ce->ce_file);
1758 if (rename(file_org, ce->ce_file)) {
1759 adios(ce->ce_file, "unable to rename %s to ",
1765 ce->ce_file = add(cp, ce->ce_file);
1769 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1770 content_error(ce->ce_file, ct,
1771 "unable to fopen for reading/writing");
1775 if ((len = ct->c_end - ct->c_begin) < 0)
1776 adios(NULL, "internal error(2)");
1779 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1780 content_error(ct->c_file, ct,
1781 "unable to open for reading");
1789 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1791 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1792 content_error(NULL, ct, "premature eof");
1796 if ((cc = strlen(buffer)) > len)
1800 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1805 for (; cp < ep; cp++) {
1807 /* in an escape sequence */
1809 /* at byte 1 of an escape sequence */
1810 mask = hex2nib[*cp & 0x7f];
1811 /* next is byte 2 */
1814 /* at byte 2 of an escape sequence */
1816 mask |= hex2nib[*cp & 0x7f];
1817 putc(mask, ce->ce_fp);
1818 if (ferror(ce->ce_fp)) {
1819 content_error(ce->ce_file, ct, "error writing to");
1823 ** finished escape sequence; next may
1824 ** be literal or a new escape sequence
1828 /* on to next byte */
1832 /* not in an escape sequence */
1835 ** starting an escape sequence,
1838 if (cp + 1 < ep && cp[1] == '\n') {
1839 /* "=\n" soft line break, eat the \n */
1843 if (cp + 1 >= ep || cp + 2 >= ep) {
1845 ** We don't have 2 bytes left,
1846 ** so this is an invalid escape
1847 ** sequence; just show the raw bytes
1850 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1852 ** Next 2 bytes are hex digits,
1853 ** making this a valid escape
1854 ** sequence; let's decode it (above).
1860 ** One or both of the next 2 is
1861 ** out of range, making this an
1862 ** invalid escape sequence; just
1863 ** show the raw bytes (below).
1868 /* Just show the raw byte. */
1869 putc(*cp, ce->ce_fp);
1870 if (ferror(ce->ce_fp)) {
1871 content_error(ce->ce_file, ct,
1872 "error writing to");
1878 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1882 fseek(ct->c_fp, 0L, SEEK_SET);
1884 if (fflush(ce->ce_fp)) {
1885 content_error(ce->ce_file, ct, "error writing to");
1889 fseek(ce->ce_fp, 0L, SEEK_SET);
1892 *file = ce->ce_file;
1897 return fileno(ce->ce_fp);
1900 free_encoding(ct, 0);
1916 if (init_encoding(ct, open7Bit) == NOTOK)
1919 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** open7Bit -- open callback for content that needs no decoding: copy
** the body of ct into a file and hand back a descriptor for it.
**
** file: in/out.  When it points to NULL a temporary name is generated
**       with m_mktemp(), otherwise the given name is used.  On success
**       it is set to the name of the output file.
**
** Visible steps:
**  - Reuse of an existing stream or file, choice of the output name,
**    the suffix lookup through context_find() and the opening of the
**    output file follow the same pattern as the other openers in this
**    file.
**  - For multipart content a header block is written first: the
**    Content-Type field with type and subtype, each parameter preceded
**    by a semicolon and written as name, equals sign and quoted value,
**    then the comment in parentheses if there is one.  A line is
**    folded with a newline and a tab when the next item would reach
**    CPERLIN columns.  The Content-ID, Content-Description and
**    Content-Disposition fields and an empty line follow.
**  - A negative body length is an internal error; the message file is
**    opened if it is not already open.
**  - The body is copied in chunks, using read() directly on the
**    descriptor of ct->c_fp and fwrite() on the output stream.
**  - Finally the message stream and the output stream are rewound, the
**    output is flushed, and the descriptor of the output stream is
**    returned.  On failure free_encoding(ct, 0) is called.
**
** NOTE(review): the conditions guarding the three optional fields, the
** loop that bounds the copy by the body length and the failure return
** value are not visible in this listing -- confirm against the complete
** source.
** NOTE(review): the result of strdup() for the original temporary name
** is not checked in the visible lines.
*/
1925 open7Bit(CT ct, char **file)
1927 int cc, fd, len, own_ct_fp = 0;
1928 char buffer[BUFSIZ];
1929 /* sbeck -- handle suffixes */
1936 fseek(ce->ce_fp, 0L, SEEK_SET);
1941 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1942 content_error(ce->ce_file, ct,
1943 "unable to fopen for reading");
1949 if (*file == NULL) {
1950 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1953 ce->ce_file = getcpy(*file);
1957 /* sbeck@cise.ufl.edu -- handle suffixes */
1959 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1960 invo_name, ci->ci_type, ci->ci_subtype);
1961 cp = context_find(buffer);
1962 if (cp == NULL || *cp == '\0') {
1963 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1965 cp = context_find(buffer);
1967 if (cp != NULL && *cp != '\0') {
1968 if (ce->ce_unlink) {
1970 ** Temporary file already exists, so we rename to
1971 ** version with extension.
1973 char *file_org = strdup(ce->ce_file);
1974 ce->ce_file = add(cp, ce->ce_file);
1975 if (rename(file_org, ce->ce_file)) {
1976 adios(ce->ce_file, "unable to rename %s to ",
1982 ce->ce_file = add(cp, ce->ce_file);
1986 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1987 content_error(ce->ce_file, ct,
1988 "unable to fopen for reading/writing");
1992 if (ct->c_type == CT_MULTIPART) {
1994 CI ci = &ct->c_ctinfo;
1997 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
1999 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2000 strlen(ci->ci_subtype);
2001 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2002 putc(';', ce->ce_fp);
2005 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2008 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2009 fputs("\n\t", ce->ce_fp);
2012 putc(' ', ce->ce_fp);
2015 fprintf(ce->ce_fp, "%s", buffer);
2019 if (ci->ci_comment) {
2020 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2022 fputs("\n\t", ce->ce_fp);
2025 putc(' ', ce->ce_fp);
2028 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2031 fprintf(ce->ce_fp, "\n");
2033 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2035 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2037 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2038 fprintf(ce->ce_fp, "\n");
2041 if ((len = ct->c_end - ct->c_begin) < 0)
2042 adios(NULL, "internal error(3)");
2045 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2046 content_error(ct->c_file, ct,
2047 "unable to open for reading");
2053 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2055 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2057 content_error(ct->c_file, ct, "error reading from");
2061 content_error(NULL, ct, "premature eof");
2069 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2070 if (ferror(ce->ce_fp)) {
2071 content_error(ce->ce_file, ct,
2072 "error writing to");
2077 fseek(ct->c_fp, 0L, SEEK_SET);
2079 if (fflush(ce->ce_fp)) {
2080 content_error(ce->ce_file, ct, "error writing to");
2084 fseek(ce->ce_fp, 0L, SEEK_SET);
2087 *file = ce->ce_file;
2092 return fileno(ce->ce_fp);
2095 free_encoding(ct, 0);