2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
23 extern int endian; /* mhmisc.c */
25 extern pid_t xpid; /* mhshowsbr.c */
28 ** Directory to place temp files. This must
29 ** be set before these routines are called.
34 ** Structures for TEXT messages
36 struct k2v SubText[] = {
37 { "plain", TEXT_PLAIN },
38 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
39 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
40 { NULL, TEXT_UNKNOWN } /* this one must be last! */
43 struct k2v Charset[] = {
44 { "us-ascii", CHARSET_USASCII },
45 { "iso-8859-1", CHARSET_LATIN },
46 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
50 ** Structures for MULTIPART messages
52 struct k2v SubMultiPart[] = {
53 { "mixed", MULTI_MIXED },
54 { "alternative", MULTI_ALTERNATE },
55 { "digest", MULTI_DIGEST },
56 { "parallel", MULTI_PARALLEL },
57 { NULL, MULTI_UNKNOWN } /* this one must be last! */
61 ** Structures for MESSAGE messages
63 struct k2v SubMessage[] = {
64 { "rfc822", MESSAGE_RFC822 },
65 { "partial", MESSAGE_PARTIAL },
66 { "external-body", MESSAGE_EXTERNAL },
67 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
71 ** Structure for APPLICATION messages
73 struct k2v SubApplication[] = {
74 { "octet-stream", APPLICATION_OCTETS },
75 { "postscript", APPLICATION_POSTSCRIPT },
76 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
83 int make_intermediates(char *);
84 void content_error(char *, CT, char *, ...);
87 void free_content(CT);
88 void free_encoding(CT, int);
93 static CT get_content(FILE *, char *, int);
94 static int get_comment(CT, unsigned char **, int);
96 static int InitGeneric(CT);
97 static int InitText(CT);
98 static int InitMultiPart(CT);
99 static void reverse_parts(CT);
100 static int InitMessage(CT);
101 static int InitApplication(CT);
102 static int init_encoding(CT, OpenCEFunc);
103 static unsigned long size_encoding(CT);
104 static int InitBase64(CT);
105 static int openBase64(CT, char **);
106 static int InitQuoted(CT);
107 static int openQuoted(CT, char **);
108 static int Init7Bit(CT);
110 struct str2init str2cts[] = {
111 { "application", CT_APPLICATION, InitApplication },
112 { "audio", CT_AUDIO, InitGeneric },
113 { "image", CT_IMAGE, InitGeneric },
114 { "message", CT_MESSAGE, InitMessage },
115 { "multipart", CT_MULTIPART, InitMultiPart },
116 { "text", CT_TEXT, InitText },
117 { "video", CT_VIDEO, InitGeneric },
118 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
119 { NULL, CT_UNKNOWN, NULL },
122 struct str2init str2ces[] = {
123 { "base64", CE_BASE64, InitBase64 },
124 { "quoted-printable", CE_QUOTED, InitQuoted },
125 { "8bit", CE_8BIT, Init7Bit },
126 { "7bit", CE_7BIT, Init7Bit },
127 { "binary", CE_BINARY, Init7Bit },
128 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
129 { NULL, CE_UNKNOWN, NULL },
136 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
147 ** Main entry point for parsing a MIME message or file.
148 ** It returns the Content structure for the top level
149 ** entity in the file.
152 parse_mime(char *file)
160 ** Check if file is actually standard input
162 if ((is_stdin = (strcmp(file, "-")==0))) {
163 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
165 advise("mhparse", "unable to create temporary file");
168 file = getcpy(tfile);
171 while (fgets(buffer, sizeof(buffer), stdin))
177 advise("stdin", "error reading");
182 advise(file, "error writing");
185 fseek(fp, 0L, SEEK_SET);
186 } else if ((fp = fopen(file, "r")) == NULL) {
187 advise(file, "unable to read");
191 if (!(ct = get_content(fp, file, 1))) {
194 advise(NULL, "unable to decode %s", file);
199 ct->c_unlink = 1; /* temp file to remove */
203 if (ct->c_end == 0L) {
204 fseek(fp, 0L, SEEK_END);
205 ct->c_end = ftell(fp);
208 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
220 ** Main routine for reading/parsing the headers
221 ** of a message content.
223 ** toplevel = 1 # we are at the top level of the message
224 ** toplevel = 0 # we are inside message type or multipart type
225 ** # other than multipart/digest
226 ** toplevel = -1 # we are inside multipart/digest
227 ** NB: on failure we will fclose(in)!
231 get_content(FILE *in, char *file, int toplevel)
234 char buf[BUFSIZ], name[NAMESZ];
239 /* allocate the content structure */
240 if (!(ct = (CT) calloc(1, sizeof(*ct))))
241 adios(NULL, "out of memory");
244 ct->c_file = getcpy(file);
245 ct->c_begin = ftell(ct->c_fp) + 1;
248 ** Parse the header fields for this
249 ** content into a linked list.
251 for (compnum = 1, state = FLD;;) {
252 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
258 /* get copies of the buffers */
262 /* if necessary, get rest of field */
263 while (state == FLDPLUS) {
264 state = m_getfld(state, name, buf,
266 vp = add(buf, vp); /* add to previous value */
269 /* Now add the header data to the list */
270 add_header(ct, np, vp);
272 /* continue, if this isn't the last header field */
273 if (state != FLDEOF) {
274 ct->c_begin = ftell(in) + 1;
281 ct->c_begin = ftell(in) - strlen(buf);
285 ct->c_begin = ftell(in);
290 adios(NULL, "message format error in component #%d",
294 adios(NULL, "getfld() returned %d", state);
297 /* break out of the loop */
302 ** Read the content headers. We will parse the
303 ** MIME related header fields into their various
304 ** structures and set internal flags related to
305 ** content type/subtype, etc.
308 hp = ct->c_first_hf; /* start at first header field */
310 /* Get MIME-Version field */
311 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
314 unsigned char *cp, *dp;
317 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
320 ct->c_vrsn = getcpy(hp->value);
322 /* Now, cleanup this field */
327 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
329 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
334 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
336 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
339 for (dp = cp; istoken(*dp); dp++)
343 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
346 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
349 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
350 /* Get Content-Type field */
351 struct str2init *s2i;
352 CI ci = &ct->c_ctinfo;
354 /* Check if we've already seen a Content-Type header */
356 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
360 /* Parse the Content-Type field */
361 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
365 ** Set the Init function and the internal
366 ** flag for this content type.
368 for (s2i = str2cts; s2i->si_key; s2i++)
369 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
371 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
373 ct->c_type = s2i->si_val;
374 ct->c_ctinitfnx = s2i->si_init;
376 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
377 /* Get Content-Transfer-Encoding field */
379 unsigned char *cp, *dp;
380 struct str2init *s2i;
383 ** Check if we've already seen the
384 ** Content-Transfer-Encoding field
387 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
391 /* get copy of this field */
392 ct->c_celine = cp = getcpy(hp->value);
396 for (dp = cp; istoken(*dp); dp++)
402 ** Find the internal flag and Init function
403 ** for this transfer encoding.
405 for (s2i = str2ces; s2i->si_key; s2i++)
406 if (!mh_strcasecmp(cp, s2i->si_key))
408 if (!s2i->si_key && !uprf(cp, "X-"))
411 ct->c_encoding = s2i->si_val;
413 /* Call the Init function for this encoding */
414 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
417 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
418 /* Get Content-ID field */
419 ct->c_id = add(hp->value, ct->c_id);
421 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
422 /* Get Content-Description field */
423 ct->c_descr = add(hp->value, ct->c_descr);
425 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
426 /* Get Content-Disposition field */
427 ct->c_dispo = add(hp->value, ct->c_dispo);
431 hp = hp->next; /* next header field */
435 ** Check if we saw a Content-Type field.
436 ** If not, then assign a default value for
437 ** it, and the Init function.
441 ** We are inside a multipart/digest message,
442 ** so the default type is message/rfc822
445 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
447 ct->c_type = CT_MESSAGE;
448 ct->c_ctinitfnx = InitMessage;
451 ** Else default type is text/plain
453 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
455 ct->c_type = CT_TEXT;
456 ct->c_ctinitfnx = InitText;
460 /* Use default Transfer-Encoding, if necessary */
462 ct->c_encoding = CE_7BIT;
475 ** small routine to add header field to list
479 add_header(CT ct, char *name, char *value)
483 /* allocate header field structure */
484 hp = mh_xmalloc(sizeof(*hp));
486 /* link data into header structure */
491 /* link header structure into the list */
492 if (ct->c_first_hf == NULL) {
493 ct->c_first_hf = hp; /* this is the first */
496 ct->c_last_hf->next = hp; /* add it to the end */
/*
** Make sure that buf contains at least one appearance of name
** followed by '='.  If it does not, insert  ; name="value"  just
** before the first semicolon, or at the end if there is none.
** name must not carry a trailing '='; the quotes around value are
** added here.  Typical usage: make sure that a Content-Disposition
** header contains filename="foo".
**
** buf and value may be NULL, in which case buf is returned untouched.
** name is assumed to be non-NULL.
**
** Returns buf itself when nothing had to be inserted.  Otherwise
** returns a newly allocated, newline-terminated string and frees
** buf, so the caller must continue with the returned pointer only.
** Trailing white space in buf is trimmed in place before inserting.
*/
static char *
incl_name_value(unsigned char *buf, char *name, char *value) {
	char *text = (char *) buf;
	char *name_plus_equal;
	char *insertion;
	char *semi;
	char *newbuf;
	size_t len;
	int present;

	if (!buf || !value)
		return text;

	name_plus_equal = concat(name, "=", NULL);
	present = strstr(text, name_plus_equal) != NULL;
	free(name_plus_equal);
	if (present)
		return text;

	/*
	** Trim trailing space, esp. newline.  Counting down a length
	** (instead of walking a pointer) keeps an empty buf from being
	** indexed at -1.
	*/
	for (len = strlen(text);
			len > 0 && isspace((unsigned char) text[len - 1]);
			len--) {
		text[len - 1] = '\0';
	}

	insertion = concat("; ", name, "=", "\"", value, "\"", NULL);

	if ((semi = strchr(text, ';'))) {
		/* split at the first semicolon and insert in between */
		char *prefix = getcpy(text);

		prefix[semi - text] = '\0';
		newbuf = concat(prefix, insertion, semi, "\n", NULL);
		free(prefix);
	} else {
		/* no parameters yet: append to the end */
		newbuf = concat(text, insertion, "\n", NULL);
	}

	free(insertion);
	free(buf);

	return newbuf;
}
/*
** Extract just the foo of name_suffix="foo" from value.  The match
** is by suffix: a name_suffix of  name  also matches filename="foo"
** and yields foo.
**
** Returns value itself (not a copy) when value is NULL, when no
** name_suffix=" occurs in it, or when the opening quote is never
** closed.  Otherwise returns a newly allocated string holding the
** text between the two quotes.  A caller that wants to free the
** result has to compare it against value first.
*/
static char *
extract_name_value(char *name_suffix, char *value) {
	char *pattern;
	char *match;
	char *begin;
	char *end;
	char *extracted;
	size_t len;

	if (!value)
		return value;

	pattern = concat(name_suffix, "=\"", NULL);
	match = strstr(value, pattern);
	free(pattern);
	if (!match)
		return value;

	/* The pattern ends in a quote, so this strchr() cannot fail. */
	begin = strchr(match, '"') + 1;

	/*
	** Look for the closing quote, but stop at the end of the
	** string: an unterminated quoted value must not send the
	** scan past the NUL.
	*/
	end = strchr(begin, '"');
	if (!end)
		return value;

	len = (size_t) (end - begin);
	extracted = mh_xmalloc(len + 1);
	memcpy(extracted, begin, len);
	extracted[len] = '\0';

	return extracted;
}
596 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
597 ** directives. Fills in the information of the CTinfo structure.
600 get_ctinfo(unsigned char *cp, CT ct, int magic)
609 i = strlen(invo_name) + 2;
611 /* store copy of Content-Type line */
612 cp = ct->c_ctline = getcpy(cp);
614 while (isspace(*cp)) /* trim leading spaces */
617 /* change newlines to spaces */
618 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
621 /* trim trailing spaces */
622 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
628 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
630 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
633 for (dp = cp; istoken(*dp); dp++)
636 ci->ci_type = getcpy(cp); /* store content type */
640 advise(NULL, "invalid %s: field in message %s (empty type)",
641 TYPE_FIELD, ct->c_file);
645 /* down case the content type string */
646 for (dp = ci->ci_type; *dp; dp++)
647 if (isalpha(*dp) && isupper(*dp))
653 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
658 ci->ci_subtype = getcpy("");
666 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
669 for (dp = cp; istoken(*dp); dp++)
672 ci->ci_subtype = getcpy(cp); /* store the content subtype */
675 if (!*ci->ci_subtype) {
676 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
680 /* down case the content subtype string */
681 for (dp = ci->ci_subtype; *dp; dp++)
682 if (isalpha(*dp) && isupper(*dp))
689 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
693 ** Parse attribute/value pairs given with Content-Type
695 ep = (ap = ci->ci_attrs) + NPARMS;
701 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
709 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
713 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
717 /* down case the attribute name */
718 for (dp = cp; istoken(*dp); dp++)
719 if (isalpha(*dp) && isupper(*dp))
722 for (up = dp; isspace(*dp);)
724 if (dp == cp || *dp != '=') {
725 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
729 vp = (*ap = getcpy(cp)) + (up - cp);
731 for (dp++; isspace(*dp);)
734 /* now add the attribute value */
735 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
738 for (cp = ++dp, dp = vp;;) {
742 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
747 if ((c = *cp++) == '\0')
762 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
767 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
775 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
780 ** Get any <Content-Id> given in buffer
782 if (magic && *cp == '<') {
787 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
788 advise(NULL, "invalid ID in message %s", ct->c_file);
794 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
805 ** Get any [Content-Description] given in buffer.
807 if (magic && *cp == '[') {
809 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
813 advise(NULL, "invalid description in message %s",
822 ct->c_descr = concat(ct->c_descr, "\n", NULL);
833 ** Get any {Content-Disposition} given in buffer.
835 if (magic && *cp == '{') {
837 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
841 advise(NULL, "invalid disposition in message %s",
850 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
861 ** Check if anything is left over
865 ci->ci_magic = getcpy(cp);
868 ** If there is a Content-Disposition header and
869 ** it doesn't have a *filename=, extract it from
870 ** the magic contents. The mhbasename call skips
871 ** any leading directory components.
874 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
876 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
884 get_comment(CT ct, unsigned char **ap, int istype)
889 char c, buffer[BUFSIZ], *dp;
901 advise(NULL, "invalid comment in message %s's %s: field",
902 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
907 if ((c = *cp++) == '\0')
930 if ((dp = ci->ci_comment)) {
931 ci->ci_comment = concat(dp, " ", buffer, NULL);
934 ci->ci_comment = getcpy(buffer);
949 ** Handles content types audio, image, and video.
950 ** There's not much to do right here.
956 return OK; /* not much to do here */
970 CI ci = &ct->c_ctinfo;
972 /* check for missing subtype */
973 if (!*ci->ci_subtype)
974 ci->ci_subtype = add("plain", ci->ci_subtype);
977 for (kv = SubText; kv->kv_key; kv++)
978 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
980 ct->c_subtype = kv->kv_value;
982 /* allocate text character set structure */
983 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
984 adios(NULL, "out of memory");
985 ct->c_ctparams = (void *) t;
987 /* scan for charset parameter */
988 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
989 if (!mh_strcasecmp(*ap, "charset"))
992 /* check if content specified a character set */
995 ct->c_charset = getcpy(norm_charmap(*ep));
996 /* match character set or set to CHARSET_UNKNOWN */
997 for (kv = Charset; kv->kv_key; kv++) {
998 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1002 t->tx_charset = kv->kv_value;
1004 t->tx_charset = CHARSET_UNSPECIFIED;
1016 InitMultiPart(CT ct)
1020 unsigned char *cp, *dp;
1022 char *bp, buffer[BUFSIZ];
1023 struct multipart *m;
1025 struct part *part, **next;
1026 CI ci = &ct->c_ctinfo;
1031 ** The encoding for multipart messages must be either
1032 ** 7bit, 8bit, or binary (per RFC2045).
1034 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1035 && ct->c_encoding != CE_BINARY) {
1036 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1041 for (kv = SubMultiPart; kv->kv_key; kv++)
1042 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1044 ct->c_subtype = kv->kv_value;
1047 ** Check for "boundary" parameter, which is
1048 ** required for multipart messages.
1051 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1052 if (!mh_strcasecmp(*ap, "boundary")) {
1058 /* complain if boundary parameter is missing */
1060 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1064 /* allocate primary structure for multipart info */
1065 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1066 adios(NULL, "out of memory");
1067 ct->c_ctparams = (void *) m;
1069 /* check if boundary parameter contains only whitespace characters */
1070 for (cp = bp; isspace(*cp); cp++)
1073 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1077 /* remove trailing whitespace from boundary parameter */
1078 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1083 /* record boundary separators */
1084 m->mp_start = concat(bp, "\n", NULL);
1085 m->mp_stop = concat(bp, "--\n", NULL);
1087 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1088 advise(ct->c_file, "unable to open for reading");
1092 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1094 next = &m->mp_parts;
1098 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1102 pos += strlen(buffer);
1103 if (buffer[0] != '-' || buffer[1] != '-')
1106 if (strcmp(buffer + 2, m->mp_start)!=0)
1109 if ((part = (struct part *) calloc(1, sizeof(*part)))
1111 adios(NULL, "out of memory");
1113 next = &part->mp_next;
1115 if (!(p = get_content(fp, ct->c_file,
1116 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1123 fseek(fp, pos, SEEK_SET);
1126 if (strcmp(buffer + 2, m->mp_start) == 0) {
1130 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1131 if (p->c_end < p->c_begin)
1132 p->c_begin = p->c_end;
1137 if (strcmp(buffer + 2, m->mp_stop) == 0)
1143 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1144 if (!inout && part) {
1146 p->c_end = ct->c_end;
1148 if (p->c_begin >= p->c_end) {
1149 for (next = &m->mp_parts; *next != part;
1150 next = &((*next)->mp_next))
1154 free((char *) part);
1159 /* reverse the order of the parts for multipart/alternative */
1160 if (ct->c_subtype == MULTI_ALTERNATE)
1164 ** label all subparts with part number, and
1165 ** then initialize the content of the subpart.
1170 char partnam[BUFSIZ];
1173 snprintf(partnam, sizeof(partnam), "%s.",
1175 pp = partnam + strlen(partnam);
1180 for (part = m->mp_parts, partnum = 1; part;
1181 part = part->mp_next, partnum++) {
1184 sprintf(pp, "%d", partnum);
1185 p->c_partno = getcpy(partnam);
1187 /* initialize the content of the subparts */
1188 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1203 ** reverse the order of the parts of a multipart
1207 reverse_parts(CT ct)
1210 struct multipart *m;
1211 struct part **base, **bmp, **next, *part;
1213 m = (struct multipart *) ct->c_ctparams;
1215 /* if only one part, just return */
1216 if (!m->mp_parts || !m->mp_parts->mp_next)
1219 /* count number of parts */
1221 for (part = m->mp_parts; part; part = part->mp_next)
1224 /* allocate array of pointers to the parts */
1225 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1226 adios(NULL, "out of memory");
1229 /* point at all the parts */
1230 for (part = m->mp_parts; part; part = part->mp_next)
1234 /* reverse the order of the parts */
1235 next = &m->mp_parts;
1236 for (bmp--; bmp >= base; bmp--) {
1239 next = &part->mp_next;
1243 /* free array of pointers */
1244 free((char *) base);
1256 CI ci = &ct->c_ctinfo;
1258 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1259 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1263 /* check for missing subtype */
1264 if (!*ci->ci_subtype)
1265 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1268 for (kv = SubMessage; kv->kv_key; kv++)
1269 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1271 ct->c_subtype = kv->kv_value;
1273 switch (ct->c_subtype) {
1274 case MESSAGE_RFC822:
1277 case MESSAGE_PARTIAL:
1282 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1283 adios(NULL, "out of memory");
1284 ct->c_ctparams = (void *) p;
1287 ** scan for parameters "id", "number",
1290 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1291 if (!mh_strcasecmp(*ap, "id")) {
1292 p->pm_partid = getcpy(*ep);
1295 if (!mh_strcasecmp(*ap, "number")) {
1296 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1298 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1303 if (!mh_strcasecmp(*ap, "total")) {
1304 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1311 if (!p->pm_partid || !p->pm_partno
1312 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1313 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1319 case MESSAGE_EXTERNAL:
1324 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1325 advise(ct->c_file, "unable to open for reading");
1329 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1331 if (!(p = get_content(fp, ct->c_file, 0))) {
1337 p->c_end = p->c_begin;
1342 switch (p->c_type) {
1347 if (p->c_subtype != MESSAGE_RFC822)
1352 (*p->c_ctinitfnx) (p);
1371 InitApplication(CT ct)
1374 CI ci = &ct->c_ctinfo;
1377 for (kv = SubApplication; kv->kv_key; kv++)
1378 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1380 ct->c_subtype = kv->kv_value;
1387 ** TRANSFER ENCODINGS
1391 init_encoding(CT ct, OpenCEFunc openfnx)
1395 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1396 adios(NULL, "out of memory");
1399 ct->c_ceopenfnx = openfnx;
1400 ct->c_ceclosefnx = close_encoding;
1401 ct->c_cesizefnx = size_encoding;
1408 close_encoding(CT ct)
1412 if (!(ce = ct->c_cefile))
1422 static unsigned long
1423 size_encoding(CT ct)
1431 if (!(ce = ct->c_cefile))
1432 return (ct->c_end - ct->c_begin);
1434 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1435 return (long) st.st_size;
1438 if (stat(ce->ce_file, &st) != NOTOK)
1439 return (long) st.st_size;
1444 if (ct->c_encoding == CE_EXTERNAL)
1445 return (ct->c_end - ct->c_begin);
1448 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1449 return (ct->c_end - ct->c_begin);
1451 if (fstat(fd, &st) != NOTOK)
1452 size = (long) st.st_size;
1456 (*ct->c_ceclosefnx) (ct);
/*
** BASE64 decoding table, indexed by a 7-bit character code.
** Characters of the base64 alphabet map to their 6-bit value
** (0x00..0x3f); every other character maps to 0xff.  The table is
** only ever read, hence const.
*/
static const unsigned char b642nib[0x80] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
1488 return init_encoding(ct, openBase64);
1493 openBase64(CT ct, char **file)
1496 int fd, len, skip, own_ct_fp = 0;
1498 unsigned char value, *b, *b1, *b2, *b3;
1499 unsigned char *cp, *ep;
1500 char buffer[BUFSIZ];
1501 /* sbeck -- handle suffixes */
1505 b = (unsigned char *) &bits;
1506 b1 = &b[endian > 0 ? 1 : 2];
1507 b2 = &b[endian > 0 ? 2 : 1];
1508 b3 = &b[endian > 0 ? 3 : 0];
1512 fseek(ce->ce_fp, 0L, SEEK_SET);
1517 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1518 content_error(ce->ce_file, ct,
1519 "unable to fopen for reading");
1525 if (*file == NULL) {
1526 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1529 ce->ce_file = getcpy(*file);
1533 /* sbeck@cise.ufl.edu -- handle suffixes */
1535 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1536 invo_name, ci->ci_type, ci->ci_subtype);
1537 cp = context_find(buffer);
1538 if (cp == NULL || *cp == '\0') {
1539 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1541 cp = context_find(buffer);
1543 if (cp != NULL && *cp != '\0') {
1544 if (ce->ce_unlink) {
1546 ** Temporary file already exists, so we rename to
1547 ** version with extension.
1549 char *file_org = strdup(ce->ce_file);
1550 ce->ce_file = add(cp, ce->ce_file);
1551 if (rename(file_org, ce->ce_file)) {
1552 adios(ce->ce_file, "unable to rename %s to ",
1558 ce->ce_file = add(cp, ce->ce_file);
1562 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1563 content_error(ce->ce_file, ct,
1564 "unable to fopen for reading/writing");
1568 if ((len = ct->c_end - ct->c_begin) < 0)
1569 adios(NULL, "internal error(1)");
1572 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1573 content_error(ct->c_file, ct,
1574 "unable to open for reading");
1584 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1586 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1588 content_error(ct->c_file, ct, "error reading from");
1592 content_error(NULL, ct, "premature eof");
1600 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1605 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1607 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1609 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1613 bits |= value << bitno;
1615 if ((bitno -= 6) < 0) {
1616 putc((char) *b1, ce->ce_fp);
1618 putc((char) *b2, ce->ce_fp);
1620 putc((char) *b3, ce->ce_fp);
1624 if (ferror(ce->ce_fp)) {
1625 content_error(ce->ce_file, ct,
1626 "error writing to");
1629 bitno = 18, bits = 0L, skip = 0;
1635 goto self_delimiting;
1644 fprintf(stderr, "premature ending (bitno %d)\n",
1647 content_error(NULL, ct, "invalid BASE64 encoding");
1652 fseek(ct->c_fp, 0L, SEEK_SET);
1654 if (fflush(ce->ce_fp)) {
1655 content_error(ce->ce_file, ct, "error writing to");
1659 fseek(ce->ce_fp, 0L, SEEK_SET);
1662 *file = ce->ce_file;
1667 return fileno(ce->ce_fp);
1670 free_encoding(ct, 0);
/*
** Hex digit decoding table for QUOTED-PRINTABLE, indexed by a 7-bit
** character code.  '0'..'9', 'A'..'F' and 'a'..'f' map to their
** value (0..15); every other character maps to 0, so callers have to
** validate the digits themselves.  The table is only ever read,
** hence const.
*/
static const char hex2nib[0x80] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1706 return init_encoding(ct, openQuoted);
1711 openQuoted(CT ct, char **file)
1713 int cc, len, quoted, own_ct_fp = 0;
1714 unsigned char *cp, *ep;
1715 char buffer[BUFSIZ];
1716 unsigned char mask = 0;
1718 /* sbeck -- handle suffixes */
1723 fseek(ce->ce_fp, 0L, SEEK_SET);
1728 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1729 content_error(ce->ce_file, ct,
1730 "unable to fopen for reading");
1736 if (*file == NULL) {
1737 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1740 ce->ce_file = getcpy(*file);
1744 /* sbeck@cise.ufl.edu -- handle suffixes */
1746 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1747 invo_name, ci->ci_type, ci->ci_subtype);
1748 cp = context_find(buffer);
1749 if (cp == NULL || *cp == '\0') {
1750 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1752 cp = context_find(buffer);
1754 if (cp != NULL && *cp != '\0') {
1755 if (ce->ce_unlink) {
1757 ** Temporary file already exists, so we rename to
1758 ** version with extension.
1760 char *file_org = strdup(ce->ce_file);
1761 ce->ce_file = add(cp, ce->ce_file);
1762 if (rename(file_org, ce->ce_file)) {
1763 adios(ce->ce_file, "unable to rename %s to ",
1769 ce->ce_file = add(cp, ce->ce_file);
1773 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1774 content_error(ce->ce_file, ct,
1775 "unable to fopen for reading/writing");
1779 if ((len = ct->c_end - ct->c_begin) < 0)
1780 adios(NULL, "internal error(2)");
1783 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1784 content_error(ct->c_file, ct,
1785 "unable to open for reading");
1793 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1795 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1796 content_error(NULL, ct, "premature eof");
1800 if ((cc = strlen(buffer)) > len)
1804 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1809 for (; cp < ep; cp++) {
1811 /* in an escape sequence */
1813 /* at byte 1 of an escape sequence */
1814 mask = hex2nib[*cp & 0x7f];
1815 /* next is byte 2 */
1818 /* at byte 2 of an escape sequence */
1820 mask |= hex2nib[*cp & 0x7f];
1821 putc(mask, ce->ce_fp);
1822 if (ferror(ce->ce_fp)) {
1823 content_error(ce->ce_file, ct, "error writing to");
1827 ** finished escape sequence; next may
1828 ** be literal or a new escape sequence
1832 /* on to next byte */
1836 /* not in an escape sequence */
1839 ** starting an escape sequence,
1842 if (cp + 1 < ep && cp[1] == '\n') {
1843 /* "=\n" soft line break, eat the \n */
1847 if (cp + 1 >= ep || cp + 2 >= ep) {
1849 ** We don't have 2 bytes left,
1850 ** so this is an invalid escape
1851 ** sequence; just show the raw bytes
1854 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1856 ** Next 2 bytes are hex digits,
1857 ** making this a valid escape
1858 ** sequence; let's decode it (above).
1864 ** One or both of the next 2 is
1865 ** out of range, making this an
1866 ** invalid escape sequence; just
1867 ** show the raw bytes (below).
1872 /* Just show the raw byte. */
1873 putc(*cp, ce->ce_fp);
1874 if (ferror(ce->ce_fp)) {
1875 content_error(ce->ce_file, ct,
1876 "error writing to");
1882 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1886 fseek(ct->c_fp, 0L, SEEK_SET);
1888 if (fflush(ce->ce_fp)) {
1889 content_error(ce->ce_file, ct, "error writing to");
1893 fseek(ce->ce_fp, 0L, SEEK_SET);
1896 *file = ce->ce_file;
1901 return fileno(ce->ce_fp);
1904 free_encoding(ct, 0);
1920 if (init_encoding(ct, open7Bit) == NOTOK)
1923 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1929 open7Bit(CT ct, char **file)
1931 int cc, fd, len, own_ct_fp = 0;
1932 char buffer[BUFSIZ];
1933 /* sbeck -- handle suffixes */
1940 fseek(ce->ce_fp, 0L, SEEK_SET);
1945 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1946 content_error(ce->ce_file, ct,
1947 "unable to fopen for reading");
1953 if (*file == NULL) {
1954 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1957 ce->ce_file = getcpy(*file);
1961 /* sbeck@cise.ufl.edu -- handle suffixes */
1963 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1964 invo_name, ci->ci_type, ci->ci_subtype);
1965 cp = context_find(buffer);
1966 if (cp == NULL || *cp == '\0') {
1967 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1969 cp = context_find(buffer);
1971 if (cp != NULL && *cp != '\0') {
1972 if (ce->ce_unlink) {
1974 ** Temporary file already exists, so we rename to
1975 ** version with extension.
1977 char *file_org = strdup(ce->ce_file);
1978 ce->ce_file = add(cp, ce->ce_file);
1979 if (rename(file_org, ce->ce_file)) {
1980 adios(ce->ce_file, "unable to rename %s to ",
1986 ce->ce_file = add(cp, ce->ce_file);
1990 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1991 content_error(ce->ce_file, ct,
1992 "unable to fopen for reading/writing");
1996 if (ct->c_type == CT_MULTIPART) {
1998 CI ci = &ct->c_ctinfo;
2001 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2003 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2004 strlen(ci->ci_subtype);
2005 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2006 putc(';', ce->ce_fp);
2009 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2012 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2013 fputs("\n\t", ce->ce_fp);
2016 putc(' ', ce->ce_fp);
2019 fprintf(ce->ce_fp, "%s", buffer);
2023 if (ci->ci_comment) {
2024 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2026 fputs("\n\t", ce->ce_fp);
2029 putc(' ', ce->ce_fp);
2032 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2035 fprintf(ce->ce_fp, "\n");
2037 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2039 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2041 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2042 fprintf(ce->ce_fp, "\n");
2045 if ((len = ct->c_end - ct->c_begin) < 0)
2046 adios(NULL, "internal error(3)");
2049 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2050 content_error(ct->c_file, ct,
2051 "unable to open for reading");
2057 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2059 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2061 content_error(ct->c_file, ct, "error reading from");
2065 content_error(NULL, ct, "premature eof");
2073 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2074 if (ferror(ce->ce_fp)) {
2075 content_error(ce->ce_file, ct,
2076 "error writing to");
2081 fseek(ct->c_fp, 0L, SEEK_SET);
2083 if (fflush(ce->ce_fp)) {
2084 content_error(ce->ce_file, ct, "error writing to");
2088 fseek(ce->ce_fp, 0L, SEEK_SET);
2091 *file = ce->ce_file;
2096 return fileno(ce->ce_fp);
2099 free_encoding(ct, 0);