2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
11 #include <h/signals.h>
15 #include <h/mhparse.h>
20 extern int endian; /* mhmisc.c */
22 extern pid_t xpid; /* mhshowsbr.c */
25 ** Directory to place temp files. This must
26 ** be set before these routines are called.
31 ** Structures for TEXT messages
33 struct k2v SubText[] = {
34 { "plain", TEXT_PLAIN },
35 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
36 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
37 { NULL, TEXT_UNKNOWN } /* this one must be last! */
40 struct k2v Charset[] = {
41 { "us-ascii", CHARSET_USASCII },
42 { "iso-8859-1", CHARSET_LATIN },
43 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
47 ** Structures for MULTIPART messages
49 struct k2v SubMultiPart[] = {
50 { "mixed", MULTI_MIXED },
51 { "alternative", MULTI_ALTERNATE },
52 { "digest", MULTI_DIGEST },
53 { "parallel", MULTI_PARALLEL },
54 { NULL, MULTI_UNKNOWN } /* this one must be last! */
58 ** Structures for MESSAGE messages
60 struct k2v SubMessage[] = {
61 { "rfc822", MESSAGE_RFC822 },
62 { "partial", MESSAGE_PARTIAL },
63 { "external-body", MESSAGE_EXTERNAL },
64 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
68 ** Structure for APPLICATION messages
70 struct k2v SubApplication[] = {
71 { "octet-stream", APPLICATION_OCTETS },
72 { "postscript", APPLICATION_POSTSCRIPT },
73 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
80 int make_intermediates(char *);
81 void content_error(char *, CT, char *, ...);
84 void free_content(CT);
85 void free_encoding(CT, int);
90 static CT get_content(FILE *, char *, int);
91 static int get_comment(CT, unsigned char **, int);
93 static int InitGeneric(CT);
94 static int InitText(CT);
95 static int InitMultiPart(CT);
96 static void reverse_parts(CT);
97 static int InitMessage(CT);
98 static int InitApplication(CT);
99 static int init_encoding(CT, OpenCEFunc);
100 static unsigned long size_encoding(CT);
101 static int InitBase64(CT);
102 static int openBase64(CT, char **);
103 static int InitQuoted(CT);
104 static int openQuoted(CT, char **);
105 static int Init7Bit(CT);
107 struct str2init str2cts[] = {
108 { "application", CT_APPLICATION, InitApplication },
109 { "audio", CT_AUDIO, InitGeneric },
110 { "image", CT_IMAGE, InitGeneric },
111 { "message", CT_MESSAGE, InitMessage },
112 { "multipart", CT_MULTIPART, InitMultiPart },
113 { "text", CT_TEXT, InitText },
114 { "video", CT_VIDEO, InitGeneric },
115 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
116 { NULL, CT_UNKNOWN, NULL },
119 struct str2init str2ces[] = {
120 { "base64", CE_BASE64, InitBase64 },
121 { "quoted-printable", CE_QUOTED, InitQuoted },
122 { "8bit", CE_8BIT, Init7Bit },
123 { "7bit", CE_7BIT, Init7Bit },
124 { "binary", CE_BINARY, Init7Bit },
125 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
126 { NULL, CE_UNKNOWN, NULL },
133 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
144 ** Main entry point for parsing a MIME message or file.
145 ** It returns the Content structure for the top level
146 ** entity in the file.
149 parse_mime(char *file)
157 ** Check if file is actually standard input
159 if ((is_stdin = (strcmp(file, "-")==0))) {
160 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
162 advise("mhparse", "unable to create temporary file");
165 file = getcpy(tfile);
168 while (fgets(buffer, sizeof(buffer), stdin))
174 advise("stdin", "error reading");
179 advise(file, "error writing");
182 fseek(fp, 0L, SEEK_SET);
183 } else if ((fp = fopen(file, "r")) == NULL) {
184 advise(file, "unable to read");
188 if (!(ct = get_content(fp, file, 1))) {
191 advise(NULL, "unable to decode %s", file);
196 ct->c_unlink = 1; /* temp file to remove */
200 if (ct->c_end == 0L) {
201 fseek(fp, 0L, SEEK_END);
202 ct->c_end = ftell(fp);
205 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
217 ** Main routine for reading/parsing the headers
218 ** of a message content.
220 ** toplevel = 1 # we are at the top level of the message
221 ** toplevel = 0 # we are inside message type or multipart type
222 ** # other than multipart/digest
223 ** toplevel = -1 # we are inside multipart/digest
224 ** NB: on failure we will fclose(in)!
228 get_content(FILE *in, char *file, int toplevel)
231 char buf[BUFSIZ], name[NAMESZ];
236 /* allocate the content structure */
237 if (!(ct = (CT) calloc(1, sizeof(*ct))))
238 adios(NULL, "out of memory");
241 ct->c_file = getcpy(file);
242 ct->c_begin = ftell(ct->c_fp) + 1;
245 ** Parse the header fields for this
246 ** content into a linked list.
248 for (compnum = 1, state = FLD;;) {
249 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
255 /* get copies of the buffers */
259 /* if necessary, get rest of field */
260 while (state == FLDPLUS) {
261 state = m_getfld(state, name, buf,
263 vp = add(buf, vp); /* add to previous value */
266 /* Now add the header data to the list */
267 add_header(ct, np, vp);
269 /* continue, if this isn't the last header field */
270 if (state != FLDEOF) {
271 ct->c_begin = ftell(in) + 1;
278 ct->c_begin = ftell(in) - strlen(buf);
282 ct->c_begin = ftell(in);
287 adios(NULL, "message format error in component #%d",
291 adios(NULL, "getfld() returned %d", state);
294 /* break out of the loop */
299 ** Read the content headers. We will parse the
300 ** MIME related header fields into their various
301 ** structures and set internal flags related to
302 ** content type/subtype, etc.
305 hp = ct->c_first_hf; /* start at first header field */
307 /* Get MIME-Version field */
308 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
311 unsigned char *cp, *dp;
314 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
317 ct->c_vrsn = getcpy(hp->value);
319 /* Now, cleanup this field */
324 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
326 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
331 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
333 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
336 for (dp = cp; istoken(*dp); dp++)
340 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
343 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
346 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
347 /* Get Content-Type field */
348 struct str2init *s2i;
349 CI ci = &ct->c_ctinfo;
351 /* Check if we've already seen a Content-Type header */
353 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
357 /* Parse the Content-Type field */
358 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
362 ** Set the Init function and the internal
363 ** flag for this content type.
365 for (s2i = str2cts; s2i->si_key; s2i++)
366 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
368 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
370 ct->c_type = s2i->si_val;
371 ct->c_ctinitfnx = s2i->si_init;
373 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
374 /* Get Content-Transfer-Encoding field */
376 unsigned char *cp, *dp;
377 struct str2init *s2i;
380 ** Check if we've already seen the
381 ** Content-Transfer-Encoding field
384 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
388 /* get copy of this field */
389 ct->c_celine = cp = getcpy(hp->value);
393 for (dp = cp; istoken(*dp); dp++)
399 ** Find the internal flag and Init function
400 ** for this transfer encoding.
402 for (s2i = str2ces; s2i->si_key; s2i++)
403 if (!mh_strcasecmp(cp, s2i->si_key))
405 if (!s2i->si_key && !uprf(cp, "X-"))
408 ct->c_encoding = s2i->si_val;
410 /* Call the Init function for this encoding */
411 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
414 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
415 /* Get Content-ID field */
416 ct->c_id = add(hp->value, ct->c_id);
418 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
419 /* Get Content-Description field */
420 ct->c_descr = add(hp->value, ct->c_descr);
422 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
423 /* Get Content-Disposition field */
424 ct->c_dispo = add(hp->value, ct->c_dispo);
428 hp = hp->next; /* next header field */
432 ** Check if we saw a Content-Type field.
433 ** If not, then assign a default value for
434 ** it, and the Init function.
438 ** If we are inside a multipart/digest message,
439 ** the default type is message/rfc822
442 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
444 ct->c_type = CT_MESSAGE;
445 ct->c_ctinitfnx = InitMessage;
448 ** Otherwise, the default type is text/plain
450 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
452 ct->c_type = CT_TEXT;
453 ct->c_ctinitfnx = InitText;
457 /* Use default Transfer-Encoding, if necessary */
459 ct->c_encoding = CE_7BIT;
472 ** small routine to add header field to list
476 add_header(CT ct, char *name, char *value)
480 /* allocate header field structure */
481 hp = mh_xmalloc(sizeof(*hp));
483 /* link data into header structure */
488 /* link header structure into the list */
489 if (ct->c_first_hf == NULL) {
490 ct->c_first_hf = hp; /* this is the first */
493 ct->c_last_hf->next = hp; /* add it to the end */
502 ** Make sure that buf contains at least one appearance of name,
503 ** followed by =. If not, insert both name and value, just after
504 ** first semicolon, if any. Note that name should not contain a
505 ** trailing =. And quotes will be added around the value. Typical
506 ** usage: make sure that a Content-Disposition header contains
507 ** filename="foo". If it doesn't and value does, use value from
511 incl_name_value(unsigned char *buf, char *name, char *value) {
514 /* Assume that name is non-null. */
516 char *name_plus_equal = concat(name, "=", NULL);
518 if (!strstr(buf, name_plus_equal)) {
521 char *prefix, *suffix;
523 /* Trim trailing space, esp. newline. */
524 for (cp = &buf[strlen(buf) - 1];
525 cp >= buf && isspace(*cp); --cp) {
529 insertion = concat("; ", name, "=", "\"", value, "\"",
533 ** Insert at first semicolon, if any.
534 ** If none, append to end.
536 prefix = getcpy(buf);
537 if ((cp = strchr(prefix, ';'))) {
538 suffix = concat(cp, NULL);
540 newbuf = concat(prefix, insertion, suffix,
545 newbuf = concat(buf, insertion, "\n", NULL);
553 free(name_plus_equal);
560 ** Extract just name_suffix="foo", if any, from value. If there isn't
561 ** one, return the entire value. Note that, for example, a name_suffix
562 ** of name will match filename="foo", and return foo.
565 extract_name_value(char *name_suffix, char *value) {
566 char *extracted_name_value = value;
567 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
568 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
571 free(name_suffix_plus_quote);
572 if (name_suffix_equals) {
573 char *name_suffix_begin;
576 for (cp = name_suffix_equals; *cp != '"'; ++cp)
578 name_suffix_begin = ++cp;
579 /* Find second \". */
580 for (; *cp != '"'; ++cp)
583 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
584 memcpy(extracted_name_value, name_suffix_begin,
585 cp - name_suffix_begin);
586 extracted_name_value[cp - name_suffix_begin] = '\0';
589 return extracted_name_value;
593 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
594 ** directives. Fills in the information of the CTinfo structure.
597 get_ctinfo(unsigned char *cp, CT ct, int magic)
606 i = strlen(invo_name) + 2;
608 /* store copy of Content-Type line */
609 cp = ct->c_ctline = getcpy(cp);
611 while (isspace(*cp)) /* trim leading spaces */
614 /* change newlines to spaces */
615 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
618 /* trim trailing spaces */
619 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
625 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
627 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
630 for (dp = cp; istoken(*dp); dp++)
633 ci->ci_type = getcpy(cp); /* store content type */
637 advise(NULL, "invalid %s: field in message %s (empty type)",
638 TYPE_FIELD, ct->c_file);
642 /* down case the content type string */
643 for (dp = ci->ci_type; *dp; dp++)
644 if (isalpha(*dp) && isupper(*dp))
650 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
655 ci->ci_subtype = getcpy("");
663 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
666 for (dp = cp; istoken(*dp); dp++)
669 ci->ci_subtype = getcpy(cp); /* store the content subtype */
672 if (!*ci->ci_subtype) {
673 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
677 /* down case the content subtype string */
678 for (dp = ci->ci_subtype; *dp; dp++)
679 if (isalpha(*dp) && isupper(*dp))
686 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
690 ** Parse attribute/value pairs given with Content-Type
692 ep = (ap = ci->ci_attrs) + NPARMS;
698 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
706 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
710 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
714 /* down case the attribute name */
715 for (dp = cp; istoken(*dp); dp++)
716 if (isalpha(*dp) && isupper(*dp))
719 for (up = dp; isspace(*dp);)
721 if (dp == cp || *dp != '=') {
722 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
726 vp = (*ap = getcpy(cp)) + (up - cp);
728 for (dp++; isspace(*dp);)
731 /* now add the attribute value */
732 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
735 for (cp = ++dp, dp = vp;;) {
739 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
744 if ((c = *cp++) == '\0')
759 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
764 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
772 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
777 ** Get any <Content-Id> given in buffer
779 if (magic && *cp == '<') {
784 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
785 advise(NULL, "invalid ID in message %s", ct->c_file);
791 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
802 ** Get any [Content-Description] given in buffer.
804 if (magic && *cp == '[') {
806 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
810 advise(NULL, "invalid description in message %s",
819 ct->c_descr = concat(ct->c_descr, "\n", NULL);
830 ** Get any {Content-Disposition} given in buffer.
832 if (magic && *cp == '{') {
834 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
838 advise(NULL, "invalid disposition in message %s",
847 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
858 ** Check if anything is left over
862 ci->ci_magic = getcpy(cp);
865 ** If there is a Content-Disposition header and
866 ** it doesn't have a *filename=, extract it from
867 ** the magic contents. The mhbasename call skips
868 ** any leading directory components.
871 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
873 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
881 get_comment(CT ct, unsigned char **ap, int istype)
886 char c, buffer[BUFSIZ], *dp;
898 advise(NULL, "invalid comment in message %s's %s: field",
899 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
904 if ((c = *cp++) == '\0')
927 if ((dp = ci->ci_comment)) {
928 ci->ci_comment = concat(dp, " ", buffer, NULL);
931 ci->ci_comment = getcpy(buffer);
946 ** Handles content types audio, image, and video.
947 ** There's not much to do right here.
953 return OK; /* not much to do here */
967 CI ci = &ct->c_ctinfo;
969 /* check for missing subtype */
970 if (!*ci->ci_subtype)
971 ci->ci_subtype = add("plain", ci->ci_subtype);
974 for (kv = SubText; kv->kv_key; kv++)
975 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
977 ct->c_subtype = kv->kv_value;
979 /* allocate text character set structure */
980 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
981 adios(NULL, "out of memory");
982 ct->c_ctparams = (void *) t;
984 /* scan for charset parameter */
985 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
986 if (!mh_strcasecmp(*ap, "charset"))
989 /* check if content specified a character set */
992 ct->c_charset = getcpy(norm_charmap(*ep));
993 /* match character set or set to CHARSET_UNKNOWN */
994 for (kv = Charset; kv->kv_key; kv++) {
995 if (!mh_strcasecmp(*ep, kv->kv_key)) {
999 t->tx_charset = kv->kv_value;
1001 t->tx_charset = CHARSET_UNSPECIFIED;
1013 InitMultiPart(CT ct)
1017 unsigned char *cp, *dp;
1019 char *bp, buffer[BUFSIZ];
1020 struct multipart *m;
1022 struct part *part, **next;
1023 CI ci = &ct->c_ctinfo;
1028 ** The encoding for multipart messages must be either
1029 ** 7bit, 8bit, or binary (per RFC2045).
1031 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1032 && ct->c_encoding != CE_BINARY) {
1033 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1038 for (kv = SubMultiPart; kv->kv_key; kv++)
1039 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1041 ct->c_subtype = kv->kv_value;
1044 ** Check for "boundary" parameter, which is
1045 ** required for multipart messages.
1048 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1049 if (!mh_strcasecmp(*ap, "boundary")) {
1055 /* complain if boundary parameter is missing */
1057 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1061 /* allocate primary structure for multipart info */
1062 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1063 adios(NULL, "out of memory");
1064 ct->c_ctparams = (void *) m;
1066 /* check if boundary parameter contains only whitespace characters */
1067 for (cp = bp; isspace(*cp); cp++)
1070 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1074 /* remove trailing whitespace from boundary parameter */
1075 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1080 /* record boundary separators */
1081 m->mp_start = concat(bp, "\n", NULL);
1082 m->mp_stop = concat(bp, "--\n", NULL);
1084 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1085 advise(ct->c_file, "unable to open for reading");
1089 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1091 next = &m->mp_parts;
1095 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1099 pos += strlen(buffer);
1100 if (buffer[0] != '-' || buffer[1] != '-')
1103 if (strcmp(buffer + 2, m->mp_start)!=0)
1106 if ((part = (struct part *) calloc(1, sizeof(*part)))
1108 adios(NULL, "out of memory");
1110 next = &part->mp_next;
1112 if (!(p = get_content(fp, ct->c_file,
1113 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1120 fseek(fp, pos, SEEK_SET);
1123 if (strcmp(buffer + 2, m->mp_start) == 0) {
1127 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1128 if (p->c_end < p->c_begin)
1129 p->c_begin = p->c_end;
1134 if (strcmp(buffer + 2, m->mp_stop) == 0)
1140 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1141 if (!inout && part) {
1143 p->c_end = ct->c_end;
1145 if (p->c_begin >= p->c_end) {
1146 for (next = &m->mp_parts; *next != part;
1147 next = &((*next)->mp_next))
1151 free((char *) part);
1156 /* reverse the order of the parts for multipart/alternative */
1157 if (ct->c_subtype == MULTI_ALTERNATE)
1161 ** label all subparts with part number, and
1162 ** then initialize the content of the subpart.
1167 char partnam[BUFSIZ];
1170 snprintf(partnam, sizeof(partnam), "%s.",
1172 pp = partnam + strlen(partnam);
1177 for (part = m->mp_parts, partnum = 1; part;
1178 part = part->mp_next, partnum++) {
1181 sprintf(pp, "%d", partnum);
1182 p->c_partno = getcpy(partnam);
1184 /* initialize the content of the subparts */
1185 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1200 ** reverse the order of the parts of a multipart
1204 reverse_parts(CT ct)
1207 struct multipart *m;
1208 struct part **base, **bmp, **next, *part;
1210 m = (struct multipart *) ct->c_ctparams;
1212 /* if only one part, just return */
1213 if (!m->mp_parts || !m->mp_parts->mp_next)
1216 /* count number of parts */
1218 for (part = m->mp_parts; part; part = part->mp_next)
1221 /* allocate array of pointers to the parts */
1222 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1223 adios(NULL, "out of memory");
1226 /* point at all the parts */
1227 for (part = m->mp_parts; part; part = part->mp_next)
1231 /* reverse the order of the parts */
1232 next = &m->mp_parts;
1233 for (bmp--; bmp >= base; bmp--) {
1236 next = &part->mp_next;
1240 /* free array of pointers */
1241 free((char *) base);
1253 CI ci = &ct->c_ctinfo;
1255 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1256 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1260 /* check for missing subtype */
1261 if (!*ci->ci_subtype)
1262 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1265 for (kv = SubMessage; kv->kv_key; kv++)
1266 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1268 ct->c_subtype = kv->kv_value;
1270 switch (ct->c_subtype) {
1271 case MESSAGE_RFC822:
1274 case MESSAGE_PARTIAL:
1279 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1280 adios(NULL, "out of memory");
1281 ct->c_ctparams = (void *) p;
1284 ** scan for parameters "id", "number",
1287 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1288 if (!mh_strcasecmp(*ap, "id")) {
1289 p->pm_partid = getcpy(*ep);
1292 if (!mh_strcasecmp(*ap, "number")) {
1293 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1295 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1300 if (!mh_strcasecmp(*ap, "total")) {
1301 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1308 if (!p->pm_partid || !p->pm_partno
1309 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1310 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1316 case MESSAGE_EXTERNAL:
1321 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1322 advise(ct->c_file, "unable to open for reading");
1326 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1328 if (!(p = get_content(fp, ct->c_file, 0))) {
1334 p->c_end = p->c_begin;
1339 switch (p->c_type) {
1344 if (p->c_subtype != MESSAGE_RFC822)
1349 (*p->c_ctinitfnx) (p);
1368 InitApplication(CT ct)
1371 CI ci = &ct->c_ctinfo;
1374 for (kv = SubApplication; kv->kv_key; kv++)
1375 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1377 ct->c_subtype = kv->kv_value;
1384 ** TRANSFER ENCODINGS
1388 init_encoding(CT ct, OpenCEFunc openfnx)
1392 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1393 adios(NULL, "out of memory");
1396 ct->c_ceopenfnx = openfnx;
1397 ct->c_ceclosefnx = close_encoding;
1398 ct->c_cesizefnx = size_encoding;
1405 close_encoding(CT ct)
1409 if (!(ce = ct->c_cefile))
1419 static unsigned long
1420 size_encoding(CT ct)
1428 if (!(ce = ct->c_cefile))
1429 return (ct->c_end - ct->c_begin);
1431 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1432 return (long) st.st_size;
1435 if (stat(ce->ce_file, &st) != NOTOK)
1436 return (long) st.st_size;
1441 if (ct->c_encoding == CE_EXTERNAL)
1442 return (ct->c_end - ct->c_begin);
1445 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1446 return (ct->c_end - ct->c_begin);
1448 if (fstat(fd, &st) != NOTOK)
1449 size = (long) st.st_size;
1453 (*ct->c_ceclosefnx) (ct);
1462 static unsigned char b642nib[0x80] = {
1463 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1468 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1469 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1470 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1471 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1472 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1473 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1474 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1475 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1476 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1477 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1478 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1485 return init_encoding(ct, openBase64);
1490 openBase64(CT ct, char **file)
1493 int fd, len, skip, own_ct_fp = 0;
1495 unsigned char value, *b, *b1, *b2, *b3;
1496 unsigned char *cp, *ep;
1497 char buffer[BUFSIZ];
1498 /* sbeck -- handle suffixes */
1502 b = (unsigned char *) &bits;
1503 b1 = &b[endian > 0 ? 1 : 2];
1504 b2 = &b[endian > 0 ? 2 : 1];
1505 b3 = &b[endian > 0 ? 3 : 0];
1509 fseek(ce->ce_fp, 0L, SEEK_SET);
1514 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1515 content_error(ce->ce_file, ct,
1516 "unable to fopen for reading");
1522 if (*file == NULL) {
1523 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1526 ce->ce_file = getcpy(*file);
1530 /* sbeck@cise.ufl.edu -- handle suffixes */
1532 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1533 invo_name, ci->ci_type, ci->ci_subtype);
1534 cp = context_find(buffer);
1535 if (cp == NULL || *cp == '\0') {
1536 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1538 cp = context_find(buffer);
1540 if (cp != NULL && *cp != '\0') {
1541 if (ce->ce_unlink) {
1543 ** Temporary file already exists, so we rename to
1544 ** version with extension.
1546 char *file_org = strdup(ce->ce_file);
1547 ce->ce_file = add(cp, ce->ce_file);
1548 if (rename(file_org, ce->ce_file)) {
1549 adios(ce->ce_file, "unable to rename %s to ",
1555 ce->ce_file = add(cp, ce->ce_file);
1559 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1560 content_error(ce->ce_file, ct,
1561 "unable to fopen for reading/writing");
1565 if ((len = ct->c_end - ct->c_begin) < 0)
1566 adios(NULL, "internal error(1)");
1569 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1570 content_error(ct->c_file, ct,
1571 "unable to open for reading");
1581 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1583 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1585 content_error(ct->c_file, ct, "error reading from");
1589 content_error(NULL, ct, "premature eof");
1597 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1602 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1604 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1606 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1610 bits |= value << bitno;
1612 if ((bitno -= 6) < 0) {
1613 putc((char) *b1, ce->ce_fp);
1615 putc((char) *b2, ce->ce_fp);
1617 putc((char) *b3, ce->ce_fp);
1621 if (ferror(ce->ce_fp)) {
1622 content_error(ce->ce_file, ct,
1623 "error writing to");
1626 bitno = 18, bits = 0L, skip = 0;
1632 goto self_delimiting;
1641 fprintf(stderr, "premature ending (bitno %d)\n",
1644 content_error(NULL, ct, "invalid BASE64 encoding");
1649 fseek(ct->c_fp, 0L, SEEK_SET);
1651 if (fflush(ce->ce_fp)) {
1652 content_error(ce->ce_file, ct, "error writing to");
1656 fseek(ce->ce_fp, 0L, SEEK_SET);
1659 *file = ce->ce_file;
1664 return fileno(ce->ce_fp);
1667 free_encoding(ct, 0);
1680 static char hex2nib[0x80] = {
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1688 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1703 return init_encoding(ct, openQuoted);
1708 openQuoted(CT ct, char **file)
1710 int cc, len, quoted, own_ct_fp = 0;
1711 unsigned char *cp, *ep;
1712 char buffer[BUFSIZ];
1713 unsigned char mask = 0;
1715 /* sbeck -- handle suffixes */
1720 fseek(ce->ce_fp, 0L, SEEK_SET);
1725 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1726 content_error(ce->ce_file, ct,
1727 "unable to fopen for reading");
1733 if (*file == NULL) {
1734 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1737 ce->ce_file = getcpy(*file);
1741 /* sbeck@cise.ufl.edu -- handle suffixes */
1743 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1744 invo_name, ci->ci_type, ci->ci_subtype);
1745 cp = context_find(buffer);
1746 if (cp == NULL || *cp == '\0') {
1747 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1749 cp = context_find(buffer);
1751 if (cp != NULL && *cp != '\0') {
1752 if (ce->ce_unlink) {
1754 ** Temporary file already exists, so we rename to
1755 ** version with extension.
1757 char *file_org = strdup(ce->ce_file);
1758 ce->ce_file = add(cp, ce->ce_file);
1759 if (rename(file_org, ce->ce_file)) {
1760 adios(ce->ce_file, "unable to rename %s to ",
1766 ce->ce_file = add(cp, ce->ce_file);
1770 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1771 content_error(ce->ce_file, ct,
1772 "unable to fopen for reading/writing");
1776 if ((len = ct->c_end - ct->c_begin) < 0)
1777 adios(NULL, "internal error(2)");
1780 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1781 content_error(ct->c_file, ct,
1782 "unable to open for reading");
1790 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1792 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1793 content_error(NULL, ct, "premature eof");
1797 if ((cc = strlen(buffer)) > len)
1801 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1806 for (; cp < ep; cp++) {
1808 /* in an escape sequence */
1810 /* at byte 1 of an escape sequence */
1811 mask = hex2nib[*cp & 0x7f];
1812 /* next is byte 2 */
1815 /* at byte 2 of an escape sequence */
1817 mask |= hex2nib[*cp & 0x7f];
1818 putc(mask, ce->ce_fp);
1819 if (ferror(ce->ce_fp)) {
1820 content_error(ce->ce_file, ct, "error writing to");
1824 ** finished escape sequence; next may
1825 ** be literal or a new escape sequence
1829 /* on to next byte */
1833 /* not in an escape sequence */
1836 ** starting an escape sequence,
1839 if (cp + 1 < ep && cp[1] == '\n') {
1840 /* "=\n" soft line break, eat the \n */
1844 if (cp + 1 >= ep || cp + 2 >= ep) {
1846 ** We don't have 2 bytes left,
1847 ** so this is an invalid escape
1848 ** sequence; just show the raw bytes
1851 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1853 ** Next 2 bytes are hex digits,
1854 ** making this a valid escape
1855 ** sequence; let's decode it (above).
1861 ** One or both of the next 2 is
1862 ** out of range, making this an
1863 ** invalid escape sequence; just
1864 ** show the raw bytes (below).
1869 /* Just show the raw byte. */
1870 putc(*cp, ce->ce_fp);
1871 if (ferror(ce->ce_fp)) {
1872 content_error(ce->ce_file, ct,
1873 "error writing to");
1879 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1883 fseek(ct->c_fp, 0L, SEEK_SET);
1885 if (fflush(ce->ce_fp)) {
1886 content_error(ce->ce_file, ct, "error writing to");
1890 fseek(ce->ce_fp, 0L, SEEK_SET);
1893 *file = ce->ce_file;
1898 return fileno(ce->ce_fp);
1901 free_encoding(ct, 0);
1917 if (init_encoding(ct, open7Bit) == NOTOK)
1920 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1926 open7Bit(CT ct, char **file)
1928 int cc, fd, len, own_ct_fp = 0;
1929 char buffer[BUFSIZ];
1930 /* sbeck -- handle suffixes */
1937 fseek(ce->ce_fp, 0L, SEEK_SET);
1942 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1943 content_error(ce->ce_file, ct,
1944 "unable to fopen for reading");
1950 if (*file == NULL) {
1951 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1954 ce->ce_file = getcpy(*file);
1958 /* sbeck@cise.ufl.edu -- handle suffixes */
1960 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1961 invo_name, ci->ci_type, ci->ci_subtype);
1962 cp = context_find(buffer);
1963 if (cp == NULL || *cp == '\0') {
1964 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1966 cp = context_find(buffer);
1968 if (cp != NULL && *cp != '\0') {
1969 if (ce->ce_unlink) {
1971 ** Temporary file already exists, so we rename to
1972 ** version with extension.
1974 char *file_org = strdup(ce->ce_file);
1975 ce->ce_file = add(cp, ce->ce_file);
1976 if (rename(file_org, ce->ce_file)) {
1977 adios(ce->ce_file, "unable to rename %s to ",
1983 ce->ce_file = add(cp, ce->ce_file);
1987 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1988 content_error(ce->ce_file, ct,
1989 "unable to fopen for reading/writing");
1993 if (ct->c_type == CT_MULTIPART) {
1995 CI ci = &ct->c_ctinfo;
1998 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2000 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2001 strlen(ci->ci_subtype);
2002 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2003 putc(';', ce->ce_fp);
2006 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2009 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2010 fputs("\n\t", ce->ce_fp);
2013 putc(' ', ce->ce_fp);
2016 fprintf(ce->ce_fp, "%s", buffer);
2020 if (ci->ci_comment) {
2021 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2023 fputs("\n\t", ce->ce_fp);
2026 putc(' ', ce->ce_fp);
2029 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2032 fprintf(ce->ce_fp, "\n");
2034 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2036 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2038 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2039 fprintf(ce->ce_fp, "\n");
2042 if ((len = ct->c_end - ct->c_begin) < 0)
2043 adios(NULL, "internal error(3)");
2046 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2047 content_error(ct->c_file, ct,
2048 "unable to open for reading");
2054 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2056 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2058 content_error(ct->c_file, ct, "error reading from");
2062 content_error(NULL, ct, "premature eof");
2070 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2071 if (ferror(ce->ce_fp)) {
2072 content_error(ce->ce_file, ct,
2073 "error writing to");
2078 fseek(ct->c_fp, 0L, SEEK_SET);
2080 if (fflush(ce->ce_fp)) {
2081 content_error(ce->ce_file, ct, "error writing to");
2085 fseek(ce->ce_fp, 0L, SEEK_SET);
2088 *file = ce->ce_file;
2093 return fileno(ce->ce_fp);
2096 free_encoding(ct, 0);