2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
11 #include <h/signals.h>
17 #include <h/mhparse.h>
22 extern int endian; /* mhmisc.c */
24 extern pid_t xpid; /* mhshowsbr.c */
27 ** Directory to place temp files. This must
28 ** be set before these routines are called.
33 ** Structures for TEXT messages
35 struct k2v SubText[] = {
36 { "plain", TEXT_PLAIN },
37 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
38 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
39 { NULL, TEXT_UNKNOWN } /* this one must be last! */
42 struct k2v Charset[] = {
43 { "us-ascii", CHARSET_USASCII },
44 { "iso-8859-1", CHARSET_LATIN },
45 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
49 ** Structures for MULTIPART messages
51 struct k2v SubMultiPart[] = {
52 { "mixed", MULTI_MIXED },
53 { "alternative", MULTI_ALTERNATE },
54 { "digest", MULTI_DIGEST },
55 { "parallel", MULTI_PARALLEL },
56 { NULL, MULTI_UNKNOWN } /* this one must be last! */
60 ** Structures for MESSAGE messages
62 struct k2v SubMessage[] = {
63 { "rfc822", MESSAGE_RFC822 },
64 { "partial", MESSAGE_PARTIAL },
65 { "external-body", MESSAGE_EXTERNAL },
66 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
70 ** Structure for APPLICATION messages
72 struct k2v SubApplication[] = {
73 { "octet-stream", APPLICATION_OCTETS },
74 { "postscript", APPLICATION_POSTSCRIPT },
75 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
82 int make_intermediates(char *);
83 void content_error(char *, CT, char *, ...);
86 void free_content(CT);
87 void free_encoding(CT, int);
92 static CT get_content(FILE *, char *, int);
93 static int get_comment(CT, unsigned char **, int);
95 static int InitGeneric(CT);
96 static int InitText(CT);
97 static int InitMultiPart(CT);
98 static void reverse_parts(CT);
99 static int InitMessage(CT);
100 static int InitApplication(CT);
101 static int init_encoding(CT, OpenCEFunc);
102 static unsigned long size_encoding(CT);
103 static int InitBase64(CT);
104 static int openBase64(CT, char **);
105 static int InitQuoted(CT);
106 static int openQuoted(CT, char **);
107 static int Init7Bit(CT);
109 struct str2init str2cts[] = {
110 { "application", CT_APPLICATION, InitApplication },
111 { "audio", CT_AUDIO, InitGeneric },
112 { "image", CT_IMAGE, InitGeneric },
113 { "message", CT_MESSAGE, InitMessage },
114 { "multipart", CT_MULTIPART, InitMultiPart },
115 { "text", CT_TEXT, InitText },
116 { "video", CT_VIDEO, InitGeneric },
117 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
118 { NULL, CT_UNKNOWN, NULL },
121 struct str2init str2ces[] = {
122 { "base64", CE_BASE64, InitBase64 },
123 { "quoted-printable", CE_QUOTED, InitQuoted },
124 { "8bit", CE_8BIT, Init7Bit },
125 { "7bit", CE_7BIT, Init7Bit },
126 { "binary", CE_BINARY, Init7Bit },
127 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
128 { NULL, CE_UNKNOWN, NULL },
135 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
146 ** Main entry point for parsing a MIME message or file.
147 ** It returns the Content structure for the top level
148 ** entity in the file.
151 parse_mime(char *file)
159 ** Check if file is actually standard input
161 if ((is_stdin = (strcmp(file, "-")==0))) {
162 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
164 advise("mhparse", "unable to create temporary file");
167 file = getcpy(tfile);
170 while (fgets(buffer, sizeof(buffer), stdin))
176 advise("stdin", "error reading");
181 advise(file, "error writing");
184 fseek(fp, 0L, SEEK_SET);
185 } else if ((fp = fopen(file, "r")) == NULL) {
186 advise(file, "unable to read");
190 if (!(ct = get_content(fp, file, 1))) {
193 advise(NULL, "unable to decode %s", file);
198 ct->c_unlink = 1; /* temp file to remove */
202 if (ct->c_end == 0L) {
203 fseek(fp, 0L, SEEK_END);
204 ct->c_end = ftell(fp);
207 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
219 ** Main routine for reading/parsing the headers
220 ** of a message content.
222 ** toplevel = 1 # we are at the top level of the message
223 ** toplevel = 0 # we are inside message type or multipart type
224 ** # other than multipart/digest
225 ** toplevel = -1 # we are inside multipart/digest
226 ** NB: on failure we will fclose(in)!
230 get_content(FILE *in, char *file, int toplevel)
233 char buf[BUFSIZ], name[NAMESZ];
238 /* allocate the content structure */
239 if (!(ct = (CT) calloc(1, sizeof(*ct))))
240 adios(NULL, "out of memory");
243 ct->c_file = getcpy(file);
244 ct->c_begin = ftell(ct->c_fp) + 1;
247 ** Parse the header fields for this
248 ** content into a linked list.
250 for (compnum = 1, state = FLD;;) {
251 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
257 /* get copies of the buffers */
261 /* if necessary, get rest of field */
262 while (state == FLDPLUS) {
263 state = m_getfld(state, name, buf,
265 vp = add(buf, vp); /* add to previous value */
268 /* Now add the header data to the list */
269 add_header(ct, np, vp);
271 /* continue, if this isn't the last header field */
272 if (state != FLDEOF) {
273 ct->c_begin = ftell(in) + 1;
280 ct->c_begin = ftell(in) - strlen(buf);
284 ct->c_begin = ftell(in);
289 adios(NULL, "message format error in component #%d",
293 adios(NULL, "getfld() returned %d", state);
296 /* break out of the loop */
301 ** Read the content headers. We will parse the
302 ** MIME related header fields into their various
303 ** structures and set internal flags related to
304 ** content type/subtype, etc.
307 hp = ct->c_first_hf; /* start at first header field */
309 /* Get MIME-Version field */
310 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
313 unsigned char *cp, *dp;
316 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
319 ct->c_vrsn = getcpy(hp->value);
321 /* Now, cleanup this field */
326 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
328 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
333 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
335 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
338 for (dp = cp; istoken(*dp); dp++)
342 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
345 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
348 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349 /* Get Content-Type field */
350 struct str2init *s2i;
351 CI ci = &ct->c_ctinfo;
353 /* Check if we've already seen a Content-Type header */
355 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
359 /* Parse the Content-Type field */
360 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
364 ** Set the Init function and the internal
365 ** flag for this content type.
367 for (s2i = str2cts; s2i->si_key; s2i++)
368 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
370 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
372 ct->c_type = s2i->si_val;
373 ct->c_ctinitfnx = s2i->si_init;
375 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376 /* Get Content-Transfer-Encoding field */
378 unsigned char *cp, *dp;
379 struct str2init *s2i;
382 ** Check if we've already seen the
383 ** Content-Transfer-Encoding field
386 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
390 /* get copy of this field */
391 ct->c_celine = cp = getcpy(hp->value);
395 for (dp = cp; istoken(*dp); dp++)
401 ** Find the internal flag and Init function
402 ** for this transfer encoding.
404 for (s2i = str2ces; s2i->si_key; s2i++)
405 if (!mh_strcasecmp(cp, s2i->si_key))
407 if (!s2i->si_key && !uprf(cp, "X-"))
410 ct->c_encoding = s2i->si_val;
412 /* Call the Init function for this encoding */
413 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
416 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417 /* Get Content-ID field */
418 ct->c_id = add(hp->value, ct->c_id);
420 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421 /* Get Content-Description field */
422 ct->c_descr = add(hp->value, ct->c_descr);
424 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425 /* Get Content-Disposition field */
426 ct->c_dispo = add(hp->value, ct->c_dispo);
430 hp = hp->next; /* next header field */
434 ** Check if we saw a Content-Type field.
435 ** If not, then assign a default value for
436 ** it, and the Init function.
440 ** If we are inside a multipart/digest message,
441 ** the default type is message/rfc822
444 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
446 ct->c_type = CT_MESSAGE;
447 ct->c_ctinitfnx = InitMessage;
450 ** Otherwise, the default type is text/plain
452 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
454 ct->c_type = CT_TEXT;
455 ct->c_ctinitfnx = InitText;
459 /* Use default Transfer-Encoding, if necessary */
461 ct->c_encoding = CE_7BIT;
474 ** small routine to add header field to list
478 add_header(CT ct, char *name, char *value)
482 /* allocate header field structure */
483 hp = mh_xmalloc(sizeof(*hp));
485 /* link data into header structure */
490 /* link header structure into the list */
491 if (ct->c_first_hf == NULL) {
492 ct->c_first_hf = hp; /* this is the first */
495 ct->c_last_hf->next = hp; /* add it to the end */
504 ** Make sure that buf contains at least one appearance of name,
505 ** followed by =. If not, insert both name and value, just after
506 ** first semicolon, if any. Note that name should not contain a
507 ** trailing =. Quotes will be added around the value. Typical
508 ** usage: make sure that a Content-Disposition header contains
509 ** filename="foo". If it doesn't and value does, use value from
513 incl_name_value(unsigned char *buf, char *name, char *value) {
516 /* Assume that name is non-null. */
518 char *name_plus_equal = concat(name, "=", NULL);
520 if (!strstr(buf, name_plus_equal)) {
523 char *prefix, *suffix;
525 /* Trim trailing space, esp. newline. */
526 for (cp = &buf[strlen(buf) - 1];
527 cp >= buf && isspace(*cp); --cp) {
531 insertion = concat("; ", name, "=", "\"", value, "\"",
535 ** Insert at first semicolon, if any.
536 ** If none, append to end.
538 prefix = getcpy(buf);
539 if ((cp = strchr(prefix, ';'))) {
540 suffix = concat(cp, NULL);
542 newbuf = concat(prefix, insertion, suffix,
547 newbuf = concat(buf, insertion, "\n", NULL);
555 free(name_plus_equal);
562 ** Extract just name_suffix="foo", if any, from value. If there isn't
563 ** one, return the entire value. Note that, for example, a name_suffix
564 ** of name will match filename="foo", and return foo.
567 extract_name_value(char *name_suffix, char *value) {
568 char *extracted_name_value = value;
569 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
570 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
573 free(name_suffix_plus_quote);
574 if (name_suffix_equals) {
575 char *name_suffix_begin;
578 for (cp = name_suffix_equals; *cp != '"'; ++cp)
580 name_suffix_begin = ++cp;
581 /* Find second \". */
582 for (; *cp != '"'; ++cp)
585 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
586 memcpy(extracted_name_value, name_suffix_begin,
587 cp - name_suffix_begin);
588 extracted_name_value[cp - name_suffix_begin] = '\0';
591 return extracted_name_value;
595 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
596 ** directives. Fills in the information of the CTinfo structure.
599 get_ctinfo(unsigned char *cp, CT ct, int magic)
608 i = strlen(invo_name) + 2;
610 /* store copy of Content-Type line */
611 cp = ct->c_ctline = getcpy(cp);
613 while (isspace(*cp)) /* trim leading spaces */
616 /* change newlines to spaces */
617 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
620 /* trim trailing spaces */
621 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
627 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
629 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
632 for (dp = cp; istoken(*dp); dp++)
635 ci->ci_type = getcpy(cp); /* store content type */
639 advise(NULL, "invalid %s: field in message %s (empty type)",
640 TYPE_FIELD, ct->c_file);
644 /* down case the content type string */
645 for (dp = ci->ci_type; *dp; dp++)
646 if (isalpha(*dp) && isupper(*dp))
652 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
657 ci->ci_subtype = getcpy("");
665 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
668 for (dp = cp; istoken(*dp); dp++)
671 ci->ci_subtype = getcpy(cp); /* store the content subtype */
674 if (!*ci->ci_subtype) {
675 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
679 /* down case the content subtype string */
680 for (dp = ci->ci_subtype; *dp; dp++)
681 if (isalpha(*dp) && isupper(*dp))
688 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
692 ** Parse attribute/value pairs given with Content-Type
694 ep = (ap = ci->ci_attrs) + NPARMS;
700 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
708 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
712 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
716 /* down case the attribute name */
717 for (dp = cp; istoken(*dp); dp++)
718 if (isalpha(*dp) && isupper(*dp))
721 for (up = dp; isspace(*dp);)
723 if (dp == cp || *dp != '=') {
724 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
728 vp = (*ap = getcpy(cp)) + (up - cp);
730 for (dp++; isspace(*dp);)
733 /* now add the attribute value */
734 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
737 for (cp = ++dp, dp = vp;;) {
741 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
746 if ((c = *cp++) == '\0')
761 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
766 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
774 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
779 ** Get any <Content-Id> given in buffer
781 if (magic && *cp == '<') {
786 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
787 advise(NULL, "invalid ID in message %s", ct->c_file);
793 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
804 ** Get any [Content-Description] given in buffer.
806 if (magic && *cp == '[') {
808 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
812 advise(NULL, "invalid description in message %s",
821 ct->c_descr = concat(ct->c_descr, "\n", NULL);
832 ** Get any {Content-Disposition} given in buffer.
834 if (magic && *cp == '{') {
836 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
840 advise(NULL, "invalid disposition in message %s",
849 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
860 ** Check if anything is left over
864 ci->ci_magic = getcpy(cp);
867 ** If there is a Content-Disposition header and
868 ** it doesn't have a *filename=, extract it from
869 ** the magic contents. The mhbasename call skips
870 ** any leading directory components.
873 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
875 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
883 get_comment(CT ct, unsigned char **ap, int istype)
888 char c, buffer[BUFSIZ], *dp;
900 advise(NULL, "invalid comment in message %s's %s: field",
901 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
906 if ((c = *cp++) == '\0')
929 if ((dp = ci->ci_comment)) {
930 ci->ci_comment = concat(dp, " ", buffer, NULL);
933 ci->ci_comment = getcpy(buffer);
948 ** Handles content types audio, image, and video.
949 ** There's not much to do right here.
955 return OK; /* not much to do here */
969 CI ci = &ct->c_ctinfo;
971 /* check for missing subtype */
972 if (!*ci->ci_subtype)
973 ci->ci_subtype = add("plain", ci->ci_subtype);
976 for (kv = SubText; kv->kv_key; kv++)
977 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
979 ct->c_subtype = kv->kv_value;
981 /* allocate text character set structure */
982 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
983 adios(NULL, "out of memory");
984 ct->c_ctparams = (void *) t;
986 /* scan for charset parameter */
987 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
988 if (!mh_strcasecmp(*ap, "charset"))
991 /* check if content specified a character set */
994 ct->c_charset = getcpy(norm_charmap(*ep));
995 /* match character set or set to CHARSET_UNKNOWN */
996 for (kv = Charset; kv->kv_key; kv++) {
997 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1001 t->tx_charset = kv->kv_value;
1003 t->tx_charset = CHARSET_UNSPECIFIED;
1015 InitMultiPart(CT ct)
1019 unsigned char *cp, *dp;
1021 char *bp, buffer[BUFSIZ];
1022 struct multipart *m;
1024 struct part *part, **next;
1025 CI ci = &ct->c_ctinfo;
1030 ** The encoding for multipart messages must be either
1031 ** 7bit, 8bit, or binary (per RFC2045).
1033 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1034 && ct->c_encoding != CE_BINARY) {
1035 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1040 for (kv = SubMultiPart; kv->kv_key; kv++)
1041 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1043 ct->c_subtype = kv->kv_value;
1046 ** Check for "boundary" parameter, which is
1047 ** required for multipart messages.
1050 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1051 if (!mh_strcasecmp(*ap, "boundary")) {
1057 /* complain if boundary parameter is missing */
1059 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1063 /* allocate primary structure for multipart info */
1064 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1065 adios(NULL, "out of memory");
1066 ct->c_ctparams = (void *) m;
1068 /* check if boundary parameter contains only whitespace characters */
1069 for (cp = bp; isspace(*cp); cp++)
1072 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1076 /* remove trailing whitespace from boundary parameter */
1077 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1082 /* record boundary separators */
1083 m->mp_start = concat(bp, "\n", NULL);
1084 m->mp_stop = concat(bp, "--\n", NULL);
1086 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1087 advise(ct->c_file, "unable to open for reading");
1091 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1093 next = &m->mp_parts;
1097 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1101 pos += strlen(buffer);
1102 if (buffer[0] != '-' || buffer[1] != '-')
1105 if (strcmp(buffer + 2, m->mp_start)!=0)
1108 if ((part = (struct part *) calloc(1, sizeof(*part)))
1110 adios(NULL, "out of memory");
1112 next = &part->mp_next;
1114 if (!(p = get_content(fp, ct->c_file,
1115 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1122 fseek(fp, pos, SEEK_SET);
1125 if (strcmp(buffer + 2, m->mp_start) == 0) {
1129 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1130 if (p->c_end < p->c_begin)
1131 p->c_begin = p->c_end;
1136 if (strcmp(buffer + 2, m->mp_stop) == 0)
1142 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1143 if (!inout && part) {
1145 p->c_end = ct->c_end;
1147 if (p->c_begin >= p->c_end) {
1148 for (next = &m->mp_parts; *next != part;
1149 next = &((*next)->mp_next))
1153 free((char *) part);
1158 /* reverse the order of the parts for multipart/alternative */
1159 if (ct->c_subtype == MULTI_ALTERNATE)
1163 ** label all subparts with part number, and
1164 ** then initialize the content of the subpart.
1169 char partnam[BUFSIZ];
1172 snprintf(partnam, sizeof(partnam), "%s.",
1174 pp = partnam + strlen(partnam);
1179 for (part = m->mp_parts, partnum = 1; part;
1180 part = part->mp_next, partnum++) {
1183 sprintf(pp, "%d", partnum);
1184 p->c_partno = getcpy(partnam);
1186 /* initialize the content of the subparts */
1187 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1202 ** reverse the order of the parts of a multipart
1206 reverse_parts(CT ct)
1209 struct multipart *m;
1210 struct part **base, **bmp, **next, *part;
1212 m = (struct multipart *) ct->c_ctparams;
1214 /* if only one part, just return */
1215 if (!m->mp_parts || !m->mp_parts->mp_next)
1218 /* count number of parts */
1220 for (part = m->mp_parts; part; part = part->mp_next)
1223 /* allocate array of pointers to the parts */
1224 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1225 adios(NULL, "out of memory");
1228 /* point at all the parts */
1229 for (part = m->mp_parts; part; part = part->mp_next)
1233 /* reverse the order of the parts */
1234 next = &m->mp_parts;
1235 for (bmp--; bmp >= base; bmp--) {
1238 next = &part->mp_next;
1242 /* free array of pointers */
1243 free((char *) base);
1255 CI ci = &ct->c_ctinfo;
1257 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1258 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1262 /* check for missing subtype */
1263 if (!*ci->ci_subtype)
1264 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1267 for (kv = SubMessage; kv->kv_key; kv++)
1268 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1270 ct->c_subtype = kv->kv_value;
1272 switch (ct->c_subtype) {
1273 case MESSAGE_RFC822:
1276 case MESSAGE_PARTIAL:
1281 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1282 adios(NULL, "out of memory");
1283 ct->c_ctparams = (void *) p;
1286 ** scan for parameters "id", "number",
1289 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1290 if (!mh_strcasecmp(*ap, "id")) {
1291 p->pm_partid = getcpy(*ep);
1294 if (!mh_strcasecmp(*ap, "number")) {
1295 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1297 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1302 if (!mh_strcasecmp(*ap, "total")) {
1303 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1310 if (!p->pm_partid || !p->pm_partno
1311 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1312 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1318 case MESSAGE_EXTERNAL:
1323 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1324 advise(ct->c_file, "unable to open for reading");
1328 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1330 if (!(p = get_content(fp, ct->c_file, 0))) {
1336 p->c_end = p->c_begin;
1341 switch (p->c_type) {
1346 if (p->c_subtype != MESSAGE_RFC822)
1351 (*p->c_ctinitfnx) (p);
1370 InitApplication(CT ct)
1373 CI ci = &ct->c_ctinfo;
1376 for (kv = SubApplication; kv->kv_key; kv++)
1377 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1379 ct->c_subtype = kv->kv_value;
1386 ** TRANSFER ENCODINGS
1390 init_encoding(CT ct, OpenCEFunc openfnx)
1394 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1395 adios(NULL, "out of memory");
1398 ct->c_ceopenfnx = openfnx;
1399 ct->c_ceclosefnx = close_encoding;
1400 ct->c_cesizefnx = size_encoding;
1407 close_encoding(CT ct)
1411 if (!(ce = ct->c_cefile))
1421 static unsigned long
1422 size_encoding(CT ct)
1430 if (!(ce = ct->c_cefile))
1431 return (ct->c_end - ct->c_begin);
1433 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1434 return (long) st.st_size;
1437 if (stat(ce->ce_file, &st) != NOTOK)
1438 return (long) st.st_size;
1443 if (ct->c_encoding == CE_EXTERNAL)
1444 return (ct->c_end - ct->c_begin);
1447 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1448 return (ct->c_end - ct->c_begin);
1450 if (fstat(fd, &st) != NOTOK)
1451 size = (long) st.st_size;
1455 (*ct->c_ceclosefnx) (ct);
1464 static unsigned char b642nib[0x80] = {
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1468 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1469 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1470 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1471 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1472 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1473 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1474 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1475 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1476 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1477 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1478 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1479 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1480 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1487 return init_encoding(ct, openBase64);
1492 openBase64(CT ct, char **file)
1495 int fd, len, skip, own_ct_fp = 0;
1497 unsigned char value, *b, *b1, *b2, *b3;
1498 unsigned char *cp, *ep;
1499 char buffer[BUFSIZ];
1500 /* sbeck -- handle suffixes */
1504 b = (unsigned char *) &bits;
1505 b1 = &b[endian > 0 ? 1 : 2];
1506 b2 = &b[endian > 0 ? 2 : 1];
1507 b3 = &b[endian > 0 ? 3 : 0];
1511 fseek(ce->ce_fp, 0L, SEEK_SET);
1516 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1517 content_error(ce->ce_file, ct,
1518 "unable to fopen for reading");
1524 if (*file == NULL) {
1525 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1528 ce->ce_file = getcpy(*file);
1532 /* sbeck@cise.ufl.edu -- handle suffixes */
1534 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1535 invo_name, ci->ci_type, ci->ci_subtype);
1536 cp = context_find(buffer);
1537 if (cp == NULL || *cp == '\0') {
1538 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1540 cp = context_find(buffer);
1542 if (cp != NULL && *cp != '\0') {
1543 if (ce->ce_unlink) {
1545 ** Temporary file already exists, so we rename to
1546 ** version with extension.
1548 char *file_org = strdup(ce->ce_file);
1549 ce->ce_file = add(cp, ce->ce_file);
1550 if (rename(file_org, ce->ce_file)) {
1551 adios(ce->ce_file, "unable to rename %s to ",
1557 ce->ce_file = add(cp, ce->ce_file);
1561 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1562 content_error(ce->ce_file, ct,
1563 "unable to fopen for reading/writing");
1567 if ((len = ct->c_end - ct->c_begin) < 0)
1568 adios(NULL, "internal error(1)");
1571 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1572 content_error(ct->c_file, ct,
1573 "unable to open for reading");
1583 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1585 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1587 content_error(ct->c_file, ct, "error reading from");
1591 content_error(NULL, ct, "premature eof");
1599 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1604 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1606 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1608 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1612 bits |= value << bitno;
1614 if ((bitno -= 6) < 0) {
1615 putc((char) *b1, ce->ce_fp);
1617 putc((char) *b2, ce->ce_fp);
1619 putc((char) *b3, ce->ce_fp);
1623 if (ferror(ce->ce_fp)) {
1624 content_error(ce->ce_file, ct,
1625 "error writing to");
1628 bitno = 18, bits = 0L, skip = 0;
1634 goto self_delimiting;
1643 fprintf(stderr, "premature ending (bitno %d)\n",
1646 content_error(NULL, ct, "invalid BASE64 encoding");
1651 fseek(ct->c_fp, 0L, SEEK_SET);
1653 if (fflush(ce->ce_fp)) {
1654 content_error(ce->ce_file, ct, "error writing to");
1658 fseek(ce->ce_fp, 0L, SEEK_SET);
1661 *file = ce->ce_file;
1666 return fileno(ce->ce_fp);
1669 free_encoding(ct, 0);
1682 static char hex2nib[0x80] = {
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1690 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1697 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1705 return init_encoding(ct, openQuoted);
1710 openQuoted(CT ct, char **file)
1712 int cc, len, quoted, own_ct_fp = 0;
1713 unsigned char *cp, *ep;
1714 char buffer[BUFSIZ];
1715 unsigned char mask = 0;
1717 /* sbeck -- handle suffixes */
1722 fseek(ce->ce_fp, 0L, SEEK_SET);
1727 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1728 content_error(ce->ce_file, ct,
1729 "unable to fopen for reading");
1735 if (*file == NULL) {
1736 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1739 ce->ce_file = getcpy(*file);
1743 /* sbeck@cise.ufl.edu -- handle suffixes */
1745 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1746 invo_name, ci->ci_type, ci->ci_subtype);
1747 cp = context_find(buffer);
1748 if (cp == NULL || *cp == '\0') {
1749 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1751 cp = context_find(buffer);
1753 if (cp != NULL && *cp != '\0') {
1754 if (ce->ce_unlink) {
1755 // Temporary file already exists, so we rename to
1756 // version with extension.
1757 char *file_org = strdup(ce->ce_file);
1758 ce->ce_file = add(cp, ce->ce_file);
1759 if (rename(file_org, ce->ce_file)) {
1760 adios(ce->ce_file, "unable to rename %s to ",
1766 ce->ce_file = add(cp, ce->ce_file);
1770 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1771 content_error(ce->ce_file, ct,
1772 "unable to fopen for reading/writing");
1776 if ((len = ct->c_end - ct->c_begin) < 0)
1777 adios(NULL, "internal error(2)");
1780 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1781 content_error(ct->c_file, ct,
1782 "unable to open for reading");
1790 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1792 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1793 content_error(NULL, ct, "premature eof");
1797 if ((cc = strlen(buffer)) > len)
1801 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1806 for (; cp < ep; cp++) {
1808 /* in an escape sequence */
1810 /* at byte 1 of an escape sequence */
1811 mask = hex2nib[*cp & 0x7f];
1812 /* next is byte 2 */
1815 /* at byte 2 of an escape sequence */
1817 mask |= hex2nib[*cp & 0x7f];
1818 putc(mask, ce->ce_fp);
1819 if (ferror(ce->ce_fp)) {
1820 content_error(ce->ce_file, ct, "error writing to");
1824 ** finished escape sequence; next may
1825 ** be literal or a new escape sequence
1829 /* on to next byte */
1833 /* not in an escape sequence */
1836 ** starting an escape sequence,
1839 if (cp + 1 < ep && cp[1] == '\n') {
1840 /* "=\n" soft line break, eat the \n */
1844 if (cp + 1 >= ep || cp + 2 >= ep) {
1846 ** We don't have 2 bytes left,
1847 ** so this is an invalid escape
1848 ** sequence; just show the raw bytes
1851 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1853 ** Next 2 bytes are hex digits,
1854 ** making this a valid escape
1855 ** sequence; let's decode it (above).
1861 ** One or both of the next 2 is
1862 ** out of range, making this an
1863 ** invalid escape sequence; just
1864 ** show the raw bytes (below).
1869 /* Just show the raw byte. */
1870 putc(*cp, ce->ce_fp);
1871 if (ferror(ce->ce_fp)) {
1872 content_error(ce->ce_file, ct,
1873 "error writing to");
1879 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1883 fseek(ct->c_fp, 0L, SEEK_SET);
1885 if (fflush(ce->ce_fp)) {
1886 content_error(ce->ce_file, ct, "error writing to");
1890 fseek(ce->ce_fp, 0L, SEEK_SET);
1893 *file = ce->ce_file;
1898 return fileno(ce->ce_fp);
1901 free_encoding(ct, 0);
1917 if (init_encoding(ct, open7Bit) == NOTOK)
1920 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1926 open7Bit(CT ct, char **file)
1928 int cc, fd, len, own_ct_fp = 0;
1929 char buffer[BUFSIZ];
1930 /* sbeck -- handle suffixes */
1937 fseek(ce->ce_fp, 0L, SEEK_SET);
1942 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1943 content_error(ce->ce_file, ct,
1944 "unable to fopen for reading");
1950 if (*file == NULL) {
1951 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1954 ce->ce_file = getcpy(*file);
1958 /* sbeck@cise.ufl.edu -- handle suffixes */
1960 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1961 invo_name, ci->ci_type, ci->ci_subtype);
1962 cp = context_find(buffer);
1963 if (cp == NULL || *cp == '\0') {
1964 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1966 cp = context_find(buffer);
1968 if (cp != NULL && *cp != '\0') {
1969 if (ce->ce_unlink) {
1971 ** Temporary file already exists, so we rename to
1972 ** version with extension.
1974 char *file_org = strdup(ce->ce_file);
1975 ce->ce_file = add(cp, ce->ce_file);
1976 if (rename(file_org, ce->ce_file)) {
1977 adios(ce->ce_file, "unable to rename %s to ",
1983 ce->ce_file = add(cp, ce->ce_file);
1987 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1988 content_error(ce->ce_file, ct,
1989 "unable to fopen for reading/writing");
1993 if (ct->c_type == CT_MULTIPART) {
1995 CI ci = &ct->c_ctinfo;
1998 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2000 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2001 strlen(ci->ci_subtype);
2002 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2003 putc(';', ce->ce_fp);
2006 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2009 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2010 fputs("\n\t", ce->ce_fp);
2013 putc(' ', ce->ce_fp);
2016 fprintf(ce->ce_fp, "%s", buffer);
2020 if (ci->ci_comment) {
2021 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2023 fputs("\n\t", ce->ce_fp);
2026 putc(' ', ce->ce_fp);
2029 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2032 fprintf(ce->ce_fp, "\n");
2034 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2036 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2038 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2039 fprintf(ce->ce_fp, "\n");
2042 if ((len = ct->c_end - ct->c_begin) < 0)
2043 adios(NULL, "internal error(3)");
2046 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2047 content_error(ct->c_file, ct,
2048 "unable to open for reading");
2054 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2056 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2058 content_error(ct->c_file, ct, "error reading from");
2062 content_error(NULL, ct, "premature eof");
2070 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2071 if (ferror(ce->ce_fp)) {
2072 content_error(ce->ce_file, ct,
2073 "error writing to");
2078 fseek(ct->c_fp, 0L, SEEK_SET);
2080 if (fflush(ce->ce_fp)) {
2081 content_error(ce->ce_file, ct, "error writing to");
2085 fseek(ce->ce_fp, 0L, SEEK_SET);
2088 *file = ce->ce_file;
2093 return fileno(ce->ce_fp);
2096 free_encoding(ct, 0);