2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
11 #include <h/signals.h>
16 #include <h/mhparse.h>
21 extern int endian; /* mhmisc.c */
23 extern pid_t xpid; /* mhshowsbr.c */
26 ** Directory to place temp files. This must
27 ** be set before these routines are called.
32 ** Structures for TEXT messages
34 struct k2v SubText[] = {
35 { "plain", TEXT_PLAIN },
36 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
37 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
38 { NULL, TEXT_UNKNOWN } /* this one must be last! */
41 struct k2v Charset[] = {
42 { "us-ascii", CHARSET_USASCII },
43 { "iso-8859-1", CHARSET_LATIN },
44 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
48 ** Structures for MULTIPART messages
50 struct k2v SubMultiPart[] = {
51 { "mixed", MULTI_MIXED },
52 { "alternative", MULTI_ALTERNATE },
53 { "digest", MULTI_DIGEST },
54 { "parallel", MULTI_PARALLEL },
55 { NULL, MULTI_UNKNOWN } /* this one must be last! */
59 ** Structures for MESSAGE messages
61 struct k2v SubMessage[] = {
62 { "rfc822", MESSAGE_RFC822 },
63 { "partial", MESSAGE_PARTIAL },
64 { "external-body", MESSAGE_EXTERNAL },
65 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
69 ** Structure for APPLICATION messages
71 struct k2v SubApplication[] = {
72 { "octet-stream", APPLICATION_OCTETS },
73 { "postscript", APPLICATION_POSTSCRIPT },
74 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
81 int make_intermediates(char *);
82 void content_error(char *, CT, char *, ...);
85 void free_content(CT);
86 void free_encoding(CT, int);
91 static CT get_content(FILE *, char *, int);
92 static int get_comment(CT, unsigned char **, int);
94 static int InitGeneric(CT);
95 static int InitText(CT);
96 static int InitMultiPart(CT);
97 static void reverse_parts(CT);
98 static int InitMessage(CT);
99 static int InitApplication(CT);
100 static int init_encoding(CT, OpenCEFunc);
101 static unsigned long size_encoding(CT);
102 static int InitBase64(CT);
103 static int openBase64(CT, char **);
104 static int InitQuoted(CT);
105 static int openQuoted(CT, char **);
106 static int Init7Bit(CT);
108 struct str2init str2cts[] = {
109 { "application", CT_APPLICATION, InitApplication },
110 { "audio", CT_AUDIO, InitGeneric },
111 { "image", CT_IMAGE, InitGeneric },
112 { "message", CT_MESSAGE, InitMessage },
113 { "multipart", CT_MULTIPART, InitMultiPart },
114 { "text", CT_TEXT, InitText },
115 { "video", CT_VIDEO, InitGeneric },
116 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
117 { NULL, CT_UNKNOWN, NULL },
120 struct str2init str2ces[] = {
121 { "base64", CE_BASE64, InitBase64 },
122 { "quoted-printable", CE_QUOTED, InitQuoted },
123 { "8bit", CE_8BIT, Init7Bit },
124 { "7bit", CE_7BIT, Init7Bit },
125 { "binary", CE_BINARY, Init7Bit },
126 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
127 { NULL, CE_UNKNOWN, NULL },
134 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
145 ** Main entry point for parsing a MIME message or file.
146 ** It returns the Content structure for the top level
147 ** entity in the file.
150 parse_mime(char *file)
158 ** Check if file is actually standard input
160 if ((is_stdin = (strcmp(file, "-")==0))) {
161 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
163 advise("mhparse", "unable to create temporary file");
166 file = getcpy(tfile);
169 while (fgets(buffer, sizeof(buffer), stdin))
175 advise("stdin", "error reading");
180 advise(file, "error writing");
183 fseek(fp, 0L, SEEK_SET);
184 } else if ((fp = fopen(file, "r")) == NULL) {
185 advise(file, "unable to read");
189 if (!(ct = get_content(fp, file, 1))) {
192 advise(NULL, "unable to decode %s", file);
197 ct->c_unlink = 1; /* temp file to remove */
201 if (ct->c_end == 0L) {
202 fseek(fp, 0L, SEEK_END);
203 ct->c_end = ftell(fp);
206 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
218 ** Main routine for reading/parsing the headers
219 ** of a message content.
221 ** toplevel = 1 # we are at the top level of the message
222 ** toplevel = 0 # we are inside message type or multipart type
223 ** # other than multipart/digest
224 ** toplevel = -1 # we are inside multipart/digest
225 ** NB: on failure we will fclose(in)!
229 get_content(FILE *in, char *file, int toplevel)
232 char buf[BUFSIZ], name[NAMESZ];
237 /* allocate the content structure */
238 if (!(ct = (CT) calloc(1, sizeof(*ct))))
239 adios(NULL, "out of memory");
242 ct->c_file = getcpy(file);
243 ct->c_begin = ftell(ct->c_fp) + 1;
246 ** Parse the header fields for this
247 ** content into a linked list.
249 for (compnum = 1, state = FLD;;) {
250 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
256 /* get copies of the buffers */
260 /* if necessary, get rest of field */
261 while (state == FLDPLUS) {
262 state = m_getfld(state, name, buf,
264 vp = add(buf, vp); /* add to previous value */
267 /* Now add the header data to the list */
268 add_header(ct, np, vp);
270 /* continue, if this isn't the last header field */
271 if (state != FLDEOF) {
272 ct->c_begin = ftell(in) + 1;
279 ct->c_begin = ftell(in) - strlen(buf);
283 ct->c_begin = ftell(in);
288 adios(NULL, "message format error in component #%d",
292 adios(NULL, "getfld() returned %d", state);
295 /* break out of the loop */
300 ** Read the content headers. We will parse the
301 ** MIME related header fields into their various
302 ** structures and set internal flags related to
303 ** content type/subtype, etc.
306 hp = ct->c_first_hf; /* start at first header field */
308 /* Get MIME-Version field */
309 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
312 unsigned char *cp, *dp;
315 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
318 ct->c_vrsn = getcpy(hp->value);
320 /* Now, cleanup this field */
325 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
327 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
332 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
334 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
337 for (dp = cp; istoken(*dp); dp++)
341 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
344 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
347 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
348 /* Get Content-Type field */
349 struct str2init *s2i;
350 CI ci = &ct->c_ctinfo;
352 /* Check if we've already seen a Content-Type header */
354 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
358 /* Parse the Content-Type field */
359 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
363 ** Set the Init function and the internal
364 ** flag for this content type.
366 for (s2i = str2cts; s2i->si_key; s2i++)
367 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
369 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
371 ct->c_type = s2i->si_val;
372 ct->c_ctinitfnx = s2i->si_init;
374 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
375 /* Get Content-Transfer-Encoding field */
377 unsigned char *cp, *dp;
378 struct str2init *s2i;
381 ** Check if we've already seen the
382 ** Content-Transfer-Encoding field
385 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
389 /* get copy of this field */
390 ct->c_celine = cp = getcpy(hp->value);
394 for (dp = cp; istoken(*dp); dp++)
400 ** Find the internal flag and Init function
401 ** for this transfer encoding.
403 for (s2i = str2ces; s2i->si_key; s2i++)
404 if (!mh_strcasecmp(cp, s2i->si_key))
406 if (!s2i->si_key && !uprf(cp, "X-"))
409 ct->c_encoding = s2i->si_val;
411 /* Call the Init function for this encoding */
412 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
415 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
416 /* Get Content-ID field */
417 ct->c_id = add(hp->value, ct->c_id);
419 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
420 /* Get Content-Description field */
421 ct->c_descr = add(hp->value, ct->c_descr);
423 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
424 /* Get Content-Disposition field */
425 ct->c_dispo = add(hp->value, ct->c_dispo);
429 hp = hp->next; /* next header field */
433 ** Check if we saw a Content-Type field.
434 ** If not, then assign a default value for
435 ** it, and the Init function.
439 ** We are inside a multipart/digest message,
440 ** so the default type is message/rfc822
443 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
445 ct->c_type = CT_MESSAGE;
446 ct->c_ctinitfnx = InitMessage;
449 ** Else default type is text/plain
451 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
453 ct->c_type = CT_TEXT;
454 ct->c_ctinitfnx = InitText;
458 /* Use default Transfer-Encoding, if necessary */
460 ct->c_encoding = CE_7BIT;
473 ** small routine to add header field to list
477 add_header(CT ct, char *name, char *value)
481 /* allocate header field structure */
482 hp = mh_xmalloc(sizeof(*hp));
484 /* link data into header structure */
489 /* link header structure into the list */
490 if (ct->c_first_hf == NULL) {
491 ct->c_first_hf = hp; /* this is the first */
494 ct->c_last_hf->next = hp; /* add it to the end */
503 ** Make sure that buf contains at least one appearance of name,
504 ** followed by =. If not, insert both name and value, just after
505 ** first semicolon, if any. Note that name should not contain a
506 ** trailing =. And quotes will be added around the value. Typical
507 ** usage: make sure that a Content-Disposition header contains
508 ** filename="foo". If it doesn't and value does, use value from
512 incl_name_value(unsigned char *buf, char *name, char *value) {
515 /* Assume that name is non-null. */
517 char *name_plus_equal = concat(name, "=", NULL);
519 if (!strstr(buf, name_plus_equal)) {
522 char *prefix, *suffix;
524 /* Trim trailing space, esp. newline. */
525 for (cp = &buf[strlen(buf) - 1];
526 cp >= buf && isspace(*cp); --cp) {
530 insertion = concat("; ", name, "=", "\"", value, "\"",
534 ** Insert at first semicolon, if any.
535 ** If none, append to end.
537 prefix = getcpy(buf);
538 if ((cp = strchr(prefix, ';'))) {
539 suffix = concat(cp, NULL);
541 newbuf = concat(prefix, insertion, suffix,
546 newbuf = concat(buf, insertion, "\n", NULL);
554 free(name_plus_equal);
561 ** Extract just foo from name_suffix="foo", if any, in value. If there
562 ** isn't one, return the entire value. Note that name_suffix matches the
563 ** end of a parameter name: "name" will match filename="foo", and return foo.
566 extract_name_value(char *name_suffix, char *value) {
567 char *extracted_name_value = value;
568 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
569 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
572 free(name_suffix_plus_quote);
573 if (name_suffix_equals) {
574 char *name_suffix_begin;
577 for (cp = name_suffix_equals; *cp != '"'; ++cp)
579 name_suffix_begin = ++cp;
580 /* Find second \". */
581 for (; *cp != '"'; ++cp)
584 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
585 memcpy(extracted_name_value, name_suffix_begin,
586 cp - name_suffix_begin);
587 extracted_name_value[cp - name_suffix_begin] = '\0';
590 return extracted_name_value;
594 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
595 ** directives. Fills in the information of the CTinfo structure.
598 get_ctinfo(unsigned char *cp, CT ct, int magic)
607 i = strlen(invo_name) + 2;
609 /* store copy of Content-Type line */
610 cp = ct->c_ctline = getcpy(cp);
612 while (isspace(*cp)) /* trim leading spaces */
615 /* change newlines to spaces */
616 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
619 /* trim trailing spaces */
620 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
626 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
628 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
631 for (dp = cp; istoken(*dp); dp++)
634 ci->ci_type = getcpy(cp); /* store content type */
638 advise(NULL, "invalid %s: field in message %s (empty type)",
639 TYPE_FIELD, ct->c_file);
643 /* down case the content type string */
644 for (dp = ci->ci_type; *dp; dp++)
645 if (isalpha(*dp) && isupper(*dp))
651 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
656 ci->ci_subtype = getcpy("");
664 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
667 for (dp = cp; istoken(*dp); dp++)
670 ci->ci_subtype = getcpy(cp); /* store the content subtype */
673 if (!*ci->ci_subtype) {
674 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
678 /* down case the content subtype string */
679 for (dp = ci->ci_subtype; *dp; dp++)
680 if (isalpha(*dp) && isupper(*dp))
687 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
691 ** Parse attribute/value pairs given with Content-Type
693 ep = (ap = ci->ci_attrs) + NPARMS;
699 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
707 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
711 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
715 /* down case the attribute name */
716 for (dp = cp; istoken(*dp); dp++)
717 if (isalpha(*dp) && isupper(*dp))
720 for (up = dp; isspace(*dp);)
722 if (dp == cp || *dp != '=') {
723 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
727 vp = (*ap = getcpy(cp)) + (up - cp);
729 for (dp++; isspace(*dp);)
732 /* now add the attribute value */
733 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
736 for (cp = ++dp, dp = vp;;) {
740 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
745 if ((c = *cp++) == '\0')
760 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
765 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
773 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
778 ** Get any <Content-Id> given in buffer
780 if (magic && *cp == '<') {
785 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
786 advise(NULL, "invalid ID in message %s", ct->c_file);
792 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
803 ** Get any [Content-Description] given in buffer.
805 if (magic && *cp == '[') {
807 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
811 advise(NULL, "invalid description in message %s",
820 ct->c_descr = concat(ct->c_descr, "\n", NULL);
831 ** Get any {Content-Disposition} given in buffer.
833 if (magic && *cp == '{') {
835 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
839 advise(NULL, "invalid disposition in message %s",
848 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
859 ** Check if anything is left over
863 ci->ci_magic = getcpy(cp);
866 ** If there is a Content-Disposition header and it
867 ** doesn't have a *filename=, add filename="..." to it,
868 ** taking the value from the magic contents. The mhbasename
869 ** call skips any leading directory components.
872 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
874 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
882 get_comment(CT ct, unsigned char **ap, int istype)
887 char c, buffer[BUFSIZ], *dp;
899 advise(NULL, "invalid comment in message %s's %s: field",
900 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
905 if ((c = *cp++) == '\0')
928 if ((dp = ci->ci_comment)) {
929 ci->ci_comment = concat(dp, " ", buffer, NULL);
932 ci->ci_comment = getcpy(buffer);
947 ** Handles content types audio, image, and video.
948 ** There's not much to do right here.
954 return OK; /* not much to do here */
968 CI ci = &ct->c_ctinfo;
970 /* check for missing subtype */
971 if (!*ci->ci_subtype)
972 ci->ci_subtype = add("plain", ci->ci_subtype);
975 for (kv = SubText; kv->kv_key; kv++)
976 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
978 ct->c_subtype = kv->kv_value;
980 /* allocate text character set structure */
981 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
982 adios(NULL, "out of memory");
983 ct->c_ctparams = (void *) t;
985 /* scan for charset parameter */
986 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
987 if (!mh_strcasecmp(*ap, "charset"))
990 /* check if content specified a character set */
993 ct->c_charset = getcpy(norm_charmap(*ep));
994 /* match character set or set to CHARSET_UNKNOWN */
995 for (kv = Charset; kv->kv_key; kv++) {
996 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1000 t->tx_charset = kv->kv_value;
1002 t->tx_charset = CHARSET_UNSPECIFIED;
1014 InitMultiPart(CT ct)
1018 unsigned char *cp, *dp;
1020 char *bp, buffer[BUFSIZ];
1021 struct multipart *m;
1023 struct part *part, **next;
1024 CI ci = &ct->c_ctinfo;
1029 ** The encoding for multipart messages must be either
1030 ** 7bit, 8bit, or binary (per RFC2045).
1032 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1033 && ct->c_encoding != CE_BINARY) {
1034 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1039 for (kv = SubMultiPart; kv->kv_key; kv++)
1040 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1042 ct->c_subtype = kv->kv_value;
1045 ** Check for "boundary" parameter, which is
1046 ** required for multipart messages.
1049 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1050 if (!mh_strcasecmp(*ap, "boundary")) {
1056 /* complain if boundary parameter is missing */
1058 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1062 /* allocate primary structure for multipart info */
1063 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1064 adios(NULL, "out of memory");
1065 ct->c_ctparams = (void *) m;
1067 /* check if boundary parameter contains only whitespace characters */
1068 for (cp = bp; isspace(*cp); cp++)
1071 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1075 /* remove trailing whitespace from boundary parameter */
1076 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1081 /* record boundary separators */
1082 m->mp_start = concat(bp, "\n", NULL);
1083 m->mp_stop = concat(bp, "--\n", NULL);
1085 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1086 advise(ct->c_file, "unable to open for reading");
1090 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1092 next = &m->mp_parts;
1096 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1100 pos += strlen(buffer);
1101 if (buffer[0] != '-' || buffer[1] != '-')
1104 if (strcmp(buffer + 2, m->mp_start)!=0)
1107 if ((part = (struct part *) calloc(1, sizeof(*part)))
1109 adios(NULL, "out of memory");
1111 next = &part->mp_next;
1113 if (!(p = get_content(fp, ct->c_file,
1114 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1121 fseek(fp, pos, SEEK_SET);
1124 if (strcmp(buffer + 2, m->mp_start) == 0) {
1128 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1129 if (p->c_end < p->c_begin)
1130 p->c_begin = p->c_end;
1135 if (strcmp(buffer + 2, m->mp_stop) == 0)
1141 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1142 if (!inout && part) {
1144 p->c_end = ct->c_end;
1146 if (p->c_begin >= p->c_end) {
1147 for (next = &m->mp_parts; *next != part;
1148 next = &((*next)->mp_next))
1152 free((char *) part);
1157 /* reverse the order of the parts for multipart/alternative */
1158 if (ct->c_subtype == MULTI_ALTERNATE)
1162 ** label all subparts with part number, and
1163 ** then initialize the content of the subpart.
1168 char partnam[BUFSIZ];
1171 snprintf(partnam, sizeof(partnam), "%s.",
1173 pp = partnam + strlen(partnam);
1178 for (part = m->mp_parts, partnum = 1; part;
1179 part = part->mp_next, partnum++) {
1182 sprintf(pp, "%d", partnum);
1183 p->c_partno = getcpy(partnam);
1185 /* initialize the content of the subparts */
1186 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1201 ** reverse the order of the parts of a multipart
1205 reverse_parts(CT ct)
1208 struct multipart *m;
1209 struct part **base, **bmp, **next, *part;
1211 m = (struct multipart *) ct->c_ctparams;
1213 /* if only one part, just return */
1214 if (!m->mp_parts || !m->mp_parts->mp_next)
1217 /* count number of parts */
1219 for (part = m->mp_parts; part; part = part->mp_next)
1222 /* allocate array of pointers to the parts */
1223 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1224 adios(NULL, "out of memory");
1227 /* point at all the parts */
1228 for (part = m->mp_parts; part; part = part->mp_next)
1232 /* reverse the order of the parts */
1233 next = &m->mp_parts;
1234 for (bmp--; bmp >= base; bmp--) {
1237 next = &part->mp_next;
1241 /* free array of pointers */
1242 free((char *) base);
1254 CI ci = &ct->c_ctinfo;
1256 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1257 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1261 /* check for missing subtype */
1262 if (!*ci->ci_subtype)
1263 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1266 for (kv = SubMessage; kv->kv_key; kv++)
1267 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1269 ct->c_subtype = kv->kv_value;
1271 switch (ct->c_subtype) {
1272 case MESSAGE_RFC822:
1275 case MESSAGE_PARTIAL:
1280 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1281 adios(NULL, "out of memory");
1282 ct->c_ctparams = (void *) p;
1285 ** scan for parameters "id", "number",
1288 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1289 if (!mh_strcasecmp(*ap, "id")) {
1290 p->pm_partid = getcpy(*ep);
1293 if (!mh_strcasecmp(*ap, "number")) {
1294 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1296 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1301 if (!mh_strcasecmp(*ap, "total")) {
1302 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1309 if (!p->pm_partid || !p->pm_partno
1310 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1311 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1317 case MESSAGE_EXTERNAL:
1322 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1323 advise(ct->c_file, "unable to open for reading");
1327 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1329 if (!(p = get_content(fp, ct->c_file, 0))) {
1335 p->c_end = p->c_begin;
1340 switch (p->c_type) {
1345 if (p->c_subtype != MESSAGE_RFC822)
1350 (*p->c_ctinitfnx) (p);
1369 InitApplication(CT ct)
1372 CI ci = &ct->c_ctinfo;
1375 for (kv = SubApplication; kv->kv_key; kv++)
1376 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1378 ct->c_subtype = kv->kv_value;
1385 ** TRANSFER ENCODINGS
1389 init_encoding(CT ct, OpenCEFunc openfnx)
1393 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1394 adios(NULL, "out of memory");
1397 ct->c_ceopenfnx = openfnx;
1398 ct->c_ceclosefnx = close_encoding;
1399 ct->c_cesizefnx = size_encoding;
1406 close_encoding(CT ct)
1410 if (!(ce = ct->c_cefile))
1420 static unsigned long
1421 size_encoding(CT ct)
1429 if (!(ce = ct->c_cefile))
1430 return (ct->c_end - ct->c_begin);
1432 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1433 return (long) st.st_size;
1436 if (stat(ce->ce_file, &st) != NOTOK)
1437 return (long) st.st_size;
1442 if (ct->c_encoding == CE_EXTERNAL)
1443 return (ct->c_end - ct->c_begin);
1446 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1447 return (ct->c_end - ct->c_begin);
1449 if (fstat(fd, &st) != NOTOK)
1450 size = (long) st.st_size;
1454 (*ct->c_ceclosefnx) (ct);
1463 static unsigned char b642nib[0x80] = {
1464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1468 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1469 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1470 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1471 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1472 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1473 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1474 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1475 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1476 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1477 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1478 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1479 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1486 return init_encoding(ct, openBase64);
1491 openBase64(CT ct, char **file)
1494 int fd, len, skip, own_ct_fp = 0;
1496 unsigned char value, *b, *b1, *b2, *b3;
1497 unsigned char *cp, *ep;
1498 char buffer[BUFSIZ];
1499 /* sbeck -- handle suffixes */
1503 b = (unsigned char *) &bits;
1504 b1 = &b[endian > 0 ? 1 : 2];
1505 b2 = &b[endian > 0 ? 2 : 1];
1506 b3 = &b[endian > 0 ? 3 : 0];
1510 fseek(ce->ce_fp, 0L, SEEK_SET);
1515 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1516 content_error(ce->ce_file, ct,
1517 "unable to fopen for reading");
1523 if (*file == NULL) {
1524 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1527 ce->ce_file = getcpy(*file);
1531 /* sbeck@cise.ufl.edu -- handle suffixes */
1533 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1534 invo_name, ci->ci_type, ci->ci_subtype);
1535 cp = context_find(buffer);
1536 if (cp == NULL || *cp == '\0') {
1537 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1539 cp = context_find(buffer);
1541 if (cp != NULL && *cp != '\0') {
1542 if (ce->ce_unlink) {
1544 ** Temporary file already exists, so we rename to
1545 ** version with extension.
1547 char *file_org = strdup(ce->ce_file);
1548 ce->ce_file = add(cp, ce->ce_file);
1549 if (rename(file_org, ce->ce_file)) {
1550 adios(ce->ce_file, "unable to rename %s to ",
1556 ce->ce_file = add(cp, ce->ce_file);
1560 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1561 content_error(ce->ce_file, ct,
1562 "unable to fopen for reading/writing");
1566 if ((len = ct->c_end - ct->c_begin) < 0)
1567 adios(NULL, "internal error(1)");
1570 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1571 content_error(ct->c_file, ct,
1572 "unable to open for reading");
1582 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1584 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1586 content_error(ct->c_file, ct, "error reading from");
1590 content_error(NULL, ct, "premature eof");
1598 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1603 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1605 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1607 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1611 bits |= value << bitno;
1613 if ((bitno -= 6) < 0) {
1614 putc((char) *b1, ce->ce_fp);
1616 putc((char) *b2, ce->ce_fp);
1618 putc((char) *b3, ce->ce_fp);
1622 if (ferror(ce->ce_fp)) {
1623 content_error(ce->ce_file, ct,
1624 "error writing to");
1627 bitno = 18, bits = 0L, skip = 0;
1633 goto self_delimiting;
1642 fprintf(stderr, "premature ending (bitno %d)\n",
1645 content_error(NULL, ct, "invalid BASE64 encoding");
1650 fseek(ct->c_fp, 0L, SEEK_SET);
1652 if (fflush(ce->ce_fp)) {
1653 content_error(ce->ce_file, ct, "error writing to");
1657 fseek(ce->ce_fp, 0L, SEEK_SET);
1660 *file = ce->ce_file;
1665 return fileno(ce->ce_fp);
1668 free_encoding(ct, 0);
1681 static char hex2nib[0x80] = {
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1689 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1694 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1697 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1704 return init_encoding(ct, openQuoted);
1709 openQuoted(CT ct, char **file)
1711 int cc, len, quoted, own_ct_fp = 0;
1712 unsigned char *cp, *ep;
1713 char buffer[BUFSIZ];
1714 unsigned char mask = 0;
1716 /* sbeck -- handle suffixes */
1721 fseek(ce->ce_fp, 0L, SEEK_SET);
1726 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1727 content_error(ce->ce_file, ct,
1728 "unable to fopen for reading");
1734 if (*file == NULL) {
1735 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1738 ce->ce_file = getcpy(*file);
1742 /* sbeck@cise.ufl.edu -- handle suffixes */
1744 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1745 invo_name, ci->ci_type, ci->ci_subtype);
1746 cp = context_find(buffer);
1747 if (cp == NULL || *cp == '\0') {
1748 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1750 cp = context_find(buffer);
1752 if (cp != NULL && *cp != '\0') {
1753 if (ce->ce_unlink) {
1755 ** Temporary file already exists, so we rename to
1756 ** version with extension.
1758 char *file_org = strdup(ce->ce_file);
1759 ce->ce_file = add(cp, ce->ce_file);
1760 if (rename(file_org, ce->ce_file)) {
1761 adios(ce->ce_file, "unable to rename %s to ",
1767 ce->ce_file = add(cp, ce->ce_file);
1771 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1772 content_error(ce->ce_file, ct,
1773 "unable to fopen for reading/writing");
1777 if ((len = ct->c_end - ct->c_begin) < 0)
1778 adios(NULL, "internal error(2)");
1781 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1782 content_error(ct->c_file, ct,
1783 "unable to open for reading");
1791 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1793 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1794 content_error(NULL, ct, "premature eof");
1798 if ((cc = strlen(buffer)) > len)
1802 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1807 for (; cp < ep; cp++) {
1809 /* in an escape sequence */
1811 /* at byte 1 of an escape sequence */
1812 mask = hex2nib[*cp & 0x7f];
1813 /* next is byte 2 */
1816 /* at byte 2 of an escape sequence */
1818 mask |= hex2nib[*cp & 0x7f];
1819 putc(mask, ce->ce_fp);
1820 if (ferror(ce->ce_fp)) {
1821 content_error(ce->ce_file, ct, "error writing to");
1825 ** finished escape sequence; next may
1826 ** be literal or a new escape sequence
1830 /* on to next byte */
1834 /* not in an escape sequence */
1837 ** starting an escape sequence,
1840 if (cp + 1 < ep && cp[1] == '\n') {
1841 /* "=\n" soft line break, eat the \n */
1845 if (cp + 1 >= ep || cp + 2 >= ep) {
1847 ** We don't have 2 bytes left,
1848 ** so this is an invalid escape
1849 ** sequence; just show the raw bytes
1852 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1854 ** Next 2 bytes are hex digits,
1855 ** making this a valid escape
1856 ** sequence; let's decode it (above).
1862 ** One or both of the next 2 is
1863 ** out of range, making this an
1864 ** invalid escape sequence; just
1865 ** show the raw bytes (below).
1870 /* Just show the raw byte. */
1871 putc(*cp, ce->ce_fp);
1872 if (ferror(ce->ce_fp)) {
1873 content_error(ce->ce_file, ct,
1874 "error writing to");
1880 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1884 fseek(ct->c_fp, 0L, SEEK_SET);
1886 if (fflush(ce->ce_fp)) {
1887 content_error(ce->ce_file, ct, "error writing to");
1891 fseek(ce->ce_fp, 0L, SEEK_SET);
1894 *file = ce->ce_file;
1899 return fileno(ce->ce_fp);
1902 free_encoding(ct, 0);
1918 if (init_encoding(ct, open7Bit) == NOTOK)
1921 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1927 open7Bit(CT ct, char **file)
1929 int cc, fd, len, own_ct_fp = 0;
1930 char buffer[BUFSIZ];
1931 /* sbeck -- handle suffixes */
1938 fseek(ce->ce_fp, 0L, SEEK_SET);
1943 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1944 content_error(ce->ce_file, ct,
1945 "unable to fopen for reading");
1951 if (*file == NULL) {
1952 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1955 ce->ce_file = getcpy(*file);
1959 /* sbeck@cise.ufl.edu -- handle suffixes */
1961 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1962 invo_name, ci->ci_type, ci->ci_subtype);
1963 cp = context_find(buffer);
1964 if (cp == NULL || *cp == '\0') {
1965 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1967 cp = context_find(buffer);
1969 if (cp != NULL && *cp != '\0') {
1970 if (ce->ce_unlink) {
1972 ** Temporary file already exists, so we rename to
1973 ** version with extension.
1975 char *file_org = strdup(ce->ce_file);
1976 ce->ce_file = add(cp, ce->ce_file);
1977 if (rename(file_org, ce->ce_file)) {
1978 adios(ce->ce_file, "unable to rename %s to ",
1984 ce->ce_file = add(cp, ce->ce_file);
1988 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1989 content_error(ce->ce_file, ct,
1990 "unable to fopen for reading/writing");
1994 if (ct->c_type == CT_MULTIPART) {
1996 CI ci = &ct->c_ctinfo;
1999 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2001 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2002 strlen(ci->ci_subtype);
2003 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2004 putc(';', ce->ce_fp);
2007 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2010 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2011 fputs("\n\t", ce->ce_fp);
2014 putc(' ', ce->ce_fp);
2017 fprintf(ce->ce_fp, "%s", buffer);
2021 if (ci->ci_comment) {
2022 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2024 fputs("\n\t", ce->ce_fp);
2027 putc(' ', ce->ce_fp);
2030 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2033 fprintf(ce->ce_fp, "\n");
2035 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2037 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2039 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2040 fprintf(ce->ce_fp, "\n");
2043 if ((len = ct->c_end - ct->c_begin) < 0)
2044 adios(NULL, "internal error(3)");
2047 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2048 content_error(ct->c_file, ct,
2049 "unable to open for reading");
2055 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2057 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2059 content_error(ct->c_file, ct, "error reading from");
2063 content_error(NULL, ct, "premature eof");
2071 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2072 if (ferror(ce->ce_fp)) {
2073 content_error(ce->ce_file, ct,
2074 "error writing to");
2079 fseek(ct->c_fp, 0L, SEEK_SET);
2081 if (fflush(ce->ce_fp)) {
2082 content_error(ce->ce_file, ct, "error writing to");
2086 fseek(ce->ce_fp, 0L, SEEK_SET);
2089 *file = ce->ce_file;
2094 return fileno(ce->ce_fp);
2097 free_encoding(ct, 0);