2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
/*
** Globals owned by other source files, named in the trailing notes.
** endian is read by the base64 decoder in this file to pick byte
** positions inside its accumulator word; xpid is not referenced in
** the visible part of this file.
*/
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Name-to-code table for subtypes of text content.  The text init
** code walks it comparing keys case-insensitively and stores the
** kv_value of the entry it stops on into c_subtype, so the NULL-keyed
** TEXT_UNKNOWN row acts as the fallback and has to stay last.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Name-to-code table for the charset parameter of text content.
** Only us-ascii and iso-8859-1 have dedicated codes; the NULL-keyed
** CHARSET_UNKNOWN row terminates the case-insensitive scan done by
** the text init code.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Name-to-code table for multipart subtypes, scanned by
** InitMultiPart.  MULTI_DIGEST changes the default type of nested
** parts and MULTI_ALTERNATE triggers reverse_parts; the NULL-keyed
** MULTI_UNKNOWN row is the terminator and fallback.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Name-to-code table for message subtypes.  The message init code
** switches on the resulting code to handle rfc822, partial and
** external-body; the NULL-keyed MESSAGE_UNKNOWN row is the
** terminator and fallback.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Name-to-code table for application subtypes, scanned by
** InitApplication.  The NULL-keyed APPLICATION_UNKNOWN row is the
** terminator and fallback.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.  The first four have external linkage;
** content_error and free_encoding are called from the decoders in
** this file, the other two are not referenced in the visible part.
*/
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
/*
** File-private helpers.  The Init routines are installed through the
** str2cts and str2ces tables; the open routines are handed to
** init_encoding as the decoder callback.
*/
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Dispatch table from the major content type name to its CT_ code
** and init function.  get_content scans it case-insensitively and
** copies si_val and si_init of the selected row into the content.
** Audio, image and video share InitGeneric.
**
** The two NULL-keyed rows are sentinels.  The lookup tests for a
** missing key together with an X- prefix check, which suggests the
** first sentinel serves X- extension types and the second all other
** unknown names.  NOTE(review): the statement that steps from one
** sentinel to the other is not visible here; confirm.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table from the transfer encoding name to its CE_ code and
** init function.  get_content scans it case-insensitively, stores
** si_val in c_encoding and calls si_init when it is set.  The 8bit,
** 7bit and binary encodings all share Init7Bit.
**
** The two NULL-keyed rows are sentinels, paired with an X- prefix
** check in the lookup in the same way as for the type table.
** NOTE(review): the stepping statement is not visible here; confirm.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** parse_mime: open a message and build its top-level content.
**
** file is a path, or a single dash to read standard input.  For
** standard input the data is first copied line by line into a
** temporary file obtained from m_mktemp2, the path variable is
** repointed at a copy of that name, and the stream is rewound.
** The headers are then parsed by get_content with toplevel set to 1,
** a missing end offset is filled in with the file size, and finally
** the init function chosen for the content type is run.
**
** NOTE(review): the return statements and the cleanup on the failure
** paths are not visible here; the comment preceding this function
** says it returns the Content structure for the top-level entity.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
/* from here on file names the temporary copy, not the dash */
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
/* rewind so that parsing starts at the first header line */
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
/* an end offset of zero means not yet known: use the file size */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
/* run the type specific initialiser, if the type has one */
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** get_content: read the header block of one entity from the stream
** and return a freshly allocated content structure describing it.
**
** in       - stream positioned at the first header line
** file     - path recorded (as a copy) in c_file
** toplevel - 1 at message level, 0 inside message or multipart
**            types, -1 inside multipart/digest
**
** Phase one loops over m_getfld, gathering continuation chunks while
** the state is FLDPLUS, and appends every name and value pair to the
** header list with add_header; c_begin is advanced along the way so
** that it ends up at the start of the body.  Phase two walks the
** list and interprets MIME-Version, Content-Type,
** Content-Transfer-Encoding, Content-ID, Content-Description and
** Content-Disposition.  Phase three supplies defaults:
** message/rfc822 or text/plain when no Content-Type was seen, and
** CE_7BIT when no encoding was set.
**
** Format errors reported by m_getfld end the program through adios.
** NOTE(review): the soft failure paths and the return statements are
** not visible here; the comment ahead of this function states that
** the stream is closed on failure.
*/
232 get_content(FILE *in, char *file, int toplevel)
235 char buf[BUFSIZ], name[NAMESZ];
240 /* allocate the content structure */
241 if (!(ct = (CT) calloc(1, sizeof(*ct))))
242 adios(EX_OSERR, NULL, "out of memory");
245 ct->c_file = getcpy(file);
/*
** NOTE(review): this offset is taken from c_fp while later ones are
** taken from in; the assignment of c_fp is not visible here.
*/
246 ct->c_begin = ftell(ct->c_fp) + 1;
249 ** Parse the header fields for this
250 ** content into a linked list.
252 for (compnum = 1, state = FLD;;) {
253 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
259 /* get copies of the buffers */
263 /* if necessary, get rest of field */
264 while (state == FLDPLUS) {
265 state = m_getfld(state, name, buf,
267 vp = add(buf, vp); /* add to previous value */
270 /* Now add the header data to the list */
271 add_header(ct, np, vp);
273 /* continue, if this isn't the last header field */
274 if (state != FLDEOF) {
275 ct->c_begin = ftell(in) + 1;
282 ct->c_begin = ftell(in) - strlen(buf);
286 ct->c_begin = ftell(in);
291 adios(EX_DATAERR, NULL, "message format error in component #%d",
295 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
298 /* break out of the loop */
303 ** Read the content headers. We will parse the
304 ** MIME related header fields into their various
305 ** structures and set internal flags related to
306 ** content type/subtype, etc.
309 hp = ct->c_first_hf; /* start at first header field */
311 /* Get MIME-Version field */
312 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
315 unsigned char *cp, *dp;
318 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
321 ct->c_vrsn = getcpy(hp->value);
323 /* Now, cleanup this field */
328 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
330 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
335 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
337 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
340 for (dp = cp; istoken(*dp); dp++)
/* an unexpected version value only earns a warning */
344 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
347 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
350 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
351 /* Get Content-Type field */
352 struct str2init *s2i;
353 CI ci = &ct->c_ctinfo;
355 /* Check if we've already seen a Content-Type header */
357 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
361 /* Parse the Content-Type field */
362 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
366 ** Set the Init function and the internal
367 ** flag for this content type.
369 for (s2i = str2cts; s2i->si_key; s2i++)
370 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
372 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
374 ct->c_type = s2i->si_val;
375 ct->c_ctinitfnx = s2i->si_init;
377 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
378 /* Get Content-Transfer-Encoding field */
380 unsigned char *cp, *dp;
381 struct str2init *s2i;
384 ** Check if we've already seen the
385 ** Content-Transfer-Encoding field
388 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
392 /* get copy of this field */
393 ct->c_celine = cp = getcpy(hp->value);
397 for (dp = cp; istoken(*dp); dp++)
403 ** Find the internal flag and Init function
404 ** for this transfer encoding.
406 for (s2i = str2ces; s2i->si_key; s2i++)
407 if (!mh_strcasecmp(cp, s2i->si_key))
409 if (!s2i->si_key && !uprf(cp, "X-"))
412 ct->c_encoding = s2i->si_val;
414 /* Call the Init function for this encoding */
415 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
418 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
419 /* Get Content-ID field */
420 ct->c_id = add(hp->value, ct->c_id);
422 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
423 /* Get Content-Description field */
424 ct->c_descr = add(hp->value, ct->c_descr);
426 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
427 /* Get Content-Disposition field */
428 ct->c_dispo = add(hp->value, ct->c_dispo);
432 hp = hp->next; /* next header field */
436 ** Check if we saw a Content-Type field.
437 ** If not, then assign a default value for
438 ** it, and the Init function.
442 ** If we are inside a multipart/digest message,
443 ** so default type is message/rfc822
446 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
448 ct->c_type = CT_MESSAGE;
449 ct->c_ctinitfnx = InitMessage;
452 ** Else default type is text/plain
454 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
456 ct->c_type = CT_TEXT;
457 ct->c_ctinitfnx = InitText;
461 /* Use default Transfer-Encoding, if necessary */
463 ct->c_encoding = CE_7BIT;
476 ** small routine to add header field to list
/*
** add_header: append one header field to the list kept in ct.
**
** A node is obtained from mh_xmalloc and linked in at the tail: it
** becomes c_first_hf when the list is empty, otherwise it is hung
** off the current c_last_hf.  NOTE(review): the stores of name and
** value into the node, the update of c_last_hf and the return value
** are not visible here; the caller passes heap copies, so the node
** presumably takes ownership of both strings - confirm.
*/
480 add_header(CT ct, char *name, char *value)
484 /* allocate header field structure */
485 hp = mh_xmalloc(sizeof(*hp));
487 /* link data into header structure */
492 /* link header structure into the list */
493 if (ct->c_first_hf == NULL) {
494 ct->c_first_hf = hp; /* this is the first */
497 ct->c_last_hf->next = hp; /* add it to the end */
506 ** Make sure that buf contains at least one appearance of name,
507 ** followed by =. If not, insert both name and value, just after
508 ** first semicolon, if any. Note that name should not contain a
509 ** trailing =. And quotes will be added around the value. Typical
510 ** usage: make sure that a Content-Disposition header contains
511 ** filename="foo". If it doesn't and value does, use value from
/*
** incl_name_value: make sure buf carries a name=value parameter.
**
** buf   - header value to inspect
** name  - parameter name without the equals sign, assumed non-null
** value - text to insert, wrapped in double quotes by this routine
**
** The search is a plain substring match on name followed by an
** equals sign.  When nothing matches, trailing whitespace is trimmed
** from buf and a semicolon, space, name and quoted value are spliced
** in at the first semicolon of buf, or appended followed by a
** newline when buf has no semicolon.
**
** NOTE(review): the return statement is not visible here; the one
** visible caller assigns the result to c_dispo, so the new string is
** presumably returned - confirm.  For an empty buf the trim loop
** starts from a pointer one before the array.
*/
515 incl_name_value(unsigned char *buf, char *name, char *value) {
518 /* Assume that name is non-null. */
520 char *name_plus_equal = concat(name, "=", NULL);
522 if (!strstr(buf, name_plus_equal)) {
525 char *prefix, *suffix;
527 /* Trim trailing space, esp. newline. */
528 for (cp = &buf[strlen(buf) - 1];
529 cp >= buf && isspace(*cp); --cp) {
533 insertion = concat("; ", name, "=", "\"", value, "\"",
537 ** Insert at first semicolon, if any.
538 ** If none, append to end.
540 prefix = getcpy(buf);
541 if ((cp = strchr(prefix, ';'))) {
542 suffix = concat(cp, NULL);
544 newbuf = concat(prefix, insertion, suffix,
549 newbuf = concat(buf, insertion, "\n", NULL);
557 free(name_plus_equal);
564 ** Extract just name_suffix="foo", if any, from value. If there isn't
565 ** one, return the entire value. Note that, for example, a name_suffix
566 ** of name will match filename="foo", and return foo.
/*
** extract_name_value: pull the quoted value of a parameter whose
** name ends in name_suffix out of value.
**
** The pattern searched for is name_suffix, an equals sign and an
** opening double quote.  On a hit the text between that quote and
** the next double quote is copied into memory from mh_xmalloc and
** returned.  Without a hit the value pointer itself is returned, so
** the caller cannot tell from the result alone whether it received
** new storage.
**
** NOTE(review): the visible condition of the scan for the closing
** quote tests only for the quote character, not for the end of the
** string; confirm that unterminated input cannot reach it.
*/
569 extract_name_value(char *name_suffix, char *value) {
570 char *extracted_name_value = value;
571 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
572 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
575 free(name_suffix_plus_quote);
576 if (name_suffix_equals) {
577 char *name_suffix_begin;
580 for (cp = name_suffix_equals; *cp != '"'; ++cp)
582 name_suffix_begin = ++cp;
583 /* Find second \". */
584 for (; *cp != '"'; ++cp)
587 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
588 memcpy(extracted_name_value, name_suffix_begin,
589 cp - name_suffix_begin);
590 extracted_name_value[cp - name_suffix_begin] = '\0';
593 return extracted_name_value;
597 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
598 ** directives. Fills in the information of the CTinfo structure.
/*
** get_ctinfo: parse a Content-Type value into the c_ctinfo member of
** ct.
**
** cp    - field text; a private copy is stored in c_ctline and the
**         parse works on that copy
** ct    - content that receives type, subtype, parameters, comment
** magic - when non-zero, the mhbuild directive extensions are also
**         accepted: an ID in angle brackets, a description in square
**         brackets and a disposition in curly braces
**
** Order of work: leading blanks skipped, newlines turned into
** spaces, trailing blanks trimmed, then type, optional subtype and
** the attribute list are read.  Comments in parentheses are handed
** to get_comment at every point where one may appear.  Type, subtype
** and attribute names are folded to lower case.  At most NPARMS
** parameters are accepted.  Leftover text is stored in ci_magic when
** permitted, otherwise it is reported as extraneous information.
**
** Callers compare the result against NOTOK.  NOTE(review): the
** return statements are not visible here.  The invalid parameter
** message prints a pointer difference with a plain int conversion;
** confirm the argument type.
*/
601 get_ctinfo(unsigned char *cp, CT ct, int magic)
/* i is used below as the indent width of continuation messages */
610 i = strlen(invo_name) + 2;
612 /* store copy of Content-Type line */
613 cp = ct->c_ctline = getcpy(cp);
615 while (isspace(*cp)) /* trim leading spaces */
618 /* change newlines to spaces */
619 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
622 /* trim trailing spaces */
623 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
629 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
631 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
634 for (dp = cp; istoken(*dp); dp++)
637 ci->ci_type = getcpy(cp); /* store content type */
641 advise(NULL, "invalid %s: field in message %s (empty type)",
642 TYPE_FIELD, ct->c_file);
646 /* down case the content type string */
647 for (dp = ci->ci_type; *dp; dp++)
648 if (isalpha(*dp) && isupper(*dp))
654 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
/* a missing subtype is stored as an empty string, not as NULL */
659 ci->ci_subtype = getcpy("");
667 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
670 for (dp = cp; istoken(*dp); dp++)
673 ci->ci_subtype = getcpy(cp); /* store the content subtype */
676 if (!*ci->ci_subtype) {
677 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
681 /* down case the content subtype string */
682 for (dp = ci->ci_subtype; *dp; dp++)
683 if (isalpha(*dp) && isupper(*dp))
690 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
694 ** Parse attribute/value pairs given with Content-Type
696 ep = (ap = ci->ci_attrs) + NPARMS;
702 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
710 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
714 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
718 /* down case the attribute name */
719 for (dp = cp; istoken(*dp); dp++)
720 if (isalpha(*dp) && isupper(*dp))
723 for (up = dp; isspace(*dp);)
725 if (dp == cp || *dp != '=') {
726 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
730 vp = (*ap = getcpy(cp)) + (up - cp);
732 for (dp++; isspace(*dp);)
735 /* now add the attribute value */
736 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
739 for (cp = ++dp, dp = vp;;) {
743 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
748 if ((c = *cp++) == '\0')
763 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
768 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
776 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
781 ** Get any <Content-Id> given in buffer
783 if (magic && *cp == '<') {
788 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
789 advise(NULL, "invalid ID in message %s", ct->c_file);
795 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
806 ** Get any [Content-Description] given in buffer.
808 if (magic && *cp == '[') {
810 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
814 advise(NULL, "invalid description in message %s",
823 ct->c_descr = concat(ct->c_descr, "\n", NULL);
834 ** Get any {Content-Disposition} given in buffer.
836 if (magic && *cp == '{') {
838 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
842 advise(NULL, "invalid disposition in message %s",
851 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
862 ** Check if anything is left over
866 ci->ci_magic = getcpy(cp);
869 ** If there is a Content-Disposition header and
870 ** it doesn't have a *filename=, extract it from
871 ** the magic contents. The mhbasename call skips
872 ** any leading directory components.
875 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
877 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** get_comment: consume one parenthesised comment from a header value.
**
** ct     - content whose ci_comment collects the comment text
** ap     - address of the parse cursor; callers invoke this only
**          when the cursor sits on an opening parenthesis
** istype - non-zero when parsing the Content-Type field, zero for
**          MIME-Version; it only selects the field name used in the
**          error message
**
** The text is gathered in a local buffer of BUFSIZ bytes.  Reaching
** the end of the string inside the comment is the visible error
** case.  A comment found after an earlier one is joined to it with a
** single space, otherwise a copy of the buffer is stored.
**
** Callers compare the result against NOTOK.  NOTE(review): the copy
** loop, any bound check on the buffer, the cursor write-back and the
** return statements are not visible here.
*/
885 get_comment(CT ct, unsigned char **ap, int istype)
890 char c, buffer[BUFSIZ], *dp;
902 advise(NULL, "invalid comment in message %s's %s: field",
903 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
908 if ((c = *cp++) == '\0')
931 if ((dp = ci->ci_comment)) {
932 ci->ci_comment = concat(dp, " ", buffer, NULL);
935 ci->ci_comment = getcpy(buffer);
950 ** Handles content types audio, image, and video.
951 ** There's not much to do right here.
/*
** Presumably the whole body of InitGeneric, the init hook that the
** type table installs for audio, image and video: it reports success
** without inspecting the content.  NOTE(review): the signature is
** not visible here.
*/
957 return OK; /* not much to do here */
/*
** Body of the init hook for text content, presumably InitText (the
** signature is not visible here).
**
** An empty subtype is replaced by plain, the subtype code is looked
** up in SubText, and a zeroed struct text is attached to
** c_ctparams.  The attribute list is then searched for a charset
** parameter.  When one is present its normalised name is copied
** into c_charset and its code from the Charset table is stored in
** tx_charset; otherwise tx_charset becomes CHARSET_UNSPECIFIED.
** Allocation failure ends the program through adios.
**
** NOTE(review): the conditions guarding the two charset branches and
** the return value are not visible here.
*/
971 CI ci = &ct->c_ctinfo;
973 /* check for missing subtype */
974 if (!*ci->ci_subtype)
975 ci->ci_subtype = add("plain", ci->ci_subtype);
978 for (kv = SubText; kv->kv_key; kv++)
979 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
981 ct->c_subtype = kv->kv_value;
983 /* allocate text character set structure */
984 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
985 adios(EX_OSERR, NULL, "out of memory");
986 ct->c_ctparams = (void *) t;
988 /* scan for charset parameter */
989 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
990 if (!mh_strcasecmp(*ap, "charset"))
993 /* check if content specified a character set */
996 ct->c_charset = getcpy(norm_charmap(*ep));
997 /* match character set or set to CHARSET_UNKNOWN */
998 for (kv = Charset; kv->kv_key; kv++) {
999 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1003 t->tx_charset = kv->kv_value;
1005 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** InitMultiPart: split a multipart entity into its parts.
**
** Steps, in order:
**  - warn when the transfer encoding is not 7bit, 8bit or binary
**  - map the subtype through SubMultiPart
**  - locate the mandatory boundary parameter, reject one made only
**    of whitespace, and strip its trailing whitespace
**  - derive the separator lines: boundary plus newline as the start
**    marker, boundary plus two dashes and newline as the stop marker
**  - open the message file if needed and scan the body line by line
**    from c_begin; lines beginning with two dashes are compared with
**    the markers, and each start marker creates a struct part whose
**    headers are read by get_content (toplevel -1 for digests,
**    0 otherwise)
**  - set the end offset of each part when the next marker is met
**  - report content that never reached a stop marker as bogus and
**    drop a trailing part that turned out empty
**  - for multipart/alternative, reverse the part order
**  - label every part as parent number, dot, index, and run the init
**    function of each part
**
** Allocation failure ends the program through adios.
** NOTE(review): the return statements are not visible here.  The
** part index is appended with an unbounded sprintf behind a prefix
** that was limited only to the full buffer size; confirm that the
** prefix can never come close to BUFSIZ.
*/
1017 InitMultiPart(CT ct)
1021 unsigned char *cp, *dp;
1023 char *bp, buffer[BUFSIZ];
1024 struct multipart *m;
1026 struct part *part, **next;
1027 CI ci = &ct->c_ctinfo;
1032 ** The encoding for multipart messages must be either
1033 ** 7bit, 8bit, or binary (per RFC2045).
1035 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1036 && ct->c_encoding != CE_BINARY) {
1037 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1042 for (kv = SubMultiPart; kv->kv_key; kv++)
1043 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1045 ct->c_subtype = kv->kv_value;
1048 ** Check for "boundary" parameter, which is
1049 ** required for multipart messages.
1052 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1053 if (!mh_strcasecmp(*ap, "boundary")) {
1059 /* complain if boundary parameter is missing */
1061 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1065 /* allocate primary structure for multipart info */
1066 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1067 adios(EX_OSERR, NULL, "out of memory");
1068 ct->c_ctparams = (void *) m;
1070 /* check if boundary parameter contains only whitespace characters */
1071 for (cp = bp; isspace(*cp); cp++)
1074 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1078 /* remove trailing whitespace from boundary parameter */
1079 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1084 /* record boundary separators */
1085 m->mp_start = concat(bp, "\n", NULL);
1086 m->mp_stop = concat(bp, "--\n", NULL);
1088 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1089 advise(ct->c_file, "unable to open for reading");
/* pos shadows the stream offset so that it can be restored later */
1093 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
/* next always addresses the link where a new part gets attached */
1095 next = &m->mp_parts;
1099 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1103 pos += strlen(buffer);
/* only lines that open with two dashes can be separators */
1104 if (buffer[0] != '-' || buffer[1] != '-')
1107 if (strcmp(buffer + 2, m->mp_start)!=0)
1110 if ((part = (struct part *) calloc(1, sizeof(*part)))
1112 adios(EX_OSERR, NULL, "out of memory");
1114 next = &part->mp_next;
1116 if (!(p = get_content(fp, ct->c_file,
1117 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1124 fseek(fp, pos, SEEK_SET);
1127 if (strcmp(buffer + 2, m->mp_start) == 0) {
/* the part ends just before the separator line that was read */
1131 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1132 if (p->c_end < p->c_begin)
1133 p->c_begin = p->c_end;
1138 if (strcmp(buffer + 2, m->mp_stop) == 0)
1144 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1145 if (!inout && part) {
1147 p->c_end = ct->c_end;
/* an empty trailing part is unlinked from the list and released */
1149 if (p->c_begin >= p->c_end) {
1150 for (next = &m->mp_parts; *next != part;
1151 next = &((*next)->mp_next))
1155 free((char *) part);
1160 /* reverse the order of the parts for multipart/alternative */
1161 if (ct->c_subtype == MULTI_ALTERNATE)
1165 ** label all subparts with part number, and
1166 ** then initialize the content of the subpart.
1171 char partnam[BUFSIZ];
1174 snprintf(partnam, sizeof(partnam), "%s.",
/* pp marks the spot where each part index is written */
1176 pp = partnam + strlen(partnam);
1181 for (part = m->mp_parts, partnum = 1; part;
1182 part = part->mp_next, partnum++) {
1185 sprintf(pp, "%d", partnum);
1186 p->c_partno = getcpy(partnam);
1188 /* initialize the content of the subparts */
1189 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1204 ** reverse the order of the parts of a multipart
/*
** reverse_parts: invert the order of the part list of a multipart
** content in place.
**
** Lists with fewer than two parts are left alone.  Otherwise the
** parts are counted, their addresses are collected in a temporary
** array with one spare slot, the list is relinked while walking the
** array backwards, and the array is released.  Allocation failure
** ends the program through adios.
**
** NOTE(review): the statements that fill the array and that
** terminate the relinked list are not visible here.
*/
1208 reverse_parts(CT ct)
1211 struct multipart *m;
1212 struct part **base, **bmp, **next, *part;
1214 m = (struct multipart *) ct->c_ctparams;
1216 /* if only one part, just return */
1217 if (!m->mp_parts || !m->mp_parts->mp_next)
1220 /* count number of parts */
1222 for (part = m->mp_parts; part; part = part->mp_next)
1225 /* allocate array of pointers to the parts */
1226 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1227 adios(EX_OSERR, NULL, "out of memory");
1230 /* point at all the parts */
1231 for (part = m->mp_parts; part; part = part->mp_next)
1235 /* reverse the order of the parts */
1236 next = &m->mp_parts;
1237 for (bmp--; bmp >= base; bmp--) {
1240 next = &part->mp_next;
1244 /* free array of pointers */
1245 free((char *) base);
/*
** Body of the init hook for message content, presumably InitMessage
** (the signature is not visible here).
**
** A transfer encoding other than 7bit or 8bit draws a warning.  An
** empty subtype is replaced by rfc822 and the subtype code is looked
** up in SubMessage.  Then, per subtype:
**
**  MESSAGE_RFC822   - no work is visible here.
**  MESSAGE_PARTIAL  - a zeroed struct partial is attached to
**                     c_ctparams and filled from the id, number and
**                     total parameters.  The numeric ones are read
**                     with sscanf; a part number below 1 is
**                     rejected.  Afterwards an id and a part number
**                     are required, and the part number may not
**                     exceed a non-zero total.
**  MESSAGE_EXTERNAL - the message file is opened if needed, the
**                     stream is moved to c_begin, and the enclosed
**                     header block is parsed by get_content with
**                     toplevel 0.  The nested content gets an end
**                     offset equal to its begin offset and its own
**                     init function is run.
**
** Allocation failure ends the program through adios.
** NOTE(review): the return statements, the checks applied to the
** nested content type and the handling of an invalid total are not
** visible here.
*/
1257 CI ci = &ct->c_ctinfo;
1259 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1260 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1264 /* check for missing subtype */
1265 if (!*ci->ci_subtype)
1266 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1269 for (kv = SubMessage; kv->kv_key; kv++)
1270 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1272 ct->c_subtype = kv->kv_value;
1274 switch (ct->c_subtype) {
1275 case MESSAGE_RFC822:
1278 case MESSAGE_PARTIAL:
1283 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1284 adios(EX_OSERR, NULL, "out of memory");
1285 ct->c_ctparams = (void *) p;
1288 ** scan for parameters "id", "number",
1291 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1292 if (!mh_strcasecmp(*ap, "id")) {
1293 p->pm_partid = getcpy(*ep);
1296 if (!mh_strcasecmp(*ap, "number")) {
1297 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1299 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1304 if (!mh_strcasecmp(*ap, "total")) {
1305 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1312 if (!p->pm_partid || !p->pm_partno
1313 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1314 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1320 case MESSAGE_EXTERNAL:
1325 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1326 advise(ct->c_file, "unable to open for reading");
1330 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1332 if (!(p = get_content(fp, ct->c_file, 0))) {
1338 p->c_end = p->c_begin;
1343 switch (p->c_type) {
1348 if (p->c_subtype != MESSAGE_RFC822)
1353 (*p->c_ctinitfnx) (p);
/*
** InitApplication: init hook for application content.  The only
** visible work is mapping the subtype through SubApplication into
** c_subtype; names missing from the table end up with the value of
** the terminating row.  NOTE(review): the return value is not
** visible here.
*/
1372 InitApplication(CT ct)
1375 CI ci = &ct->c_ctinfo;
1378 for (kv = SubApplication; kv->kv_key; kv++)
1379 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1381 ct->c_subtype = kv->kv_value;
1388 ** TRANSFER ENCODINGS
/*
** init_encoding: common setup shared by the transfer encoding init
** hooks.
**
** A zeroed encoding record is allocated (adios on failure) and the
** three callbacks of ct are installed: openfnx as supplied by the
** caller for opening, close_encoding for closing and size_encoding
** for size queries.
**
** NOTE(review): the store of the new record into ct and the return
** value are not visible here; one visible caller compares the result
** against NOTOK.
*/
1392 init_encoding(CT ct, OpenCEFunc openfnx)
1396 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1397 adios(EX_OSERR, NULL, "out of memory");
1400 ct->c_ceopenfnx = openfnx;
1401 ct->c_ceclosefnx = close_encoding;
1402 ct->c_cesizefnx = size_encoding;
/*
** close_encoding: close callback installed by init_encoding.  It
** starts by fetching the encoding record of ct and testing for its
** absence.  NOTE(review): what is closed and what is returned is not
** visible here.
*/
1409 close_encoding(CT ct)
1413 if (!(ce = ct->c_cefile))
/*
** size_encoding: size callback installed by init_encoding; reports
** the size in bytes of the content of ct.
**
** Sources tried in order:
**  1. no encoding record: the raw span c_end minus c_begin
**  2. a decoded stream is open: its size according to fstat
**  3. a decoded file is on record: its size according to stat
**  4. encoding CE_EXTERNAL: the raw span
**  5. otherwise the content is decoded through the open callback and
**     the size of the resulting descriptor is used, after which the
**     close callback is run; if decoding fails the raw span is
**     returned instead
**
** NOTE(review): file sizes are narrowed to long on the way out, and
** the initial value of size together with the final return is not
** visible here.
*/
1423 static unsigned long
1424 size_encoding(CT ct)
1432 if (!(ce = ct->c_cefile))
1433 return (ct->c_end - ct->c_begin);
1435 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1436 return (long) st.st_size;
1439 if (stat(ce->ce_file, &st) != NOTOK)
1440 return (long) st.st_size;
1445 if (ct->c_encoding == CE_EXTERNAL)
1446 return (ct->c_end - ct->c_begin);
1449 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1450 return (ct->c_end - ct->c_begin);
1452 if (fstat(fd, &st) != NOTOK)
1453 size = (long) st.st_size;
1457 (*ct->c_ceclosefnx) (ct);
/*
** Decode table for base64, indexed by a 7-bit character code.
** Valid characters map to their 6-bit value: upper-case letters to
** 0x00-0x19, lower-case letters to 0x1a-0x33, digits to 0x34-0x3d,
** the plus sign to 0x3e and the slash to 0x3f.  Every other entry is
** 0xff, which the decoder rejects because it exceeds 0x3f.  The
** equals sign used for padding is among the 0xff entries.
*/
1466 static unsigned char b642nib[0x80] = {
1467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1468 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1469 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1470 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1471 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1472 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1473 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1474 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1475 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1476 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1477 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1478 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1479 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1480 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1481 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1482 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
/*
** Presumably the whole body of InitBase64, the init hook that the
** encoding table installs for base64: it registers openBase64 as
** the decoder and passes on the result of init_encoding.
** NOTE(review): the signature is not visible here.
*/
1489 return init_encoding(ct, openBase64);
/*
** openBase64: decode the base64 body of ct into a file and return a
** descriptor for it.
**
** ct   - content whose bytes between c_begin and c_end are decoded
** file - in: optional target path (NULL selects a temporary name
**        from m_mktemp); out: set to the name of the decoded file
**
** A decoded stream or file that already exists is rewound or
** reopened; presumably decoding is skipped in that case.  Otherwise
** the target name is chosen, optionally extended with a suffix
** looked up in the profile, first under
** <invo_name>-suffix-<type>/<subtype> and then under a key without
** the subtype, and the file is opened for update.  The encoded bytes
** are read in chunks.  Every valid character contributes six bits to
** an accumulator; once the bit position drops below zero, up to
** three bytes are written out through b1, b2 and b3, whose positions
** inside the accumulator depend on endian.  Characters with the high
** bit set or rejected by b642nib are reported as an invalid
** encoding.
**
** Returns the descriptor of the decoded stream.  A negative content
** length or a failed rename ends the program through adios.
** NOTE(review): the failure path visibly calls free_encoding, but
** its return value is not visible here.  The result of strdup is
** used without a visible check, and the rename message ends in a
** dangling format whose arguments are not visible.
*/
1494 openBase64(CT ct, char **file)
1497 int fd, len, skip, own_ct_fp = 0;
1499 unsigned char value, *b, *b1, *b2, *b3;
1500 unsigned char *cp, *ep;
1501 char buffer[BUFSIZ];
1502 /* sbeck -- handle suffixes */
/* view the accumulator as bytes; endian picks the index order */
1506 b = (unsigned char *) &bits;
1507 b1 = &b[endian > 0 ? 1 : 2];
1508 b2 = &b[endian > 0 ? 2 : 1];
1509 b3 = &b[endian > 0 ? 3 : 0];
1513 fseek(ce->ce_fp, 0L, SEEK_SET);
1518 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1519 content_error(ce->ce_file, ct,
1520 "unable to fopen for reading");
1526 if (*file == NULL) {
1527 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1530 ce->ce_file = getcpy(*file);
1534 /* sbeck@cise.ufl.edu -- handle suffixes */
1536 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1537 invo_name, ci->ci_type, ci->ci_subtype);
1538 cp = context_find(buffer);
1539 if (cp == NULL || *cp == '\0') {
1540 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1542 cp = context_find(buffer);
1544 if (cp != NULL && *cp != '\0') {
1545 if (ce->ce_unlink) {
1547 ** Temporary file already exists, so we rename to
1548 ** version with extension.
1550 char *file_org = strdup(ce->ce_file);
1551 ce->ce_file = add(cp, ce->ce_file);
1552 if (rename(file_org, ce->ce_file)) {
1553 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1559 ce->ce_file = add(cp, ce->ce_file);
1563 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1564 content_error(ce->ce_file, ct,
1565 "unable to fopen for reading/writing");
1569 if ((len = ct->c_end - ct->c_begin) < 0)
1570 adios(EX_SOFTWARE, NULL, "internal error(1)");
1573 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1574 content_error(ct->c_file, ct,
1575 "unable to open for reading");
/* input is read with read on the raw descriptor, not through stdio */
1585 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1587 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1589 content_error(ct->c_file, ct, "error reading from");
1593 content_error(NULL, ct, "premature eof");
1601 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1606 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1608 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1610 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
/* merge six more bits; bitno steps down from 18 in units of six */
1614 bits |= value << bitno;
1616 if ((bitno -= 6) < 0) {
1617 putc((char) *b1, ce->ce_fp);
1619 putc((char) *b2, ce->ce_fp);
1621 putc((char) *b3, ce->ce_fp);
1625 if (ferror(ce->ce_fp)) {
1626 content_error(ce->ce_file, ct,
1627 "error writing to");
1630 bitno = 18, bits = 0L, skip = 0;
1636 goto self_delimiting;
1645 fprintf(stderr, "premature ending (bitno %d)\n",
1648 content_error(NULL, ct, "invalid BASE64 encoding");
1653 fseek(ct->c_fp, 0L, SEEK_SET);
1655 if (fflush(ce->ce_fp)) {
1656 content_error(ce->ce_file, ct, "error writing to");
1660 fseek(ce->ce_fp, 0L, SEEK_SET);
1663 *file = ce->ce_file;
1668 return fileno(ce->ce_fp);
1671 free_encoding(ct, 0);
/*
** Decode table for quoted-printable escapes, indexed by a 7-bit
** character code.  The digits map to 0-9 and the letters A-F in
** either case map to 10-15.  Every other entry is zero, so the table
** cannot tell an invalid character from the digit zero; the decoder
** checks with isxdigit before it enters an escape sequence.
*/
1684 static char hex2nib[0x80] = {
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1692 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1697 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1699 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1700 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/*
** Presumably the whole body of InitQuoted, the init hook that the
** encoding table installs for quoted-printable: it registers
** openQuoted as the decoder and passes on the result of
** init_encoding.  NOTE(review): the signature is not visible here.
*/
1707 return init_encoding(ct, openQuoted);
/*
** openQuoted: decode the quoted-printable body of ct into a file and
** return a descriptor for it.
**
** ct   - content whose bytes between c_begin and c_end are decoded
** file - in: optional target path (NULL selects a temporary name
**        from m_mktemp); out: set to the name of the decoded file
**
** Target selection, profile suffix handling and reuse of an existing
** decoded file follow the same visible steps as in openBase64.  The
** body is read one line at a time with fgets and scanned from the
** front, with the end pointer first walked backwards from the last
** character of the line.  An equals sign directly followed by a
** newline is a soft line break and produces no output.  An equals
** sign followed by two hex digits is decoded through hex2nib into
** one byte.  Any other escape is invalid and its raw bytes are
** copied through unchanged, as are all ordinary bytes.  Running out
** of content while still inside an escape is reported as an error.
**
** Returns the descriptor of the decoded stream.  A negative content
** length or a failed rename ends the program through adios.
** NOTE(review): the failure path visibly calls free_encoding, but
** its return value is not visible here.  The statement that moves
** the first nibble into the upper half of mask, the bodies of the
** length clipping and end pointer steps, and the updates of the
** remaining length are not visible either.
*/
1712 openQuoted(CT ct, char **file)
1714 int cc, len, quoted, own_ct_fp = 0;
1715 unsigned char *cp, *ep;
1716 char buffer[BUFSIZ];
1717 unsigned char mask = 0;
1719 /* sbeck -- handle suffixes */
1724 fseek(ce->ce_fp, 0L, SEEK_SET);
1729 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1730 content_error(ce->ce_file, ct,
1731 "unable to fopen for reading");
1737 if (*file == NULL) {
1738 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1741 ce->ce_file = getcpy(*file);
1745 /* sbeck@cise.ufl.edu -- handle suffixes */
1747 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1748 invo_name, ci->ci_type, ci->ci_subtype);
1749 cp = context_find(buffer);
1750 if (cp == NULL || *cp == '\0') {
1751 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1753 cp = context_find(buffer);
1755 if (cp != NULL && *cp != '\0') {
1756 if (ce->ce_unlink) {
1758 ** Temporary file already exists, so we rename to
1759 ** version with extension.
1761 char *file_org = strdup(ce->ce_file);
1762 ce->ce_file = add(cp, ce->ce_file);
1763 if (rename(file_org, ce->ce_file)) {
1764 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1770 ce->ce_file = add(cp, ce->ce_file);
1774 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1775 content_error(ce->ce_file, ct,
1776 "unable to fopen for reading/writing");
1780 if ((len = ct->c_end - ct->c_begin) < 0)
1781 adios(EX_SOFTWARE, NULL, "internal error(2)");
1784 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1785 content_error(ct->c_file, ct,
1786 "unable to open for reading");
1794 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1796 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1797 content_error(NULL, ct, "premature eof");
/* a line longer than the remaining content is detected here */
1801 if ((cc = strlen(buffer)) > len)
1805 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1810 for (; cp < ep; cp++) {
1812 /* in an escape sequence */
1814 /* at byte 1 of an escape sequence */
1815 mask = hex2nib[*cp & 0x7f];
1816 /* next is byte 2 */
1819 /* at byte 2 of an escape sequence */
1821 mask |= hex2nib[*cp & 0x7f];
1822 putc(mask, ce->ce_fp);
1823 if (ferror(ce->ce_fp)) {
1824 content_error(ce->ce_file, ct, "error writing to");
1828 ** finished escape sequence; next may
1829 ** be literal or a new escape sequence
1833 /* on to next byte */
1837 /* not in an escape sequence */
1840 ** starting an escape sequence,
1843 if (cp + 1 < ep && cp[1] == '\n') {
1844 /* "=\n" soft line break, eat the \n */
1848 if (cp + 1 >= ep || cp + 2 >= ep) {
1850 ** We don't have 2 bytes left,
1851 ** so this is an invalid escape
1852 ** sequence; just show the raw bytes
1855 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1857 ** Next 2 bytes are hex digits,
1858 ** making this a valid escape
1859 ** sequence; let's decode it (above).
1865 ** One or both of the next 2 is
1866 ** out of range, making this an
1867 ** invalid escape sequence; just
1868 ** show the raw bytes (below).
1873 /* Just show the raw byte. */
1874 putc(*cp, ce->ce_fp);
1875 if (ferror(ce->ce_fp)) {
1876 content_error(ce->ce_file, ct,
1877 "error writing to");
1883 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1887 fseek(ct->c_fp, 0L, SEEK_SET);
1889 if (fflush(ce->ce_fp)) {
1890 content_error(ce->ce_file, ct, "error writing to");
1894 fseek(ce->ce_fp, 0L, SEEK_SET);
1897 *file = ce->ce_file;
1902 return fileno(ce->ce_fp);
1905 free_encoding(ct, 0);
/*
** Presumably the body of Init7Bit, the init hook that the encoding
** table installs for 8bit, 7bit and binary.  It registers open7Bit
** as the opener and then clears the size callback, since for these
** encodings the stored bytes already are the decoded bytes.
** NOTE(review): the signature and the return statements are not
** visible here.
*/
1921 if (init_encoding(ct, open7Bit) == NOTOK)
1924 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** open7Bit: copy the body of ct unchanged into a file and return a
** descriptor for it.  This opener serves the 7bit, 8bit and binary
** encodings.
**
** ct   - content whose bytes between c_begin and c_end are copied
** file - in: optional target path (NULL selects a temporary name
**        from m_mktemp); out: set to the name of the copy
**
** Target selection, profile suffix handling and reuse of an existing
** file follow the same visible steps as in openBase64.  For
** multipart content a header block is written ahead of the body: the
** Content-Type line with all parameters quoted, folded onto a
** continuation line whenever the next item would reach CPERLIN
** columns, then the stored comment in parentheses and the
** Content-ID, Content-Description and Content-Disposition lines,
** closed by an empty line.  After that the raw bytes are moved in
** chunks with read and fwrite.
**
** Returns the descriptor of the output stream.  A negative content
** length or a failed rename ends the program through adios.
** NOTE(review): the failure path visibly calls free_encoding, but
** its return value is not visible here.  The guards in front of the
** three optional header lines are not visible; those lines print the
** stored values directly after the colon, so the values presumably
** carry their own leading blank and trailing newline - confirm.
*/
1930 open7Bit(CT ct, char **file)
1932 int cc, fd, len, own_ct_fp = 0;
1933 char buffer[BUFSIZ];
1934 /* sbeck -- handle suffixes */
1941 fseek(ce->ce_fp, 0L, SEEK_SET);
1946 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1947 content_error(ce->ce_file, ct,
1948 "unable to fopen for reading");
1954 if (*file == NULL) {
1955 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1958 ce->ce_file = getcpy(*file);
1962 /* sbeck@cise.ufl.edu -- handle suffixes */
1964 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1965 invo_name, ci->ci_type, ci->ci_subtype);
1966 cp = context_find(buffer);
1967 if (cp == NULL || *cp == '\0') {
1968 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1970 cp = context_find(buffer);
1972 if (cp != NULL && *cp != '\0') {
1973 if (ce->ce_unlink) {
1975 ** Temporary file already exists, so we rename to
1976 ** version with extension.
1978 char *file_org = strdup(ce->ce_file);
1979 ce->ce_file = add(cp, ce->ce_file);
1980 if (rename(file_org, ce->ce_file)) {
1981 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1987 ce->ce_file = add(cp, ce->ce_file);
1991 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1992 content_error(ce->ce_file, ct,
1993 "unable to fopen for reading/writing");
/* multipart bodies get their own header block written first */
1997 if (ct->c_type == CT_MULTIPART) {
1999 CI ci = &ct->c_ctinfo;
2002 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
/* len tracks the current output column for the folding decisions */
2004 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2005 strlen(ci->ci_subtype);
2006 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2007 putc(';', ce->ce_fp);
2010 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2013 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2014 fputs("\n\t", ce->ce_fp);
2017 putc(' ', ce->ce_fp);
2020 fprintf(ce->ce_fp, "%s", buffer);
2024 if (ci->ci_comment) {
2025 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2027 fputs("\n\t", ce->ce_fp);
2030 putc(' ', ce->ce_fp);
2033 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2036 fprintf(ce->ce_fp, "\n");
2038 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2040 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2042 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2043 fprintf(ce->ce_fp, "\n");
2046 if ((len = ct->c_end - ct->c_begin) < 0)
2047 adios(EX_SOFTWARE, NULL, "internal error(3)");
2050 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2051 content_error(ct->c_file, ct,
2052 "unable to open for reading");
/* input is read with read on the raw descriptor, not through stdio */
2058 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2060 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2062 content_error(ct->c_file, ct, "error reading from");
2066 content_error(NULL, ct, "premature eof");
2074 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2075 if (ferror(ce->ce_fp)) {
2076 content_error(ce->ce_file, ct,
2077 "error writing to");
2082 fseek(ct->c_fp, 0L, SEEK_SET);
2084 if (fflush(ce->ce_fp)) {
2085 content_error(ce->ce_file, ct, "error writing to");
2089 fseek(ce->ce_fp, 0L, SEEK_SET);
2092 *file = ce->ce_file;
2097 return fileno(ce->ce_fp);
2100 free_encoding(ct, 0);