2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 char buf[BUFSIZ], name[NAMESZ];
240 /* allocate the content structure */
241 if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242 adios(EX_OSERR, NULL, "out of memory");
245 ct->c_file = getcpy(file);
246 ct->c_begin = ftell(ct->c_fp) + 1;
249 ** Parse the header fields for this
250 ** content into a linked list.
252 for (compnum = 1, state = FLD;;) {
253 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
259 /* get copies of the buffers */
263 /* if necessary, get rest of field */
264 while (state == FLDPLUS) {
265 state = m_getfld(state, name, buf,
267 vp = add(buf, vp); /* add to previous value */
270 /* Now add the header data to the list */
271 add_header(ct, np, vp);
273 /* continue, if this isn't the last header field */
274 if (state != FLDEOF) {
275 ct->c_begin = ftell(in) + 1;
282 ct->c_begin = ftell(in) - strlen(buf);
286 ct->c_begin = ftell(in);
291 adios(EX_DATAERR, NULL, "message format error in component #%d",
295 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
298 /* break out of the loop */
303 ** Read the content headers. We will parse the
304 ** MIME related header fields into their various
305 ** structures and set internal flags related to
306 ** content type/subtype, etc.
309 hp = ct->c_first_hf; /* start at first header field */
311 /* Get MIME-Version field */
312 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
315 unsigned char *cp, *dp;
318 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
321 ct->c_vrsn = getcpy(hp->value);
323 /* Now, cleanup this field */
328 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
330 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
335 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
337 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
340 for (dp = cp; istoken(*dp); dp++)
344 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
347 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
350 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
351 /* Get Content-Type field */
352 struct str2init *s2i;
353 CI ci = &ct->c_ctinfo;
355 /* Check if we've already seen a Content-Type header */
357 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
361 /* Parse the Content-Type field */
362 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
366 ** Set the Init function and the internal
367 ** flag for this content type.
369 for (s2i = str2cts; s2i->si_key; s2i++)
370 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
372 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
374 ct->c_type = s2i->si_val;
375 ct->c_ctinitfnx = s2i->si_init;
377 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
378 /* Get Content-Transfer-Encoding field */
380 unsigned char *cp, *dp;
381 struct str2init *s2i;
384 ** Check if we've already seen the
385 ** Content-Transfer-Encoding field
388 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
392 /* get copy of this field */
393 ct->c_celine = cp = getcpy(hp->value);
397 for (dp = cp; istoken(*dp); dp++)
403 ** Find the internal flag and Init function
404 ** for this transfer encoding.
406 for (s2i = str2ces; s2i->si_key; s2i++)
407 if (!mh_strcasecmp(cp, s2i->si_key))
409 if (!s2i->si_key && !uprf(cp, "X-"))
412 ct->c_encoding = s2i->si_val;
414 /* Call the Init function for this encoding */
415 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
418 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
419 /* Get Content-ID field */
420 ct->c_id = add(hp->value, ct->c_id);
422 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
423 /* Get Content-Description field */
424 ct->c_descr = add(hp->value, ct->c_descr);
426 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
427 /* Get Content-Disposition field */
428 ct->c_dispo = add(hp->value, ct->c_dispo);
432 hp = hp->next; /* next header field */
436 ** Check if we saw a Content-Type field.
437 ** If not, then assign a default value for
438 ** it, and the Init function.
442 ** We are inside a multipart/digest message,
443 ** so the default type is message/rfc822
446 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
448 ct->c_type = CT_MESSAGE;
449 ct->c_ctinitfnx = InitMessage;
452 ** Else default type is text/plain
454 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
456 ct->c_type = CT_TEXT;
457 ct->c_ctinitfnx = InitText;
461 /* Use default Transfer-Encoding, if necessary */
463 ct->c_encoding = CE_7BIT;
476 ** small routine to add header field to list
480 add_header(CT ct, char *name, char *value)
484 /* allocate header field structure */
485 hp = mh_xmalloc(sizeof(*hp));
487 /* link data into header structure */
492 /* link header structure into the list */
493 if (ct->c_first_hf == NULL) {
494 ct->c_first_hf = hp; /* this is the first */
497 ct->c_last_hf->next = hp; /* add it to the end */
506 ** Make sure that buf contains at least one appearance of name,
507 ** followed by =. If not, insert both name and value, just after
508 ** first semicolon, if any. Note that name should not contain a
509 ** trailing =. And quotes will be added around the value. Typical
510 ** usage: make sure that a Content-Disposition header contains
511 ** filename="foo". If it doesn't and value does, use value from
515 incl_name_value(unsigned char *buf, char *name, char *value) {
518 /* Assume that name is non-null. */
520 char *name_plus_equal = concat(name, "=", NULL);
522 if (!strstr(buf, name_plus_equal)) {
525 char *prefix, *suffix;
527 /* Trim trailing space, esp. newline. */
528 for (cp = &buf[strlen(buf) - 1];
529 cp >= buf && isspace(*cp); --cp) {
533 insertion = concat("; ", name, "=", "\"", value, "\"",
537 ** Insert at first semicolon, if any.
538 ** If none, append to end.
540 prefix = getcpy(buf);
541 if ((cp = strchr(prefix, ';'))) {
542 suffix = concat(cp, NULL);
544 newbuf = concat(prefix, insertion, suffix,
549 newbuf = concat(buf, insertion, "\n", NULL);
557 free(name_plus_equal);
564 ** Extract just name_suffix="foo", if any, from value. If there isn't
565 ** one, return the entire value. Note that, for example, a name_suffix
566 ** of name will match filename="foo", and return foo.
569 extract_name_value(char *name_suffix, char *value) {
570 char *extracted_name_value = value;
571 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
572 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
575 free(name_suffix_plus_quote);
576 if (name_suffix_equals) {
577 char *name_suffix_begin;
580 for (cp = name_suffix_equals; *cp != '"'; ++cp)
582 name_suffix_begin = ++cp;
583 /* Find second \". */
584 for (; *cp != '"'; ++cp)
587 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
588 memcpy(extracted_name_value, name_suffix_begin,
589 cp - name_suffix_begin);
590 extracted_name_value[cp - name_suffix_begin] = '\0';
593 return extracted_name_value;
597 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
598 ** directives. Fills in the information of the CTinfo structure.
601 get_ctinfo(unsigned char *cp, CT ct, int magic)
610 i = strlen(invo_name) + 2;
612 /* store copy of Content-Type line */
613 cp = ct->c_ctline = getcpy(cp);
615 while (isspace(*cp)) /* trim leading spaces */
618 /* change newlines to spaces */
619 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
622 /* trim trailing spaces */
623 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
629 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
631 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
634 for (dp = cp; istoken(*dp); dp++)
637 ci->ci_type = getcpy(cp); /* store content type */
641 advise(NULL, "invalid %s: field in message %s (empty type)",
642 TYPE_FIELD, ct->c_file);
646 /* down case the content type string */
647 for (dp = ci->ci_type; *dp; dp++)
648 if (isalpha(*dp) && isupper(*dp))
654 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
659 ci->ci_subtype = getcpy("");
667 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
670 for (dp = cp; istoken(*dp); dp++)
673 ci->ci_subtype = getcpy(cp); /* store the content subtype */
676 if (!*ci->ci_subtype) {
677 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
681 /* down case the content subtype string */
682 for (dp = ci->ci_subtype; *dp; dp++)
683 if (isalpha(*dp) && isupper(*dp))
690 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
694 ** Parse attribute/value pairs given with Content-Type
696 ep = (ap = ci->ci_attrs) + NPARMS;
702 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
710 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
714 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
718 /* down case the attribute name */
719 for (dp = cp; istoken(*dp); dp++)
720 if (isalpha(*dp) && isupper(*dp))
723 for (up = dp; isspace(*dp);)
725 if (dp == cp || *dp != '=') {
726 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
730 vp = (*ap = getcpy(cp)) + (up - cp);
732 for (dp++; isspace(*dp);)
735 /* now add the attribute value */
736 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
739 for (cp = ++dp, dp = vp;;) {
743 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
748 if ((c = *cp++) == '\0')
763 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
768 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
769 *ci->ci_values[ap - ci->ci_attrs] = '\0';
770 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
778 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
783 ** Get any <Content-Id> given in buffer
785 if (magic && *cp == '<') {
790 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
791 advise(NULL, "invalid ID in message %s", ct->c_file);
797 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
808 ** Get any [Content-Description] given in buffer.
810 if (magic && *cp == '[') {
812 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
816 advise(NULL, "invalid description in message %s",
825 ct->c_descr = concat(ct->c_descr, "\n", NULL);
836 ** Get any {Content-Disposition} given in buffer.
838 if (magic && *cp == '{') {
840 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
844 advise(NULL, "invalid disposition in message %s",
853 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
864 ** Check if anything is left over
868 ci->ci_magic = getcpy(cp);
871 ** If there is a Content-Disposition header and
872 ** it doesn't have a *filename=, extract it from
873 ** the magic contents. The mhbasename call skips
874 ** any leading directory components.
877 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
879 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
887 get_comment(CT ct, unsigned char **ap, int istype)
892 char c, buffer[BUFSIZ], *dp;
904 advise(NULL, "invalid comment in message %s's %s: field",
905 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
910 if ((c = *cp++) == '\0')
933 if ((dp = ci->ci_comment)) {
934 ci->ci_comment = concat(dp, " ", buffer, NULL);
937 ci->ci_comment = getcpy(buffer);
952 ** Handles content types audio, image, and video.
953 ** There's not much to do right here.
959 return OK; /* not much to do here */
973 CI ci = &ct->c_ctinfo;
975 /* check for missing subtype */
976 if (!*ci->ci_subtype)
977 ci->ci_subtype = add("plain", ci->ci_subtype);
980 for (kv = SubText; kv->kv_key; kv++)
981 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
983 ct->c_subtype = kv->kv_value;
985 /* allocate text character set structure */
986 if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
987 adios(EX_OSERR, NULL, "out of memory");
988 ct->c_ctparams = (void *) t;
990 /* scan for charset parameter */
991 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
992 if (!mh_strcasecmp(*ap, "charset"))
995 /* check if content specified a character set */
998 ct->c_charset = getcpy(norm_charmap(*ep));
999 /* match character set or set to CHARSET_UNKNOWN */
1000 for (kv = Charset; kv->kv_key; kv++) {
1001 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1005 t->tx_charset = kv->kv_value;
1007 t->tx_charset = CHARSET_UNSPECIFIED;
1019 InitMultiPart(CT ct)
1023 unsigned char *cp, *dp;
1025 char *bp, buffer[BUFSIZ];
1026 struct multipart *m;
1028 struct part *part, **next;
1029 CI ci = &ct->c_ctinfo;
1034 ** The encoding for multipart messages must be either
1035 ** 7bit, 8bit, or binary (per RFC2045).
1037 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1038 && ct->c_encoding != CE_BINARY) {
1039 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1040 ct->c_encoding = CE_7BIT;
1044 for (kv = SubMultiPart; kv->kv_key; kv++)
1045 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1047 ct->c_subtype = kv->kv_value;
1050 ** Check for "boundary" parameter, which is
1051 ** required for multipart messages.
1054 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1055 if (!mh_strcasecmp(*ap, "boundary")) {
1061 /* complain if boundary parameter is missing */
1063 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1067 /* allocate primary structure for multipart info */
1068 if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1069 adios(EX_OSERR, NULL, "out of memory");
1070 ct->c_ctparams = (void *) m;
1072 /* check if boundary parameter contains only whitespace characters */
1073 for (cp = bp; isspace(*cp); cp++)
1076 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1080 /* remove trailing whitespace from boundary parameter */
1081 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1086 /* record boundary separators */
1087 m->mp_start = concat(bp, "\n", NULL);
1088 m->mp_stop = concat(bp, "--\n", NULL);
1090 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1091 advise(ct->c_file, "unable to open for reading");
1095 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1097 next = &m->mp_parts;
1101 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1105 pos += strlen(buffer);
1106 if (buffer[0] != '-' || buffer[1] != '-')
1109 if (strcmp(buffer + 2, m->mp_start)!=0)
1112 if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1114 adios(EX_OSERR, NULL, "out of memory");
1116 next = &part->mp_next;
1118 if (!(p = get_content(fp, ct->c_file,
1119 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1126 fseek(fp, pos, SEEK_SET);
1129 if (strcmp(buffer + 2, m->mp_start) == 0) {
1133 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1134 if (p->c_end < p->c_begin)
1135 p->c_begin = p->c_end;
1140 if (strcmp(buffer + 2, m->mp_stop) == 0)
1146 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1147 if (!inout && part) {
1149 p->c_end = ct->c_end;
1151 if (p->c_begin >= p->c_end) {
1152 for (next = &m->mp_parts; *next != part;
1153 next = &((*next)->mp_next))
1157 free((char *) part);
1162 /* reverse the order of the parts for multipart/alternative */
1163 if (ct->c_subtype == MULTI_ALTERNATE)
1167 ** label all subparts with part number, and
1168 ** then initialize the content of the subpart.
1173 char partnam[BUFSIZ];
1176 snprintf(partnam, sizeof(partnam), "%s.",
1178 pp = partnam + strlen(partnam);
1183 for (part = m->mp_parts, partnum = 1; part;
1184 part = part->mp_next, partnum++) {
1187 sprintf(pp, "%d", partnum);
1188 p->c_partno = getcpy(partnam);
1190 /* initialize the content of the subparts */
1191 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1206 ** reverse the order of the parts of a multipart
1210 reverse_parts(CT ct)
1213 struct multipart *m;
1214 struct part **base, **bmp, **next, *part;
1216 m = (struct multipart *) ct->c_ctparams;
1218 /* if only one part, just return */
1219 if (!m->mp_parts || !m->mp_parts->mp_next)
1222 /* count number of parts */
1224 for (part = m->mp_parts; part; part = part->mp_next)
1227 /* allocate array of pointers to the parts */
1228 if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1229 adios(EX_OSERR, NULL, "out of memory");
1232 /* point at all the parts */
1233 for (part = m->mp_parts; part; part = part->mp_next)
1237 /* reverse the order of the parts */
1238 next = &m->mp_parts;
1239 for (bmp--; bmp >= base; bmp--) {
1242 next = &part->mp_next;
1246 /* free array of pointers */
1247 free((char *) base);
1259 CI ci = &ct->c_ctinfo;
1261 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1262 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1266 /* check for missing subtype */
1267 if (!*ci->ci_subtype)
1268 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1271 for (kv = SubMessage; kv->kv_key; kv++)
1272 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1274 ct->c_subtype = kv->kv_value;
1276 switch (ct->c_subtype) {
1277 case MESSAGE_RFC822:
1280 case MESSAGE_PARTIAL:
1285 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1286 adios(EX_OSERR, NULL, "out of memory");
1287 ct->c_ctparams = (void *) p;
1290 ** scan for parameters "id", "number",
1293 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1294 if (!mh_strcasecmp(*ap, "id")) {
1295 p->pm_partid = getcpy(*ep);
1298 if (!mh_strcasecmp(*ap, "number")) {
1299 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1301 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1306 if (!mh_strcasecmp(*ap, "total")) {
1307 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1314 if (!p->pm_partid || !p->pm_partno
1315 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1316 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1322 case MESSAGE_EXTERNAL:
1327 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1328 advise(ct->c_file, "unable to open for reading");
1332 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1334 if (!(p = get_content(fp, ct->c_file, 0))) {
1340 p->c_end = p->c_begin;
1345 switch (p->c_type) {
1350 if (p->c_subtype != MESSAGE_RFC822)
1355 (*p->c_ctinitfnx) (p);
1374 InitApplication(CT ct)
1377 CI ci = &ct->c_ctinfo;
1380 for (kv = SubApplication; kv->kv_key; kv++)
1381 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1383 ct->c_subtype = kv->kv_value;
1390 ** TRANSFER ENCODINGS
1394 init_encoding(CT ct, OpenCEFunc openfnx)
1398 if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1399 adios(EX_OSERR, NULL, "out of memory");
1402 ct->c_ceopenfnx = openfnx;
1403 ct->c_ceclosefnx = close_encoding;
1404 ct->c_cesizefnx = size_encoding;
1411 close_encoding(CT ct)
1415 if (!(ce = ct->c_cefile))
1425 static unsigned long
1426 size_encoding(CT ct)
1434 if (!(ce = ct->c_cefile))
1435 return (ct->c_end - ct->c_begin);
1437 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1438 return (long) st.st_size;
1441 if (stat(ce->ce_file, &st) != NOTOK)
1442 return (long) st.st_size;
1447 if (ct->c_encoding == CE_EXTERNAL)
1448 return (ct->c_end - ct->c_begin);
1451 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1452 return (ct->c_end - ct->c_begin);
1454 if (fstat(fd, &st) != NOTOK)
1455 size = (long) st.st_size;
1459 (*ct->c_ceclosefnx) (ct);
1468 static unsigned char b642nib[0x80] = {
1469 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1470 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1471 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1472 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1473 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1474 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1475 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1476 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1477 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1478 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1479 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1480 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1481 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1482 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1483 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1484 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1491 return init_encoding(ct, openBase64);
1496 openBase64(CT ct, char **file)
1499 int fd, len, skip, own_ct_fp = 0;
1501 unsigned char value, *b, *b1, *b2, *b3;
1502 unsigned char *cp, *ep;
1503 char buffer[BUFSIZ];
1504 /* sbeck -- handle suffixes */
1508 b = (unsigned char *) &bits;
1509 b1 = &b[endian > 0 ? 1 : 2];
1510 b2 = &b[endian > 0 ? 2 : 1];
1511 b3 = &b[endian > 0 ? 3 : 0];
1515 fseek(ce->ce_fp, 0L, SEEK_SET);
1520 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1521 content_error(ce->ce_file, ct,
1522 "unable to fopen for reading");
1528 if (*file == NULL) {
1529 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1532 ce->ce_file = getcpy(*file);
1536 /* sbeck@cise.ufl.edu -- handle suffixes */
1538 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1539 invo_name, ci->ci_type, ci->ci_subtype);
1540 cp = context_find(buffer);
1541 if (cp == NULL || *cp == '\0') {
1542 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1544 cp = context_find(buffer);
1546 if (cp != NULL && *cp != '\0') {
1547 if (ce->ce_unlink) {
1549 ** The temporary file already exists, so we rename it
1550 ** to the version with the extension.
1552 char *file_org = strdup(ce->ce_file);
1553 ce->ce_file = add(cp, ce->ce_file);
1554 if (rename(file_org, ce->ce_file)) {
1555 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1561 ce->ce_file = add(cp, ce->ce_file);
1565 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1566 content_error(ce->ce_file, ct,
1567 "unable to fopen for reading/writing");
1571 if ((len = ct->c_end - ct->c_begin) < 0)
1572 adios(EX_SOFTWARE, NULL, "internal error(1)");
1575 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1576 content_error(ct->c_file, ct,
1577 "unable to open for reading");
1587 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1589 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1591 content_error(ct->c_file, ct, "error reading from");
1595 content_error(NULL, ct, "premature eof");
1603 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1608 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1610 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1612 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1616 bits |= value << bitno;
1618 if ((bitno -= 6) < 0) {
1619 putc((char) *b1, ce->ce_fp);
1621 putc((char) *b2, ce->ce_fp);
1623 putc((char) *b3, ce->ce_fp);
1627 if (ferror(ce->ce_fp)) {
1628 content_error(ce->ce_file, ct,
1629 "error writing to");
1632 bitno = 18, bits = 0L, skip = 0;
1638 goto self_delimiting;
1647 fprintf(stderr, "premature ending (bitno %d)\n",
1650 content_error(NULL, ct, "invalid BASE64 encoding");
1655 fseek(ct->c_fp, 0L, SEEK_SET);
1657 if (fflush(ce->ce_fp)) {
1658 content_error(ce->ce_file, ct, "error writing to");
1662 fseek(ce->ce_fp, 0L, SEEK_SET);
1665 *file = ce->ce_file;
1670 return fileno(ce->ce_fp);
1673 free_encoding(ct, 0);
1686 static char hex2nib[0x80] = {
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1694 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1697 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1699 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1700 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1701 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1702 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1709 return init_encoding(ct, openQuoted);
1714 openQuoted(CT ct, char **file)
1716 int cc, len, quoted, own_ct_fp = 0;
1717 unsigned char *cp, *ep;
1718 char buffer[BUFSIZ];
1719 unsigned char mask = 0;
1721 /* sbeck -- handle suffixes */
1726 fseek(ce->ce_fp, 0L, SEEK_SET);
1731 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1732 content_error(ce->ce_file, ct,
1733 "unable to fopen for reading");
1739 if (*file == NULL) {
1740 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1743 ce->ce_file = getcpy(*file);
1747 /* sbeck@cise.ufl.edu -- handle suffixes */
1749 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1750 invo_name, ci->ci_type, ci->ci_subtype);
1751 cp = context_find(buffer);
1752 if (cp == NULL || *cp == '\0') {
1753 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1755 cp = context_find(buffer);
1757 if (cp != NULL && *cp != '\0') {
1758 if (ce->ce_unlink) {
1760 ** The temporary file already exists, so we rename it
1761 ** to the version with the extension.
1763 char *file_org = strdup(ce->ce_file);
1764 ce->ce_file = add(cp, ce->ce_file);
1765 if (rename(file_org, ce->ce_file)) {
1766 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1772 ce->ce_file = add(cp, ce->ce_file);
1776 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1777 content_error(ce->ce_file, ct,
1778 "unable to fopen for reading/writing");
1782 if ((len = ct->c_end - ct->c_begin) < 0)
1783 adios(EX_SOFTWARE, NULL, "internal error(2)");
1786 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1787 content_error(ct->c_file, ct,
1788 "unable to open for reading");
1796 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1798 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1799 content_error(NULL, ct, "premature eof");
1803 if ((cc = strlen(buffer)) > len)
1807 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1812 for (; cp < ep; cp++) {
1814 /* in an escape sequence */
1816 /* at byte 1 of an escape sequence */
1817 mask = hex2nib[*cp & 0x7f];
1818 /* next is byte 2 */
1821 /* at byte 2 of an escape sequence */
1823 mask |= hex2nib[*cp & 0x7f];
1824 putc(mask, ce->ce_fp);
1825 if (ferror(ce->ce_fp)) {
1826 content_error(ce->ce_file, ct, "error writing to");
1830 ** finished escape sequence; next may
1831 ** be literal or a new escape sequence
1835 /* on to next byte */
1839 /* not in an escape sequence */
1842 ** starting an escape sequence,
1845 if (cp + 1 < ep && cp[1] == '\n') {
1846 /* "=\n" soft line break, eat the \n */
1850 if (cp + 1 >= ep || cp + 2 >= ep) {
1852 ** We don't have 2 bytes left,
1853 ** so this is an invalid escape
1854 ** sequence; just show the raw bytes
1857 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1859 ** Next 2 bytes are hex digits,
1860 ** making this a valid escape
1861 ** sequence; let's decode it (above).
1867 ** One or both of the next 2 is
1868 ** out of range, making this an
1869 ** invalid escape sequence; just
1870 ** show the raw bytes (below).
1875 /* Just show the raw byte. */
1876 putc(*cp, ce->ce_fp);
1877 if (ferror(ce->ce_fp)) {
1878 content_error(ce->ce_file, ct,
1879 "error writing to");
1885 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1889 fseek(ct->c_fp, 0L, SEEK_SET);
1891 if (fflush(ce->ce_fp)) {
1892 content_error(ce->ce_file, ct, "error writing to");
1896 fseek(ce->ce_fp, 0L, SEEK_SET);
1899 *file = ce->ce_file;
1904 return fileno(ce->ce_fp);
1907 free_encoding(ct, 0);
1923 if (init_encoding(ct, open7Bit) == NOTOK)
1926 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1932 open7Bit(CT ct, char **file)
1934 int cc, fd, len, own_ct_fp = 0;
1935 char buffer[BUFSIZ];
1936 /* sbeck -- handle suffixes */
1943 fseek(ce->ce_fp, 0L, SEEK_SET);
1948 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1949 content_error(ce->ce_file, ct,
1950 "unable to fopen for reading");
1956 if (*file == NULL) {
1957 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1960 ce->ce_file = getcpy(*file);
1964 /* sbeck@cise.ufl.edu -- handle suffixes */
1966 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1967 invo_name, ci->ci_type, ci->ci_subtype);
1968 cp = context_find(buffer);
1969 if (cp == NULL || *cp == '\0') {
1970 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1972 cp = context_find(buffer);
1974 if (cp != NULL && *cp != '\0') {
1975 if (ce->ce_unlink) {
1977 ** The temporary file already exists, so we rename it
1978 ** to the version with the extension.
1980 char *file_org = strdup(ce->ce_file);
1981 ce->ce_file = add(cp, ce->ce_file);
1982 if (rename(file_org, ce->ce_file)) {
1983 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1989 ce->ce_file = add(cp, ce->ce_file);
1993 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1994 content_error(ce->ce_file, ct,
1995 "unable to fopen for reading/writing");
1999 if (ct->c_type == CT_MULTIPART) {
2001 CI ci = &ct->c_ctinfo;
2004 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2006 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2007 strlen(ci->ci_subtype);
2008 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2009 putc(';', ce->ce_fp);
2012 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2015 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2016 fputs("\n\t", ce->ce_fp);
2019 putc(' ', ce->ce_fp);
2022 fprintf(ce->ce_fp, "%s", buffer);
2026 if (ci->ci_comment) {
2027 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2029 fputs("\n\t", ce->ce_fp);
2032 putc(' ', ce->ce_fp);
2035 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2038 fprintf(ce->ce_fp, "\n");
2040 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2042 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2044 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2045 fprintf(ce->ce_fp, "\n");
2048 if ((len = ct->c_end - ct->c_begin) < 0)
2049 adios(EX_SOFTWARE, NULL, "internal error(3)");
2052 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2053 content_error(ct->c_file, ct,
2054 "unable to open for reading");
2060 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2062 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2064 content_error(ct->c_file, ct, "error reading from");
2068 content_error(NULL, ct, "premature eof");
2076 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2077 if (ferror(ce->ce_fp)) {
2078 content_error(ce->ce_file, ct,
2079 "error writing to");
2084 fseek(ct->c_fp, 0L, SEEK_SET);
2086 if (fflush(ce->ce_fp)) {
2087 content_error(ce->ce_file, ct, "error writing to");
2091 fseek(ce->ce_fp, 0L, SEEK_SET);
2094 *file = ce->ce_file;
2099 return fileno(ce->ce_fp);
2102 free_encoding(ct, 0);