2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Subtype names recognised for text content, paired with the
** TEXT_ constant stored in c_subtype.  A later loop in this file
** walks the table from the top, comparing ci_subtype against each
** kv_key without regard to case, and takes kv_value from the row
** where the walk ends.  The NULL key row ends the walk, which is why
** it has to stay last.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Character set names recognised for text content.  The value of the
** charset parameter is compared against each kv_key without regard
** to case, and tx_charset receives kv_value from the row where the
** walk ends.  The NULL key row ends the walk.
** NOTE(review): CHARSET_UNSPECIFIED is assigned elsewhere in this file,
** presumably when no charset parameter was given -- confirm.
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Subtype names recognised for multipart content, paired with the
** MULTI_ constant stored in c_subtype.  Matching is done without
** regard to case and stops at the NULL key row.
** Two of the values drive later behaviour in this file:
** MULTI_DIGEST makes the parts get parsed with toplevel set to -1,
** and MULTI_ALTERNATE is the condition tested before the part order
** is reversed.
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Subtype names recognised for message content, paired with the
** MESSAGE_ constant stored in c_subtype.  Matching is done without
** regard to case and stops at the NULL key row.  The resulting
** c_subtype is then used in a switch with separate cases for
** MESSAGE_RFC822, MESSAGE_PARTIAL and MESSAGE_EXTERNAL.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Subtype names recognised for application content, paired with the
** APPLICATION_ constant stored in c_subtype.  InitApplication walks
** the table comparing ci_subtype against each kv_key without regard
** to case; the NULL key row ends the walk.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Dispatch table for the primary content type.  Each row gives the
** type name, the CT_ constant stored in c_type, and the function
** stored in c_ctinitfnx.
**
** The Content-Type handling in this file walks the rows until it
** reaches a NULL si_key, comparing ci_type against si_key without
** regard to case, then copies si_val and si_init from the row the
** pointer rests on.  Audio, image and video share InitGeneric.
**
** NOTE(review): there are two NULL key rows.  The lookup has a branch
** for a type that matched nothing and does not start with X-, but the
** body of that branch is not visible in this excerpt; presumably it
** steps from the CT_EXTENSION row to the CT_UNKNOWN row -- confirm.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Dispatch table for the Content-Transfer-Encoding value.  Each row
** gives the encoding name, the CE_ constant stored in c_encoding, and
** an init function.
**
** The header handling in this file walks the rows until it reaches a
** NULL si_key, comparing the field value against si_key without
** regard to case.  It stores si_val in c_encoding and, when si_init
** is not NULL, calls it and checks the result against NOTOK.
** The 8bit, 7bit and binary encodings all share Init7Bit.
**
** NOTE(review): as with the content type table there are two NULL key
** rows; the step that selects CE_UNKNOWN over CE_EXTENSION for a value
** without an X- prefix is not visible in this excerpt -- confirm.
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
200 ct->c_unlink = 1; /* temp file to remove */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
232 get_content(FILE *in, char *file, int toplevel)
235 char buf[BUFSIZ], name[NAMESZ];
240 /* allocate the content structure */
241 if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242 adios(EX_OSERR, NULL, "out of memory");
245 ct->c_file = getcpy(file);
246 ct->c_begin = ftell(ct->c_fp) + 1;
249 ** Parse the header fields for this
250 ** content into a linked list.
252 for (compnum = 1, state = FLD;;) {
253 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
258 /* get copies of the buffers */
262 /* if necessary, get rest of field */
263 while (state == FLDPLUS) {
264 state = m_getfld(state, name, buf,
266 vp = add(buf, vp); /* add to previous value */
269 /* Now add the header data to the list */
270 add_header(ct, np, vp);
272 ct->c_begin = ftell(in) + 1;
276 ct->c_begin = ftell(in) - strlen(buf);
280 ct->c_begin = ftell(in);
285 adios(EX_DATAERR, NULL, "message format error in component #%d",
289 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
292 /* break out of the loop */
297 ** Read the content headers. We will parse the
298 ** MIME related header fields into their various
299 ** structures and set internal flags related to
300 ** content type/subtype, etc.
303 hp = ct->c_first_hf; /* start at first header field */
305 /* Get MIME-Version field */
306 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
309 unsigned char *cp, *dp;
312 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
315 ct->c_vrsn = getcpy(hp->value);
317 /* Now, cleanup this field */
322 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
324 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
329 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
331 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
334 for (dp = cp; istoken(*dp); dp++)
338 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
341 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
344 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
345 /* Get Content-Type field */
346 struct str2init *s2i;
347 CI ci = &ct->c_ctinfo;
349 /* Check if we've already seen a Content-Type header */
351 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
355 /* Parse the Content-Type field */
356 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
360 ** Set the Init function and the internal
361 ** flag for this content type.
363 for (s2i = str2cts; s2i->si_key; s2i++)
364 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
366 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
368 ct->c_type = s2i->si_val;
369 ct->c_ctinitfnx = s2i->si_init;
371 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
372 /* Get Content-Transfer-Encoding field */
374 unsigned char *cp, *dp;
375 struct str2init *s2i;
378 ** Check if we've already seen the
379 ** Content-Transfer-Encoding field
382 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
386 /* get copy of this field */
387 ct->c_celine = cp = getcpy(hp->value);
391 for (dp = cp; istoken(*dp); dp++)
397 ** Find the internal flag and Init function
398 ** for this transfer encoding.
400 for (s2i = str2ces; s2i->si_key; s2i++)
401 if (!mh_strcasecmp(cp, s2i->si_key))
403 if (!s2i->si_key && !uprf(cp, "X-"))
406 ct->c_encoding = s2i->si_val;
408 /* Call the Init function for this encoding */
409 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
412 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
413 /* Get Content-ID field */
414 ct->c_id = add(hp->value, ct->c_id);
416 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
417 /* Get Content-Description field */
418 ct->c_descr = add(hp->value, ct->c_descr);
420 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
421 /* Get Content-Disposition field */
422 ct->c_dispo = add(hp->value, ct->c_dispo);
426 hp = hp->next; /* next header field */
430 ** Check if we saw a Content-Type field.
431 ** If not, then assign a default value for
432 ** it, and the Init function.
436 ** We are inside a multipart/digest message,
437 ** so the default type is message/rfc822
440 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
442 ct->c_type = CT_MESSAGE;
443 ct->c_ctinitfnx = InitMessage;
446 ** Else default type is text/plain
448 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
450 ct->c_type = CT_TEXT;
451 ct->c_ctinitfnx = InitText;
455 /* Use default Transfer-Encoding, if necessary */
457 ct->c_encoding = CE_7BIT;
470 ** small routine to add header field to list
/*
** Appends one header field record to the list kept on ct.
** The list head is c_first_hf and the tail is c_last_hf; an empty
** list is detected by c_first_hf being NULL.
**
** ct     content whose header list is extended
** name   field name for the new record
** value  field body for the new record
**
** NOTE(review): the lines that store name and value into the record
** are not visible in this excerpt.  The caller comments that it makes
** its own copies before calling, so the record presumably keeps the
** pointers as given -- confirm before freeing either argument.
*/
474 add_header(CT ct, char *name, char *value)
478 /* allocate header field structure */
479 hp = mh_xmalloc(sizeof(*hp));
481 /* link data into header structure */
486 /* link header structure into the list */
487 if (ct->c_first_hf == NULL) {
488 ct->c_first_hf = hp; /* this is the first */
491 ct->c_last_hf->next = hp; /* add it to the end */
500 ** Make sure that buf contains at least one appearance of name,
501 ** followed by =. If not, insert both name and value, just after
502 ** first semicolon, if any. Note that name should not contain a
503 ** trailing =. And quotes will be added around the value. Typical
504 ** usage: make sure that a Content-Disposition header contains
505 ** filename="foo". If it doesn't and value does, use value from
509 incl_name_value(unsigned char *buf, char *name, char *value) {
512 /* Assume that name is non-null. */
514 char *name_plus_equal = concat(name, "=", NULL);
516 if (!strstr(buf, name_plus_equal)) {
519 char *prefix, *suffix;
521 /* Trim trailing space, esp. newline. */
522 for (cp = &buf[strlen(buf) - 1];
523 cp >= buf && isspace(*cp); --cp) {
527 insertion = concat("; ", name, "=", "\"", value, "\"",
531 ** Insert at first semicolon, if any.
532 ** If none, append to end.
534 prefix = getcpy(buf);
535 if ((cp = strchr(prefix, ';'))) {
536 suffix = concat(cp, NULL);
538 newbuf = concat(prefix, insertion, suffix,
543 newbuf = concat(buf, insertion, "\n", NULL);
551 free(name_plus_equal);
558 ** Extract just name_suffix="foo", if any, from value. If there isn't
559 ** one, return the entire value. Note that, for example, a name_suffix
560 ** of name will match filename="foo", and return foo.
/*
** Looks inside value for name_suffix immediately followed by an
** equals sign and a double quote, and returns the text found between
** that quote and the next one.  The search is a plain substring
** search, so name_suffix only needs to be the tail of a parameter
** name.
**
** Return value: when a match is found, a NUL terminated copy in a
** buffer obtained from mh_xmalloc; when no match is found, the value
** pointer itself.  The two outcomes therefore differ in ownership,
** and a caller cannot free the result without first comparing it
** against value.
**
** NOTE(review): the scan for the closing quote has no test for the
** end of the string.  If the opening quote has no partner the scan
** runs past the terminator -- confirm callers guarantee balanced
** quotes, or add a NUL check.
*/
563 extract_name_value(char *name_suffix, char *value) {
564 char *extracted_name_value = value;
565 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
566 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
/* the search pattern is only needed for the strstr call above */
569 free(name_suffix_plus_quote);
570 if (name_suffix_equals) {
571 char *name_suffix_begin;
/* strstr matched a pattern ending in a quote, so this scan terminates */
574 for (cp = name_suffix_equals; *cp != '"'; ++cp)
576 name_suffix_begin = ++cp;
577 /* Find second \". */
578 for (; *cp != '"'; ++cp)
/* copy the characters between the quotes and terminate the copy */
581 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
582 memcpy(extracted_name_value, name_suffix_begin,
583 cp - name_suffix_begin);
584 extracted_name_value[cp - name_suffix_begin] = '\0';
587 return extracted_name_value;
591 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
592 ** directives. Fills in the information of the CTinfo structure.
595 get_ctinfo(unsigned char *cp, CT ct, int magic)
604 i = strlen(invo_name) + 2;
606 /* store copy of Content-Type line */
607 cp = ct->c_ctline = getcpy(cp);
609 while (isspace(*cp)) /* trim leading spaces */
612 /* change newlines to spaces */
613 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
616 /* trim trailing spaces */
617 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
623 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
625 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
628 for (dp = cp; istoken(*dp); dp++)
631 ci->ci_type = getcpy(cp); /* store content type */
635 advise(NULL, "invalid %s: field in message %s (empty type)",
636 TYPE_FIELD, ct->c_file);
640 /* down case the content type string */
641 for (dp = ci->ci_type; *dp; dp++)
642 if (isalpha(*dp) && isupper(*dp))
648 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
653 ci->ci_subtype = getcpy("");
661 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
664 for (dp = cp; istoken(*dp); dp++)
667 ci->ci_subtype = getcpy(cp); /* store the content subtype */
670 if (!*ci->ci_subtype) {
671 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
675 /* down case the content subtype string */
676 for (dp = ci->ci_subtype; *dp; dp++)
677 if (isalpha(*dp) && isupper(*dp))
684 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
688 ** Parse attribute/value pairs given with Content-Type
690 ep = (ap = ci->ci_attrs) + NPARMS;
696 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
704 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
708 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
712 /* down case the attribute name */
713 for (dp = cp; istoken(*dp); dp++)
714 if (isalpha(*dp) && isupper(*dp))
717 for (up = dp; isspace(*dp);)
719 if (dp == cp || *dp != '=') {
720 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
724 vp = (*ap = getcpy(cp)) + (up - cp);
726 for (dp++; isspace(*dp);)
729 /* now add the attribute value */
730 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
733 for (cp = ++dp, dp = vp;;) {
737 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
742 if ((c = *cp++) == '\0')
757 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
762 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
763 *ci->ci_values[ap - ci->ci_attrs] = '\0';
764 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
772 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
777 ** Get any <Content-Id> given in buffer
779 if (magic && *cp == '<') {
784 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
785 advise(NULL, "invalid ID in message %s", ct->c_file);
791 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
802 ** Get any [Content-Description] given in buffer.
804 if (magic && *cp == '[') {
806 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
810 advise(NULL, "invalid description in message %s",
819 ct->c_descr = concat(ct->c_descr, "\n", NULL);
830 ** Get any {Content-Disposition} given in buffer.
832 if (magic && *cp == '{') {
834 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
838 advise(NULL, "invalid disposition in message %s",
847 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
858 ** Check if anything is left over
862 ci->ci_magic = getcpy(cp);
865 ** If there is a Content-Disposition header and
866 ** it doesn't have a *filename=, extract it from
867 ** the magic contents. The mhbasename call skips
868 ** any leading directory components.
871 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
873 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
881 get_comment(CT ct, unsigned char **ap, int istype)
886 char c, buffer[BUFSIZ], *dp;
898 advise(NULL, "invalid comment in message %s's %s: field",
899 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
904 if ((c = *cp++) == '\0')
927 if ((dp = ci->ci_comment)) {
928 ci->ci_comment = concat(dp, " ", buffer, NULL);
931 ci->ci_comment = getcpy(buffer);
946 ** Handles content types audio, image, and video.
947 ** There's not much to do right here.
953 return OK; /* not much to do here */
967 CI ci = &ct->c_ctinfo;
969 /* check for missing subtype */
970 if (!*ci->ci_subtype)
971 ci->ci_subtype = add("plain", ci->ci_subtype);
974 for (kv = SubText; kv->kv_key; kv++)
975 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
977 ct->c_subtype = kv->kv_value;
979 /* allocate text character set structure */
980 if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
981 adios(EX_OSERR, NULL, "out of memory");
982 ct->c_ctparams = (void *) t;
984 /* scan for charset parameter */
985 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
986 if (!mh_strcasecmp(*ap, "charset"))
989 /* check if content specified a character set */
992 ct->c_charset = getcpy(norm_charmap(*ep));
993 /* match character set or set to CHARSET_UNKNOWN */
994 for (kv = Charset; kv->kv_key; kv++) {
995 if (!mh_strcasecmp(*ep, kv->kv_key)) {
999 t->tx_charset = kv->kv_value;
1001 t->tx_charset = CHARSET_UNSPECIFIED;
1013 InitMultiPart(CT ct)
1017 unsigned char *cp, *dp;
1019 char *bp, buffer[BUFSIZ];
1020 struct multipart *m;
1022 struct part *part, **next;
1023 CI ci = &ct->c_ctinfo;
1028 ** The encoding for multipart messages must be either
1029 ** 7bit, 8bit, or binary (per RFC2045).
1031 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1032 && ct->c_encoding != CE_BINARY) {
1033 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1034 ct->c_encoding = CE_7BIT;
1038 for (kv = SubMultiPart; kv->kv_key; kv++)
1039 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1041 ct->c_subtype = kv->kv_value;
1044 ** Check for "boundary" parameter, which is
1045 ** required for multipart messages.
1048 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1049 if (!mh_strcasecmp(*ap, "boundary")) {
1055 /* complain if boundary parameter is missing */
1057 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1061 /* allocate primary structure for multipart info */
1062 if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1063 adios(EX_OSERR, NULL, "out of memory");
1064 ct->c_ctparams = (void *) m;
1066 /* check if boundary parameter contains only whitespace characters */
1067 for (cp = bp; isspace(*cp); cp++)
1070 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1074 /* remove trailing whitespace from boundary parameter */
1075 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1080 /* record boundary separators */
1081 m->mp_start = concat(bp, "\n", NULL);
1082 m->mp_stop = concat(bp, "--\n", NULL);
1084 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1085 advise(ct->c_file, "unable to open for reading");
1089 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1091 next = &m->mp_parts;
1095 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1099 pos += strlen(buffer);
1100 if (buffer[0] != '-' || buffer[1] != '-')
1103 if (strcmp(buffer + 2, m->mp_start)!=0)
1106 if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1108 adios(EX_OSERR, NULL, "out of memory");
1110 next = &part->mp_next;
1112 if (!(p = get_content(fp, ct->c_file,
1113 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1120 fseek(fp, pos, SEEK_SET);
1123 if (strcmp(buffer + 2, m->mp_start) == 0) {
1127 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1128 if (p->c_end < p->c_begin)
1129 p->c_begin = p->c_end;
1134 if (strcmp(buffer + 2, m->mp_stop) == 0)
1140 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1141 if (!inout && part) {
1143 p->c_end = ct->c_end;
1145 if (p->c_begin >= p->c_end) {
1146 for (next = &m->mp_parts; *next != part;
1147 next = &((*next)->mp_next))
1151 free((char *) part);
1156 /* reverse the order of the parts for multipart/alternative */
1157 if (ct->c_subtype == MULTI_ALTERNATE)
1161 ** label all subparts with part number, and
1162 ** then initialize the content of the subpart.
1167 char partnam[BUFSIZ];
1170 snprintf(partnam, sizeof(partnam), "%s.",
1172 pp = partnam + strlen(partnam);
1177 for (part = m->mp_parts, partnum = 1; part;
1178 part = part->mp_next, partnum++) {
1181 sprintf(pp, "%d", partnum);
1182 p->c_partno = getcpy(partnam);
1184 /* initialize the content of the subparts */
1185 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1200 ** reverse the order of the parts of a multipart
/*
** Reverses the singly linked list of parts hanging off a multipart
** content.  The multipart record is taken from c_ctparams, and the
** list is relinked in place starting at mp_parts.
**
** Method visible here: count the parts, allocate a temporary array of
** part pointers with one spare slot, fill it in list order, then walk
** the array from the back while relinking through mp_next, and
** finally free the array.
*/
1204 reverse_parts(CT ct)
1207 struct multipart *m;
1208 struct part **base, **bmp, **next, *part;
1210 m = (struct multipart *) ct->c_ctparams;
1212 /* if only one part, just return */
/* the test below also covers a list with no parts at all */
1213 if (!m->mp_parts || !m->mp_parts->mp_next)
1216 /* count number of parts */
1218 for (part = m->mp_parts; part; part = part->mp_next)
1221 /* allocate array of pointers to the parts */
1222 if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1223 adios(EX_OSERR, NULL, "out of memory");
1226 /* point at all the parts */
1227 for (part = m->mp_parts; part; part = part->mp_next)
1231 /* reverse the order of the parts */
1232 next = &m->mp_parts;
/*
** NOTE(review): the last decrement leaves bmp one element below base
** before the comparison fails; forming that pointer is outside what
** the C standard guarantees -- consider an index based loop.
*/
1233 for (bmp--; bmp >= base; bmp--) {
1236 next = &part->mp_next;
1240 /* free array of pointers */
1241 free((char *) base);
1253 CI ci = &ct->c_ctinfo;
1255 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1256 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1260 /* check for missing subtype */
1261 if (!*ci->ci_subtype)
1262 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1265 for (kv = SubMessage; kv->kv_key; kv++)
1266 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1268 ct->c_subtype = kv->kv_value;
1270 switch (ct->c_subtype) {
1271 case MESSAGE_RFC822:
1274 case MESSAGE_PARTIAL:
1279 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1280 adios(EX_OSERR, NULL, "out of memory");
1281 ct->c_ctparams = (void *) p;
1284 ** scan for parameters "id", "number",
1287 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1288 if (!mh_strcasecmp(*ap, "id")) {
1289 p->pm_partid = getcpy(*ep);
1292 if (!mh_strcasecmp(*ap, "number")) {
1293 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1295 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1300 if (!mh_strcasecmp(*ap, "total")) {
1301 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1308 if (!p->pm_partid || !p->pm_partno
1309 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1310 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1316 case MESSAGE_EXTERNAL:
1321 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1322 advise(ct->c_file, "unable to open for reading");
1326 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1328 if (!(p = get_content(fp, ct->c_file, 0))) {
1334 p->c_end = p->c_begin;
1339 switch (p->c_type) {
1344 if (p->c_subtype != MESSAGE_RFC822)
1349 (*p->c_ctinitfnx) (p);
1368 InitApplication(CT ct)
1371 CI ci = &ct->c_ctinfo;
1374 for (kv = SubApplication; kv->kv_key; kv++)
1375 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1377 ct->c_subtype = kv->kv_value;
1384 ** TRANSFER ENCODINGS
/*
** Common setup shared by the transfer encoding init functions.
** Allocates one encoding record with mh_xcalloc and installs the
** three encoding callbacks on ct: the supplied open function, plus
** close_encoding and size_encoding from this file.
**
** ct       content being prepared
** openfnx  open callback specific to the encoding
**
** NOTE(review): the line that attaches the new record to ct and the
** return statement are not visible in this excerpt.
*/
1388 init_encoding(CT ct, OpenCEFunc openfnx)
1392 if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1393 adios(EX_OSERR, NULL, "out of memory");
1396 ct->c_ceopenfnx = openfnx;
1397 ct->c_ceclosefnx = close_encoding;
1398 ct->c_cesizefnx = size_encoding;
1405 close_encoding(CT ct)
1409 if (!(ce = ct->c_cefile))
/*
** Reports a size in bytes for the content ct.
**
** Sources tried, in the order visible here:
**   - no encoding record: the raw span c_end - c_begin
**   - an open decoded file: its size from fstat
**   - a decoded file known by name: its size from stat
**   - external encoding: the raw span c_end - c_begin
**   - otherwise the open callback is run and the resulting file
**     descriptor is measured with fstat; if the callback fails the
**     raw span is returned instead
** The close callback is run after the open callback path.
**
** NOTE(review): st_size is cast to long although the function returns
** unsigned long; the value is converted again on return.
*/
1419 static unsigned long
1420 size_encoding(CT ct)
1428 if (!(ce = ct->c_cefile))
1429 return (ct->c_end - ct->c_begin);
1431 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1432 return (long) st.st_size;
1435 if (stat(ce->ce_file, &st) != NOTOK)
1436 return (long) st.st_size;
1441 if (ct->c_encoding == CE_EXTERNAL)
1442 return (ct->c_end - ct->c_begin);
/* no decoded file yet: decode through the open callback to measure it */
1445 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1446 return (ct->c_end - ct->c_begin);
1448 if (fstat(fd, &st) != NOTOK)
1449 size = (long) st.st_size;
1453 (*ct->c_ceclosefnx) (ct);
/*
** Decoding table for base64, indexed by a character masked to 7 bits.
** A slot in the range 0x00-0x3f is the 6 bit value of that character:
**   A-Z  ->  0x00-0x19
**   a-z  ->  0x1a-0x33
**   0-9  ->  0x34-0x3d
**   plus ->  0x3e
**   slash -> 0x3f
** Every other slot holds 0xff.  The decoder in this file rejects any
** character whose slot is above 0x3f, and separately rejects bytes
** with the top bit set before they reach the table.
** NOTE(review): the equals sign used for padding is also 0xff here, so
** it is presumably handled by the decoder before this lookup -- confirm.
*/
1462 static unsigned char b642nib[0x80] = {
1463 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1464 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1465 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1466 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1467 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1468 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1469 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1470 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1471 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1472 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1473 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1474 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1475 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1476 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1477 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1478 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
1485 return init_encoding(ct, openBase64);
1490 openBase64(CT ct, char **file)
1493 int fd, len, skip, own_ct_fp = 0;
1495 unsigned char value, *b, *b1, *b2, *b3;
1496 unsigned char *cp, *ep;
1497 char buffer[BUFSIZ];
1498 /* sbeck -- handle suffixes */
1502 b = (unsigned char *) &bits;
1503 b1 = &b[endian > 0 ? 1 : 2];
1504 b2 = &b[endian > 0 ? 2 : 1];
1505 b3 = &b[endian > 0 ? 3 : 0];
1509 fseek(ce->ce_fp, 0L, SEEK_SET);
1514 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1515 content_error(ce->ce_file, ct,
1516 "unable to fopen for reading");
1522 if (*file == NULL) {
1523 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1526 ce->ce_file = getcpy(*file);
1530 /* sbeck@cise.ufl.edu -- handle suffixes */
1532 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1533 invo_name, ci->ci_type, ci->ci_subtype);
1534 cp = context_find(buffer);
1535 if (cp == NULL || *cp == '\0') {
1536 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1538 cp = context_find(buffer);
1540 if (cp != NULL && *cp != '\0') {
1541 if (ce->ce_unlink) {
1543 ** Temporary file already exists, so we rename to
1544 ** version with extension.
1546 char *file_org = strdup(ce->ce_file);
1547 ce->ce_file = add(cp, ce->ce_file);
1548 if (rename(file_org, ce->ce_file)) {
1549 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1555 ce->ce_file = add(cp, ce->ce_file);
1559 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1560 content_error(ce->ce_file, ct,
1561 "unable to fopen for reading/writing");
1565 if ((len = ct->c_end - ct->c_begin) < 0)
1566 adios(EX_SOFTWARE, NULL, "internal error(1)");
1569 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1570 content_error(ct->c_file, ct,
1571 "unable to open for reading");
1581 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1583 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1585 content_error(ct->c_file, ct, "error reading from");
1589 content_error(NULL, ct, "premature eof");
1597 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
1602 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1604 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1606 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1610 bits |= value << bitno;
1612 if ((bitno -= 6) < 0) {
1613 putc((char) *b1, ce->ce_fp);
1615 putc((char) *b2, ce->ce_fp);
1617 putc((char) *b3, ce->ce_fp);
1621 if (ferror(ce->ce_fp)) {
1622 content_error(ce->ce_file, ct,
1623 "error writing to");
1626 bitno = 18, bits = 0L, skip = 0;
1632 goto self_delimiting;
1641 fprintf(stderr, "premature ending (bitno %d)\n",
1644 content_error(NULL, ct, "invalid BASE64 encoding");
1649 fseek(ct->c_fp, 0L, SEEK_SET);
1651 if (fflush(ce->ce_fp)) {
1652 content_error(ce->ce_file, ct, "error writing to");
1656 fseek(ce->ce_fp, 0L, SEEK_SET);
1659 *file = ce->ce_file;
1664 return fileno(ce->ce_fp);
1667 free_encoding(ct, 0);
/*
** Hex digit table for quoted-printable escapes, indexed by a
** character masked to 7 bits.  Each slot is the value of that hex
** digit:
**   0-9  ->  0x00-0x09
**   A-F  ->  0x0A-0x0F
**   a-f  ->  0x0a-0x0f
** Every other slot holds 0x00, so the table alone cannot tell an
** invalid character from the digit zero.  The decoder in this file
** checks both characters with isxdigit before it treats a sequence
** as an escape, and combines two lookups into one output byte.
*/
1680 static char hex2nib[0x80] = {
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1685 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1686 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1687 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1688 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1689 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1693 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1694 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1696 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1703 return init_encoding(ct, openQuoted);
1708 openQuoted(CT ct, char **file)
1710 int cc, len, quoted, own_ct_fp = 0;
1711 unsigned char *cp, *ep;
1712 char buffer[BUFSIZ];
1713 unsigned char mask = 0;
1715 /* sbeck -- handle suffixes */
1720 fseek(ce->ce_fp, 0L, SEEK_SET);
1725 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1726 content_error(ce->ce_file, ct,
1727 "unable to fopen for reading");
1733 if (*file == NULL) {
1734 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1737 ce->ce_file = getcpy(*file);
1741 /* sbeck@cise.ufl.edu -- handle suffixes */
1743 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1744 invo_name, ci->ci_type, ci->ci_subtype);
1745 cp = context_find(buffer);
1746 if (cp == NULL || *cp == '\0') {
1747 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1749 cp = context_find(buffer);
1751 if (cp != NULL && *cp != '\0') {
1752 if (ce->ce_unlink) {
1754 ** Temporary file already exists, so we rename to
1755 ** version with extension.
1757 char *file_org = strdup(ce->ce_file);
1758 ce->ce_file = add(cp, ce->ce_file);
1759 if (rename(file_org, ce->ce_file)) {
1760 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1766 ce->ce_file = add(cp, ce->ce_file);
1770 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1771 content_error(ce->ce_file, ct,
1772 "unable to fopen for reading/writing");
1776 if ((len = ct->c_end - ct->c_begin) < 0)
1777 adios(EX_SOFTWARE, NULL, "internal error(2)");
1780 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1781 content_error(ct->c_file, ct,
1782 "unable to open for reading");
1790 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1792 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1793 content_error(NULL, ct, "premature eof");
1797 if ((cc = strlen(buffer)) > len)
1801 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1806 for (; cp < ep; cp++) {
1808 /* in an escape sequence */
1810 /* at byte 1 of an escape sequence */
1811 mask = hex2nib[*cp & 0x7f];
1812 /* next is byte 2 */
1815 /* at byte 2 of an escape sequence */
1817 mask |= hex2nib[*cp & 0x7f];
1818 putc(mask, ce->ce_fp);
1819 if (ferror(ce->ce_fp)) {
1820 content_error(ce->ce_file, ct, "error writing to");
1824 ** finished escape sequence; next may
1825 ** be literal or a new escape sequence
1829 /* on to next byte */
1833 /* not in an escape sequence */
1836 ** starting an escape sequence,
1839 if (cp + 1 < ep && cp[1] == '\n') {
1840 /* "=\n" soft line break, eat the \n */
1844 if (cp + 1 >= ep || cp + 2 >= ep) {
1846 ** We don't have 2 bytes left,
1847 ** so this is an invalid escape
1848 ** sequence; just show the raw bytes
1851 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1853 ** Next 2 bytes are hex digits,
1854 ** making this a valid escape
1855 ** sequence; let's decode it (above).
1861 ** One or both of the next 2 is
1862 ** out of range, making this an
1863 ** invalid escape sequence; just
1864 ** show the raw bytes (below).
1869 /* Just show the raw byte. */
1870 putc(*cp, ce->ce_fp);
1871 if (ferror(ce->ce_fp)) {
1872 content_error(ce->ce_file, ct,
1873 "error writing to");
1879 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1883 fseek(ct->c_fp, 0L, SEEK_SET);
1885 if (fflush(ce->ce_fp)) {
1886 content_error(ce->ce_file, ct, "error writing to");
1890 fseek(ce->ce_fp, 0L, SEEK_SET);
1893 *file = ce->ce_file;
1898 return fileno(ce->ce_fp);
1901 free_encoding(ct, 0);
1917 if (init_encoding(ct, open7Bit) == NOTOK)
1920 ct->c_cesizefnx = NULL; /* no need to decode for real size */
1926 open7Bit(CT ct, char **file)
1928 int cc, fd, len, own_ct_fp = 0;
1929 char buffer[BUFSIZ];
1930 /* sbeck -- handle suffixes */
1937 fseek(ce->ce_fp, 0L, SEEK_SET);
1942 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1943 content_error(ce->ce_file, ct,
1944 "unable to fopen for reading");
1950 if (*file == NULL) {
1951 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1954 ce->ce_file = getcpy(*file);
1958 /* sbeck@cise.ufl.edu -- handle suffixes */
1960 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1961 invo_name, ci->ci_type, ci->ci_subtype);
1962 cp = context_find(buffer);
1963 if (cp == NULL || *cp == '\0') {
1964 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1966 cp = context_find(buffer);
1968 if (cp != NULL && *cp != '\0') {
1969 if (ce->ce_unlink) {
1971 ** Temporary file already exists, so we rename to
1972 ** version with extension.
1974 char *file_org = strdup(ce->ce_file);
1975 ce->ce_file = add(cp, ce->ce_file);
1976 if (rename(file_org, ce->ce_file)) {
1977 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1983 ce->ce_file = add(cp, ce->ce_file);
1987 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1988 content_error(ce->ce_file, ct,
1989 "unable to fopen for reading/writing");
1993 if (ct->c_type == CT_MULTIPART) {
1995 CI ci = &ct->c_ctinfo;
1998 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
2000 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2001 strlen(ci->ci_subtype);
2002 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2003 putc(';', ce->ce_fp);
2006 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
2009 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2010 fputs("\n\t", ce->ce_fp);
2013 putc(' ', ce->ce_fp);
2016 fprintf(ce->ce_fp, "%s", buffer);
2020 if (ci->ci_comment) {
2021 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2023 fputs("\n\t", ce->ce_fp);
2026 putc(' ', ce->ce_fp);
2029 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2032 fprintf(ce->ce_fp, "\n");
2034 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2036 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2038 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2039 fprintf(ce->ce_fp, "\n");
2042 if ((len = ct->c_end - ct->c_begin) < 0)
2043 adios(EX_SOFTWARE, NULL, "internal error(3)");
2046 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2047 content_error(ct->c_file, ct,
2048 "unable to open for reading");
2054 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2056 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2058 content_error(ct->c_file, ct, "error reading from");
2062 content_error(NULL, ct, "premature eof");
2070 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2071 if (ferror(ce->ce_fp)) {
2072 content_error(ce->ce_file, ct,
2073 "error writing to");
2078 fseek(ct->c_fp, 0L, SEEK_SET);
2080 if (fflush(ce->ce_fp)) {
2081 content_error(ce->ce_file, ct, "error writing to");
2085 fseek(ce->ce_fp, 0L, SEEK_SET);
2088 *file = ce->ce_file;
2093 return fileno(ce->ce_fp);
2096 free_encoding(ct, 0);