2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
11 #include <h/signals.h>
17 #include <h/mhparse.h>
22 extern int endian; /* mhmisc.c */
24 extern pid_t xpid; /* mhshowsbr.c */
27 ** Directory to place temp files. This must
28 ** be set before these routines are called.
33 ** Structures for TEXT messages
/*
** Maps text subtype names to their TEXT_* codes.  The NULL-keyed
** TEXT_UNKNOWN entry terminates the table and must remain last.
** NOTE(review): the closing brace of this initializer is not present
** in this extract (the embedded original line numbers have a gap).
*/
35 struct k2v SubText[] = {
36 { "plain", TEXT_PLAIN },
37 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
38 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
39 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Maps charset names to CHARSET_* codes.  The NULL-keyed
** CHARSET_UNKNOWN entry terminates the table and must remain last;
** the charset lookup loop in this file stops on the NULL key.
*/
42 struct k2v Charset[] = {
43 { "us-ascii", CHARSET_USASCII },
44 { "iso-8859-1", CHARSET_LATIN },
45 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
49 ** Structures for MULTIPART messages
/*
** Maps multipart subtype names to MULTI_* codes.  The NULL-keyed
** MULTI_UNKNOWN entry terminates the table and must remain last.
*/
51 struct k2v SubMultiPart[] = {
52 { "mixed", MULTI_MIXED },
53 { "alternative", MULTI_ALTERNATE },
54 { "digest", MULTI_DIGEST },
55 { "parallel", MULTI_PARALLEL },
56 { NULL, MULTI_UNKNOWN } /* this one must be last! */
60 ** Structures for MESSAGE messages
/*
** Maps message subtype names to MESSAGE_* codes.  The NULL-keyed
** MESSAGE_UNKNOWN entry terminates the table and must remain last.
*/
62 struct k2v SubMessage[] = {
63 { "rfc822", MESSAGE_RFC822 },
64 { "partial", MESSAGE_PARTIAL },
65 { "external-body", MESSAGE_EXTERNAL },
66 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
70 ** Structure for APPLICATION messages
/*
** Maps application subtype names to APPLICATION_* codes.  The
** NULL-keyed APPLICATION_UNKNOWN entry terminates the table and must
** remain last.
*/
72 struct k2v SubApplication[] = {
73 { "octet-stream", APPLICATION_OCTETS },
74 { "postscript", APPLICATION_POSTSCRIPT },
75 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.  The first four have no storage-class
** specifier; the remainder are file-local (static) helpers: the
** header/Content-Type parsers, the per-type Init functions referenced
** by str2cts, and the per-encoding Init/open functions referenced by
** str2ces.
** NOTE(review): the embedded line numbers have gaps here, so further
** declarations may exist that are not present in this extract.
*/
82 int make_intermediates(char *);
83 void content_error(char *, CT, char *, ...);
86 void free_content(CT);
87 void free_encoding(CT, int);
92 static CT get_content(FILE *, char *, int);
93 static int get_comment(CT, unsigned char **, int);
95 static int InitGeneric(CT);
96 static int InitText(CT);
97 static int InitMultiPart(CT);
98 static void reverse_parts(CT);
99 static int InitMessage(CT);
100 static int InitApplication(CT);
101 static int init_encoding(CT, OpenCEFunc);
102 static unsigned long size_encoding(CT);
103 static int InitBase64(CT);
104 static int openBase64(CT, char **);
105 static int InitQuoted(CT);
106 static int openQuoted(CT, char **);
107 static int Init7Bit(CT);
/*
** Maps a top-level content type name to its CT_* code and to the Init
** function stored in c_ctinitfnx by get_content.  Two NULL-keyed rows
** end the table, CT_EXTENSION first and CT_UNKNOWN second; the lookup
** loop stops at the first NULL key and then tests the type for an X-
** prefix.
** NOTE(review): the statement that selects the second NULL row is not
** present in this extract.
*/
109 struct str2init str2cts[] = {
110 { "application", CT_APPLICATION, InitApplication },
111 { "audio", CT_AUDIO, InitGeneric },
112 { "image", CT_IMAGE, InitGeneric },
113 { "message", CT_MESSAGE, InitMessage },
114 { "multipart", CT_MULTIPART, InitMultiPart },
115 { "text", CT_TEXT, InitText },
116 { "video", CT_VIDEO, InitGeneric },
117 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
118 { NULL, CT_UNKNOWN, NULL },
/*
** Maps a Content-Transfer-Encoding name to its CE_* code and to the
** Init function that get_content calls for it.  8bit, 7bit and binary
** all share Init7Bit.  Two NULL-keyed rows end the table, CE_EXTENSION
** first and CE_UNKNOWN second.
*/
121 struct str2init str2ces[] = {
122 { "base64", CE_BASE64, InitBase64 },
123 { "quoted-printable", CE_QUOTED, InitQuoted },
124 { "8bit", CE_8BIT, Init7Bit },
125 { "7bit", CE_7BIT, Init7Bit },
126 { "binary", CE_BINARY, Init7Bit },
127 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
128 { NULL, CE_UNKNOWN, NULL },
135 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
146 ** Main entry point for parsing a MIME message or file.
147 ** It returns the Content structure for the top level
148 ** entity in the file.
/*
** parse_mime: parse the MIME structure of the named file.
**
** When file is a single dash, standard input is copied line by line
** into a temporary file created with m_mktemp2, and that file is then
** rewound and parsed instead.  Otherwise the file is opened for
** reading.  The top-level content is produced by get_content (called
** with toplevel = 1), and the content-type Init function stored in
** c_ctinitfnx, if any, is then run on it.
**
** NOTE(review): the return type, local declarations, return
** statements and closing braces are not present in this extract, so
** the exact failure paths cannot be confirmed from here.
*/
151 parse_mime(char *file)
159 ** Check if file is actually standard input
161 if ((is_stdin = (strcmp(file, "-")==0))) {
162 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
164 advise("mhparse", "unable to create temporary file");
167 file = getcpy(tfile);
170 while (fgets(buffer, sizeof(buffer), stdin))
176 advise("stdin", "error reading");
181 advise(file, "error writing");
/* rewind the spooled copy so parsing starts at its beginning */
184 fseek(fp, 0L, SEEK_SET);
185 } else if ((fp = fopen(file, "r")) == NULL) {
186 advise(file, "unable to read");
190 if (!(ct = get_content(fp, file, 1))) {
193 advise(NULL, "unable to decode %s", file);
198 ct->c_unlink = 1; /* temp file to remove */
/* c_end still zero: use the current size of the file as the end offset */
202 if (ct->c_end == 0L) {
203 fseek(fp, 0L, SEEK_END);
204 ct->c_end = ftell(fp);
207 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
219 ** Main routine for reading/parsing the headers
220 ** of a message content.
222 ** toplevel = 1 # we are at the top level of the message
223 ** toplevel = 0 # we are inside message type or multipart type
224 ** # other than multipart/digest
225 ** toplevel = -1 # we are inside multipart/digest
226 ** NB: on failure we will fclose(in)!
/*
** get_content: read the headers of one content from `in` and build
** its Content structure.
**
** Phase 1 allocates the structure (calloc, so all fields start at
** zero), records a copy of the file name, and reads header fields with
** m_getfld, joining FLDPLUS continuations with add() and appending
** each field to the list on ct through add_header.  c_begin is updated
** from ftell(in) as the headers are consumed.
**
** Phase 2 walks the header list starting at c_first_hf and interprets
** the MIME-Version, Content-Type, Content-Transfer-Encoding,
** Content-ID, Content-Description and Content-Disposition fields.
** For Content-Type and Content-Transfer-Encoding the matching row of
** str2cts / str2ces supplies the internal code, and for the encoding
** its Init function is called immediately.
**
** Phase 3 applies defaults: message/rfc822 inside multipart/digest,
** text/plain otherwise, and CE_7BIT as the transfer encoding.
**
** toplevel is 1 at the top of a message, 0 inside a message or
** multipart type other than multipart/digest, and -1 inside
** multipart/digest.
**
** NOTE(review): case labels, several conditions, return statements
** and closing braces are not present in this extract; the phase
** boundaries above are taken from the remaining comments and calls.
*/
230 get_content(FILE *in, char *file, int toplevel)
233 char buf[BUFSIZ], name[NAMESZ];
238 /* allocate the content structure */
239 if (!(ct = (CT) calloc(1, sizeof(*ct))))
240 adios(NULL, "out of memory");
243 ct->c_file = getcpy(file);
/* NOTE(review): reads ct->c_fp; its assignment is not visible in this extract */
244 ct->c_begin = ftell(ct->c_fp) + 1;
247 ** Parse the header fields for this
248 ** content into a linked list.
250 for (compnum = 1, state = FLD;;) {
251 switch (state = m_getfld(state, name, buf, sizeof(buf), in)) {
257 /* get copies of the buffers */
261 /* if necessary, get rest of field */
262 while (state == FLDPLUS) {
263 state = m_getfld(state, name, buf,
265 vp = add(buf, vp); /* add to previous value */
268 /* Now add the header data to the list */
269 add_header(ct, np, vp);
271 /* continue, if this isn't the last header field */
272 if (state != FLDEOF) {
273 ct->c_begin = ftell(in) + 1;
280 ct->c_begin = ftell(in) - strlen(buf);
284 ct->c_begin = ftell(in);
289 adios(NULL, "message format error in component #%d",
293 adios(NULL, "getfld() returned %d", state);
296 /* break out of the loop */
301 ** Read the content headers. We will parse the
302 ** MIME related header fields into their various
303 ** structures and set internal flags related to
304 ** content type/subtype, etc.
307 hp = ct->c_first_hf; /* start at first header field */
309 /* Get MIME-Version field */
310 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
313 unsigned char *cp, *dp;
316 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
319 ct->c_vrsn = getcpy(hp->value);
321 /* Now, cleanup this field */
326 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
328 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
333 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
335 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
338 for (dp = cp; istoken(*dp); dp++)
/* ucmp is non-zero when the version token equals VRSN_VALUE, ignoring case */
342 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
345 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
348 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
349 /* Get Content-Type field */
350 struct str2init *s2i;
351 CI ci = &ct->c_ctinfo;
353 /* Check if we've already seen a Content-Type header */
355 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
359 /* Parse the Content-Type field */
360 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
364 ** Set the Init function and the internal
365 ** flag for this content type.
367 for (s2i = str2cts; s2i->si_key; s2i++)
368 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
370 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
372 ct->c_type = s2i->si_val;
373 ct->c_ctinitfnx = s2i->si_init;
375 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
376 /* Get Content-Transfer-Encoding field */
378 unsigned char *cp, *dp;
379 struct str2init *s2i;
382 ** Check if we've already seen the
383 ** Content-Transfer-Encoding field
386 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
390 /* get copy of this field */
391 ct->c_celine = cp = getcpy(hp->value);
395 for (dp = cp; istoken(*dp); dp++)
401 ** Find the internal flag and Init function
402 ** for this transfer encoding.
404 for (s2i = str2ces; s2i->si_key; s2i++)
405 if (!mh_strcasecmp(cp, s2i->si_key))
407 if (!s2i->si_key && !uprf(cp, "X-"))
410 ct->c_encoding = s2i->si_val;
412 /* Call the Init function for this encoding */
413 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
416 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
417 /* Get Content-ID field */
418 ct->c_id = add(hp->value, ct->c_id);
420 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
421 /* Get Content-Description field */
422 ct->c_descr = add(hp->value, ct->c_descr);
424 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
425 /* Get Content-Disposition field */
426 ct->c_dispo = add(hp->value, ct->c_dispo);
430 hp = hp->next; /* next header field */
434 ** Check if we saw a Content-Type field.
435 ** If not, then assign a default value for
436 ** it, and the Init function.
440 ** If we are inside a multipart/digest message,
441 ** so default type is message/rfc822
444 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
446 ct->c_type = CT_MESSAGE;
447 ct->c_ctinitfnx = InitMessage;
450 ** Else default type is text/plain
452 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
454 ct->c_type = CT_TEXT;
455 ct->c_ctinitfnx = InitText;
459 /* Use default Transfer-Encoding, if necessary */
461 ct->c_encoding = CE_7BIT;
474 ** small routine to add header field to list
/*
** add_header: append one header field to the list kept on ct.
**
** A node is allocated with mh_xmalloc.  If the list is empty
** (c_first_hf is NULL) the node becomes the first element; otherwise
** it is linked after the current c_last_hf.
**
** NOTE(review): the lines that store name and value in the node and
** that advance c_last_hf are not present in this extract, so whether
** the strings are copied or adopted cannot be confirmed from here.
*/
478 add_header(CT ct, char *name, char *value)
482 /* allocate header field structure */
483 hp = mh_xmalloc(sizeof(*hp));
485 /* link data into header structure */
490 /* link header structure into the list */
491 if (ct->c_first_hf == NULL) {
492 ct->c_first_hf = hp; /* this is the first */
495 ct->c_last_hf->next = hp; /* add it to the end */
504 ** Make sure that buf contains at least one appearance of name,
505 ** followed by =. If not, insert both name and value, just after
506 ** first semicolon, if any. Note that name should not contain a
507 ** trailing =. And quotes will be added around the value. Typical
508 ** usage: make sure that a Content-Disposition header contains
509 ** filename="foo". If it doesn't and value does, use value from
/*
** incl_name_value: ensure that buf mentions name followed by an
** equals sign.
**
** name with an equals sign appended is built with concat and searched
** for in buf with strstr.  If it is absent, an insertion consisting
** of a semicolon, a space, name, an equals sign and the value in
** double quotes is built.  A copy of buf is searched for its first
** semicolon: if one exists the insertion goes there, otherwise the
** insertion and a newline are appended to buf.
**
** NOTE(review): the return statement and the frees of the temporary
** strings (other than name_plus_equal) are not present in this
** extract; the visible caller assigns the result back to c_dispo.
*/
513 incl_name_value(unsigned char *buf, char *name, char *value) {
516 /* Assume that name is non-null. */
518 char *name_plus_equal = concat(name, "=", NULL);
520 if (!strstr(buf, name_plus_equal)) {
523 char *prefix, *suffix;
525 /* Trim trailing space, esp. newline. */
/*
** NOTE(review): for an empty buf, strlen(buf) - 1 wraps around and the
** starting pointer lies outside buf; confirm that callers never pass
** an empty string.
*/
526 for (cp = &buf[strlen(buf) - 1];
527 cp >= buf && isspace(*cp); --cp) {
531 insertion = concat("; ", name, "=", "\"", value, "\"",
535 ** Insert at first semicolon, if any.
536 ** If none, append to end.
538 prefix = getcpy(buf);
539 if ((cp = strchr(prefix, ';'))) {
540 suffix = concat(cp, NULL);
542 newbuf = concat(prefix, insertion, suffix,
547 newbuf = concat(buf, insertion, "\n", NULL);
555 free(name_plus_equal);
562 ** Extract just name_suffix="foo", if any, from value. If there isn't
563 ** one, return the entire value. Note that, for example, a name_suffix
564 ** of name will match filename="foo", and return foo.
/*
** extract_name_value: pull the quoted value that follows name_suffix
** and an equals sign out of value.
**
** The search key is name_suffix followed by an equals sign and a
** double quote.  When the key is found, the text between that double
** quote and the next one is copied into a fresh mh_xmalloc buffer and
** returned.  When it is not found, value itself is returned.
**
** NOTE(review): the result is therefore either newly allocated or the
** argument pointer, and the return value alone does not tell the
** caller which.
*/
567 extract_name_value(char *name_suffix, char *value) {
568 char *extracted_name_value = value;
569 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
570 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
573 free(name_suffix_plus_quote);
574 if (name_suffix_equals) {
575 char *name_suffix_begin;
/* the first scan always terminates: the matched key ends in a double quote */
578 for (cp = name_suffix_equals; *cp != '"'; ++cp)
580 name_suffix_begin = ++cp;
581 /* Find second \". */
/*
** NOTE(review): the visible loop condition tests only for the closing
** double quote, not for the string terminator; a value with an
** unterminated quote would be scanned past its end.  The loop body is
** not present in this extract, so confirm there.
*/
582 for (; *cp != '"'; ++cp)
585 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
586 memcpy(extracted_name_value, name_suffix_begin,
587 cp - name_suffix_begin);
588 extracted_name_value[cp - name_suffix_begin] = '\0';
591 return extracted_name_value;
595 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
596 ** directives. Fills in the information of the CTinfo structure.
/*
** get_ctinfo: parse a Content-Type value into the CTinfo structure of
** ct (ct->c_ctinfo, as used by the callers in this file).
**
** A copy of the line is stored in ct->c_ctline and parsed in place:
** leading and trailing whitespace is trimmed, newlines become spaces,
** and parenthesised comments are consumed through get_comment
** wherever an opening parenthesis is found.  The type and subtype
** tokens are copied into ci_type / ci_subtype and down-cased; an empty
** type or subtype is reported with advise.  Attribute names and their
** values are stored in ci_attrs / ci_values, with NPARMS as the limit
** named in the error message.
**
** When magic is non-zero (mhbuild composition directives) the forms
** for an id in angle brackets, a description in square brackets and a
** disposition in curly braces are also recognised and stored in
** c_id, c_descr and c_dispo.  Left-over text is kept in ci_magic and
** used to supply a filename parameter for c_dispo, or reported as
** extraneous information.
**
** i is the length of invo_name plus 2 and is used as the indent width
** in the two-line diagnostics.
**
** NOTE(review): return statements, several conditions and closing
** braces are not present in this extract, so which branch each
** visible statement belongs to cannot always be confirmed.
*/
599 get_ctinfo(unsigned char *cp, CT ct, int magic)
608 i = strlen(invo_name) + 2;
610 /* store copy of Content-Type line */
611 cp = ct->c_ctline = getcpy(cp);
613 while (isspace(*cp)) /* trim leading spaces */
616 /* change newlines to spaces */
617 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
620 /* trim trailing spaces */
621 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
627 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
629 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
632 for (dp = cp; istoken(*dp); dp++)
635 ci->ci_type = getcpy(cp); /* store content type */
639 advise(NULL, "invalid %s: field in message %s (empty type)",
640 TYPE_FIELD, ct->c_file);
644 /* down case the content type string */
645 for (dp = ci->ci_type; *dp; dp++)
646 if (isalpha(*dp) && isupper(*dp))
652 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
/* subtype absent: store an empty string so later code can test its first byte */
657 ci->ci_subtype = getcpy("");
665 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
668 for (dp = cp; istoken(*dp); dp++)
671 ci->ci_subtype = getcpy(cp); /* store the content subtype */
674 if (!*ci->ci_subtype) {
675 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
679 /* down case the content subtype string */
680 for (dp = ci->ci_subtype; *dp; dp++)
681 if (isalpha(*dp) && isupper(*dp))
688 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
692 ** Parse attribute/value pairs given with Content-Type
/* ap walks the attribute slots; ep marks the slot NPARMS entries in */
694 ep = (ap = ci->ci_attrs) + NPARMS;
700 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
708 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
712 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
716 /* down case the attribute name */
717 for (dp = cp; istoken(*dp); dp++)
718 if (isalpha(*dp) && isupper(*dp))
721 for (up = dp; isspace(*dp);)
723 if (dp == cp || *dp != '=') {
/* NOTE(review): dp - cp is a pointer difference but is printed with %d */
724 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
/* the attribute and its value share one copied buffer; vp points inside *ap */
728 vp = (*ap = getcpy(cp)) + (up - cp);
730 for (dp++; isspace(*dp);)
733 /* now add the attribute value */
734 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
737 for (cp = ++dp, dp = vp;;) {
741 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
746 if ((c = *cp++) == '\0')
761 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
766 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
774 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
779 ** Get any <Content-Id> given in buffer
781 if (magic && *cp == '<') {
786 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
787 advise(NULL, "invalid ID in message %s", ct->c_file);
793 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
804 ** Get any [Content-Description] given in buffer.
806 if (magic && *cp == '[') {
808 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
812 advise(NULL, "invalid description in message %s",
821 ct->c_descr = concat(ct->c_descr, "\n", NULL);
832 ** Get any {Content-Disposition} given in buffer.
834 if (magic && *cp == '{') {
836 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
840 advise(NULL, "invalid disposition in message %s",
849 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
860 ** Check if anything is left over
864 ci->ci_magic = getcpy(cp);
867 ** If there is a Content-Disposition header and
868 ** it doesn't have a *filename=, extract it from
869 ** the magic contents. The mhbasename call skips
870 ** any leading directory components.
873 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
875 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** get_comment: consume a parenthesised comment starting at *ap.
**
** Callers in this file invoke it only when the current character is
** an opening parenthesis.  The comment text is gathered in a local
** buffer and stored in ci_comment: if a comment already exists the
** new text is appended after a single space, otherwise a copy of the
** buffer is stored.  istype selects the field name used in the
** diagnostic (TYPE_FIELD when non-zero, VRSN_FIELD otherwise).
**
** NOTE(review): the scanning loop, the bounds handling for buffer and
** the return statements are not present in this extract.
*/
883 get_comment(CT ct, unsigned char **ap, int istype)
888 char c, buffer[BUFSIZ], *dp;
900 advise(NULL, "invalid comment in message %s's %s: field",
901 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
906 if ((c = *cp++) == '\0')
929 if ((dp = ci->ci_comment)) {
930 ci->ci_comment = concat(dp, " ", buffer, NULL);
933 ci->ci_comment = getcpy(buffer);
/*
** Presumably the remains of InitGeneric, which str2cts installs for
** the audio, image and video types; the function header is not
** present in this extract.  The only visible statement returns OK.
*/
948 ** Handles content types audio, image, and video.
949 ** There's not much to do right here.
955 return OK; /* not much to do here */
/*
** Presumably the body of InitText, the Init function that str2cts
** installs for the text type; the function header is not present in
** this extract.
**
** Visible steps: an empty subtype is replaced by plain; the subtype
** is looked up in SubText and stored in c_subtype; a zeroed struct
** text is allocated and hung on c_ctparams; the parameter list is
** scanned for charset, whose value is matched against Charset to set
** tx_charset (CHARSET_UNSPECIFIED when no charset was given); and if
** check_charset rejects the charset, a profile entry built from the
** format with -charset- in it supplies c_termproc.
*/
968 char **ap, **ep, *cp;
971 CI ci = &ct->c_ctinfo;
973 /* check for missing subtype */
974 if (!*ci->ci_subtype)
975 ci->ci_subtype = add("plain", ci->ci_subtype);
978 for (kv = SubText; kv->kv_key; kv++)
979 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
981 ct->c_subtype = kv->kv_value;
983 /* allocate text character set structure */
984 if ((t = (struct text *) calloc(1, sizeof(*t))) == NULL)
985 adios(NULL, "out of memory");
986 ct->c_ctparams = (void *) t;
988 /* scan for charset parameter */
/* ap and ep advance in step, so *ep is the value belonging to *ap */
989 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
990 if (!mh_strcasecmp(*ap, "charset"))
993 /* check if content specified a character set */
995 /* match character set or set to CHARSET_UNKNOWN */
996 for (kv = Charset; kv->kv_key; kv++) {
997 if (!mh_strcasecmp(*ep, kv->kv_key)) {
1002 t->tx_charset = kv->kv_value;
1004 t->tx_charset = CHARSET_UNSPECIFIED;
1008 ** If we can not handle character set natively,
1009 ** then check profile for string to modify the
1010 ** terminal or display method.
1012 ** termproc is for mhshow, though mhlist -debug prints it, too.
1014 if (chset != NULL && !check_charset(chset, strlen(chset))) {
1015 snprintf(buffer, sizeof(buffer), "%s-charset-%s",
1017 if ((cp = context_find(buffer)))
1018 ct->c_termproc = getcpy(cp);
/*
** InitMultiPart: split a multipart content into its parts.
**
** Visible steps:
**  - warn unless the encoding is 7bit, 8bit or binary;
**  - look the subtype up in SubMultiPart;
**  - find the boundary parameter (mandatory) and reject one that is
**    only whitespace; trailing whitespace is removed;
**  - allocate the struct multipart and record the separators:
**    mp_start is the boundary plus a newline, mp_stop is the boundary
**    plus two dashes and a newline;
**  - read the body line by line from c_begin, treating lines that
**    start with two dashes followed by mp_start / mp_stop as
**    delimiters; each new part is parsed with get_content (toplevel
**    -1 for multipart/digest, 0 otherwise) and its c_end is set when
**    the next delimiter is seen;
**  - reverse the parts for multipart/alternative;
**  - number the parts and call each part's own Init function.
**
** NOTE(review): the inout state handling, several conditions, return
** statements and closing braces are not present in this extract.
*/
1030 InitMultiPart(CT ct)
1034 unsigned char *cp, *dp;
1036 char *bp, buffer[BUFSIZ];
1037 struct multipart *m;
1039 struct part *part, **next;
1040 CI ci = &ct->c_ctinfo;
1045 ** The encoding for multipart messages must be either
1046 ** 7bit, 8bit, or binary (per RFC2045).
1048 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1049 && ct->c_encoding != CE_BINARY) {
1050 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1055 for (kv = SubMultiPart; kv->kv_key; kv++)
1056 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1058 ct->c_subtype = kv->kv_value;
1061 ** Check for "boundary" parameter, which is
1062 ** required for multipart messages.
1065 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1066 if (!mh_strcasecmp(*ap, "boundary")) {
1072 /* complain if boundary parameter is missing */
1074 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1078 /* allocate primary structure for multipart info */
1079 if ((m = (struct multipart *) calloc(1, sizeof(*m))) == NULL)
1080 adios(NULL, "out of memory");
1081 ct->c_ctparams = (void *) m;
1083 /* check if boundary parameter contains only whitespace characters */
1084 for (cp = bp; isspace(*cp); cp++)
1087 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1091 /* remove trailing whitespace from boundary parameter */
1092 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1097 /* record boundary separators */
1098 m->mp_start = concat(bp, "\n", NULL);
1099 m->mp_stop = concat(bp, "--\n", NULL);
1101 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1102 advise(ct->c_file, "unable to open for reading");
/* pos tracks the file offset by hand, starting at the body of this content */
1106 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
/* next always points at the link where the following part is attached */
1108 next = &m->mp_parts;
1112 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
1116 pos += strlen(buffer);
1117 if (buffer[0] != '-' || buffer[1] != '-')
1120 if (strcmp(buffer + 2, m->mp_start)!=0)
1123 if ((part = (struct part *) calloc(1, sizeof(*part)))
1125 adios(NULL, "out of memory");
1127 next = &part->mp_next;
1129 if (!(p = get_content(fp, ct->c_file,
1130 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1137 fseek(fp, pos, SEEK_SET);
1140 if (strcmp(buffer + 2, m->mp_start) == 0) {
/* the part ends just before the delimiter line that was read last */
1144 p->c_end = ftell(fp) - (strlen(buffer) + 1);
1145 if (p->c_end < p->c_begin)
1146 p->c_begin = p->c_end;
1151 if (strcmp(buffer + 2, m->mp_stop) == 0)
1157 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1158 if (!inout && part) {
1160 p->c_end = ct->c_end;
/* an empty trailing part is unlinked from the list and freed */
1162 if (p->c_begin >= p->c_end) {
1163 for (next = &m->mp_parts; *next != part;
1164 next = &((*next)->mp_next))
1168 free((char *) part);
1173 /* reverse the order of the parts for multipart/alternative */
1174 if (ct->c_subtype == MULTI_ALTERNATE)
1178 ** label all subparts with part number, and
1179 ** then initialize the content of the subpart.
1184 char partnam[BUFSIZ];
1187 snprintf(partnam, sizeof(partnam), "%s.",
1189 pp = partnam + strlen(partnam);
1194 for (part = m->mp_parts, partnum = 1; part;
1195 part = part->mp_next, partnum++) {
/*
** NOTE(review): sprintf writes at pp without a size limit; the space
** left in partnam after the prefix is not checked in the visible lines.
*/
1198 sprintf(pp, "%d", partnum);
1199 p->c_partno = getcpy(partnam);
1201 /* initialize the content of the subparts */
1202 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1217 ** reverse the order of the parts of a multipart
/*
** reverse_parts: reverse the linked list of parts held in the struct
** multipart on ct->c_ctparams.
**
** A list with zero or one part is left alone.  Otherwise the parts
** are counted, a zeroed array of i + 1 pointers is allocated, every
** part is recorded in it, and the list is rebuilt by walking the
** array from its last filled slot back to base.  The array is freed
** at the end.
**
** NOTE(review): the counter increment, the array fill statement and
** the relinking statements are not present in this extract.
*/
1221 reverse_parts(CT ct)
1224 struct multipart *m;
1225 struct part **base, **bmp, **next, *part;
1227 m = (struct multipart *) ct->c_ctparams;
1229 /* if only one part, just return */
1230 if (!m->mp_parts || !m->mp_parts->mp_next)
1233 /* count number of parts */
1235 for (part = m->mp_parts; part; part = part->mp_next)
1238 /* allocate array of pointers to the parts */
1239 if (!(base = (struct part **) calloc((size_t) (i + 1), sizeof(*base))))
1240 adios(NULL, "out of memory");
1243 /* point at all the parts */
1244 for (part = m->mp_parts; part; part = part->mp_next)
1248 /* reverse the order of the parts */
1249 next = &m->mp_parts;
1250 for (bmp--; bmp >= base; bmp--) {
1253 next = &part->mp_next;
1257 /* free array of pointers */
1258 free((char *) base);
/*
** Presumably the body of InitMessage, the Init function that str2cts
** installs for the message type; the function header is not present
** in this extract.
**
** Visible steps: warn unless the encoding is 7bit or 8bit; replace an
** empty subtype by rfc822; look the subtype up in SubMessage; then
** switch on the subtype.  For message/partial a zeroed struct partial
** is allocated and filled from the id, number and total parameters,
** and the combination is validated (an id and a part number must be
** present, and the part number must not exceed a non-zero total).
** For message/external-body the enclosed headers are parsed with
** get_content starting at c_begin, and the Init function of the
** resulting content is called.
**
** NOTE(review): case bodies, return statements and closing braces are
** largely not present in this extract.
*/
1270 CI ci = &ct->c_ctinfo;
1272 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1273 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1277 /* check for missing subtype */
1278 if (!*ci->ci_subtype)
1279 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1282 for (kv = SubMessage; kv->kv_key; kv++)
1283 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1285 ct->c_subtype = kv->kv_value;
1287 switch (ct->c_subtype) {
1288 case MESSAGE_RFC822:
1291 case MESSAGE_PARTIAL:
1296 if ((p = (struct partial *) calloc(1, sizeof(*p))) == NULL)
1297 adios(NULL, "out of memory");
1298 ct->c_ctparams = (void *) p;
1301 ** scan for parameters "id", "number",
1304 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1305 if (!mh_strcasecmp(*ap, "id")) {
1306 p->pm_partid = getcpy(*ep);
1309 if (!mh_strcasecmp(*ap, "number")) {
/* the part number must parse as an integer and be at least 1 */
1310 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1312 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1317 if (!mh_strcasecmp(*ap, "total")) {
1318 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
1325 if (!p->pm_partid || !p->pm_partno
1326 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1327 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1333 case MESSAGE_EXTERNAL:
1338 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1339 advise(ct->c_file, "unable to open for reading");
1343 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1345 if (!(p = get_content(fp, ct->c_file, 0))) {
1351 p->c_end = p->c_begin;
1356 switch (p->c_type) {
1361 if (p->c_subtype != MESSAGE_RFC822)
1366 (*p->c_ctinitfnx) (p);
/*
** InitApplication: Init function for the application type.  The
** visible lines look the subtype up in SubApplication and store the
** resulting code in c_subtype; an unmatched subtype ends on the
** NULL-keyed row and so receives APPLICATION_UNKNOWN.
** NOTE(review): the return statement is not present in this extract.
*/
1385 InitApplication(CT ct)
1388 CI ci = &ct->c_ctinfo;
1391 for (kv = SubApplication; kv->kv_key; kv++)
1392 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1394 ct->c_subtype = kv->kv_value;
1401 ** TRANSFER ENCODINGS
/*
** init_encoding: common setup shared by the per-encoding Init
** functions.  A zeroed encoding structure is allocated, and the
** caller-supplied open function is installed on ct together with
** close_encoding and size_encoding as the close and size handlers.
** NOTE(review): the line that attaches ce to ct and the return
** statement are not present in this extract.
*/
1405 init_encoding(CT ct, OpenCEFunc openfnx)
1409 if ((ce = (CE) calloc(1, sizeof(*ce))) == NULL)
1410 adios(NULL, "out of memory");
1413 ct->c_ceopenfnx = openfnx;
1414 ct->c_ceclosefnx = close_encoding;
1415 ct->c_cesizefnx = size_encoding;
/*
** close_encoding: close handler installed by init_encoding.  The only
** visible statement fetches c_cefile and tests it for NULL.
** NOTE(review): the rest of the body is not present in this extract.
*/
1422 close_encoding(CT ct)
1426 if (!(ce = ct->c_cefile))
/*
** size_encoding: size handler installed by init_encoding; reports the
** size in bytes of the content.
**
** Visible order of attempts:
**  1. no c_cefile: return c_end - c_begin (the raw extent);
**  2. the decoded file is open and fstat succeeds: return st_size;
**  3. stat of ce_file succeeds: return st_size;
**  4. the encoding is CE_EXTERNAL: return the raw extent;
**  5. otherwise open through c_ceopenfnx (returning the raw extent if
**     that fails), fstat the descriptor, and close through
**     c_ceclosefnx.
**
** NOTE(review): st_size is cast to long although the function returns
** unsigned long.
*/
1436 static unsigned long
1437 size_encoding(CT ct)
1445 if (!(ce = ct->c_cefile))
1446 return (ct->c_end - ct->c_begin);
1448 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1449 return (long) st.st_size;
1452 if (stat(ce->ce_file, &st) != NOTOK)
1453 return (long) st.st_size;
1458 if (ct->c_encoding == CE_EXTERNAL)
1459 return (ct->c_end - ct->c_begin);
1462 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1463 return (ct->c_end - ct->c_begin);
1465 if (fstat(fd, &st) != NOTOK)
1466 size = (long) st.st_size;
1470 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table, indexed by a 7-bit character code.  Entries
** hold the 6-bit value of the character (0x00 to 0x3f); 0xff marks a
** character outside the base64 alphabet.  openBase64 treats any entry
** above 0x3f as invalid.
*/
1479 static unsigned char b642nib[0x80] = {
1480 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1481 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1482 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1483 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1484 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1485 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1486 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1487 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1488 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1489 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1490 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1491 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1492 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1493 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1494 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1495 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
/*
** Presumably the body of InitBase64 (the Init function listed for
** base64 in str2ces); its header is not present in this extract.
** It installs openBase64 as the open function through init_encoding.
*/
1502 return init_encoding(ct, openBase64);
/*
** openBase64: decode the base64 body of ct into a file and return a
** descriptor for it (fileno of ce_fp).  On success *file is set to the
** name of the decoded file.
**
** Visible steps:
**  - if a decoded stream is already open it is rewound; if only the
**    file name is known the file is reopened for reading;
**  - otherwise a file name is chosen: a temporary name from m_mktemp
**    when *file is NULL, else a copy of *file;
**  - a suffix is looked up in the profile under keys built from the
**    formats with -suffix- in them (type/subtype first, then a
**    shorter key) and appended to the file name, renaming an already
**    created temporary file;
**  - the body between c_begin and c_end is read in chunks with read()
**    and decoded: each valid character contributes 6 bits to `bits`,
**    and once bitno drops below zero three bytes are written through
**    the b1, b2 and b3 pointers, whose positions inside `bits` depend
**    on the endian flag;
**  - the output is flushed and rewound.
**
** NOTE(review): padding handling, the cleanup label, several
** conditions and the return statements of the error paths are not
** present in this extract.
*/
1507 openBase64(CT ct, char **file)
1510 int fd, len, skip, own_ct_fp = 0;
1512 unsigned char value, *b, *b1, *b2, *b3;
1513 unsigned char *cp, *ep;
1514 char buffer[BUFSIZ];
1515 /* sbeck -- handle suffixes */
/* b1..b3 address three bytes of bits; the offsets are chosen by the endian flag */
1519 b = (unsigned char *) &bits;
1520 b1 = &b[endian > 0 ? 1 : 2];
1521 b2 = &b[endian > 0 ? 2 : 1];
1522 b3 = &b[endian > 0 ? 3 : 0];
1526 fseek(ce->ce_fp, 0L, SEEK_SET);
1531 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1532 content_error(ce->ce_file, ct,
1533 "unable to fopen for reading");
1539 if (*file == NULL) {
1540 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1543 ce->ce_file = getcpy(*file);
1547 /* sbeck@cise.ufl.edu -- handle suffixes */
1549 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1550 invo_name, ci->ci_type, ci->ci_subtype);
1551 cp = context_find(buffer);
1552 if (cp == NULL || *cp == '\0') {
1553 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1555 cp = context_find(buffer);
1557 if (cp != NULL && *cp != '\0') {
1558 if (ce->ce_unlink) {
1560 ** Temporary file already exists, so we rename to
1561 ** version with extension.
/* NOTE(review): the strdup result is used by rename without a visible NULL check */
1563 char *file_org = strdup(ce->ce_file);
1564 ce->ce_file = add(cp, ce->ce_file);
1565 if (rename(file_org, ce->ce_file)) {
1566 adios(ce->ce_file, "unable to rename %s to ",
1572 ce->ce_file = add(cp, ce->ce_file);
1576 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1577 content_error(ce->ce_file, ct,
1578 "unable to fopen for reading/writing");
1582 if ((len = ct->c_end - ct->c_begin) < 0)
1583 adios(NULL, "internal error(1)");
1586 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1587 content_error(ct->c_file, ct,
1588 "unable to open for reading");
/* input is read through the raw descriptor, positioned at the start of the body */
1598 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1600 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1602 content_error(ct->c_file, ct, "error reading from");
1606 content_error(NULL, ct, "premature eof");
1614 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
/* reject bytes with the high bit set and characters outside the base64 alphabet */
1619 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1621 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1623 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1627 bits |= value << bitno;
/* four characters collected: emit the three decoded bytes */
1629 if ((bitno -= 6) < 0) {
1630 putc((char) *b1, ce->ce_fp);
1632 putc((char) *b2, ce->ce_fp);
1634 putc((char) *b3, ce->ce_fp);
1638 if (ferror(ce->ce_fp)) {
1639 content_error(ce->ce_file, ct,
1640 "error writing to");
1643 bitno = 18, bits = 0L, skip = 0;
1649 goto self_delimiting;
1658 fprintf(stderr, "premature ending (bitno %d)\n",
1661 content_error(NULL, ct, "invalid BASE64 encoding");
1666 fseek(ct->c_fp, 0L, SEEK_SET);
1668 if (fflush(ce->ce_fp)) {
1669 content_error(ce->ce_file, ct, "error writing to");
1673 fseek(ce->ce_fp, 0L, SEEK_SET);
1676 *file = ce->ce_file;
1681 return fileno(ce->ce_fp);
1684 free_encoding(ct, 0);
/*
** Hex digit table, indexed by a 7-bit character code.  The digits 0-9
** and the letters A-F / a-f map to 0x00 through 0x0f; every other
** entry is 0x00, so a non-hex character cannot be told apart from the
** digit 0 by this table alone (openQuoted checks isxdigit before it
** starts decoding an escape).
*/
1697 static char hex2nib[0x80] = {
1698 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1699 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1700 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1701 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1702 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1703 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1704 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1705 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1706 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1707 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1708 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1709 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1710 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1711 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1712 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1713 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/*
** Presumably the body of InitQuoted (the Init function listed for
** quoted-printable in str2ces); its header is not present in this
** extract.  It installs openQuoted as the open function through
** init_encoding.
*/
1720 return init_encoding(ct, openQuoted);
/*
** openQuoted: decode the quoted-printable body of ct into a file and
** return a descriptor for it (fileno of ce_fp).  On success *file is
** set to the name of the decoded file.
**
** File selection and suffix handling follow the same visible pattern
** as openBase64: reuse an open stream, reopen a known file, or choose
** a temporary or caller-supplied name and append a profile-supplied
** suffix.
**
** The body is read line by line with fgets.  For each line, trailing
** characters are trimmed by moving ep backwards, and the rest is
** scanned byte by byte.  Inside an escape, the first hex digit is
** looked up in hex2nib into mask and the second is OR-ed in before
** the byte is written.  Outside an escape, an equals sign followed by
** a newline is a soft line break; one followed by two hex digits
** starts an escape; anything else is written out unchanged.
**
** NOTE(review): the quoted state variable updates, the shift applied
** to the first nibble, and the cleanup paths are not present in this
** extract.
*/
1725 openQuoted(CT ct, char **file)
1727 int cc, len, quoted, own_ct_fp = 0;
1728 unsigned char *cp, *ep;
1729 char buffer[BUFSIZ];
1730 unsigned char mask = 0;
1732 /* sbeck -- handle suffixes */
1737 fseek(ce->ce_fp, 0L, SEEK_SET);
1742 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1743 content_error(ce->ce_file, ct,
1744 "unable to fopen for reading");
1750 if (*file == NULL) {
1751 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1754 ce->ce_file = getcpy(*file);
1758 /* sbeck@cise.ufl.edu -- handle suffixes */
1760 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1761 invo_name, ci->ci_type, ci->ci_subtype);
1762 cp = context_find(buffer);
1763 if (cp == NULL || *cp == '\0') {
1764 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1766 cp = context_find(buffer);
1768 if (cp != NULL && *cp != '\0') {
1769 if (ce->ce_unlink) {
1770 // Temporary file already exists, so we rename to
1771 // version with extension.
/* NOTE(review): the strdup result is used by rename without a visible NULL check */
1772 char *file_org = strdup(ce->ce_file);
1773 ce->ce_file = add(cp, ce->ce_file);
1774 if (rename(file_org, ce->ce_file)) {
1775 adios(ce->ce_file, "unable to rename %s to ",
1781 ce->ce_file = add(cp, ce->ce_file);
1785 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1786 content_error(ce->ce_file, ct,
1787 "unable to fopen for reading/writing");
1791 if ((len = ct->c_end - ct->c_begin) < 0)
1792 adios(NULL, "internal error(2)");
1795 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1796 content_error(ct->c_file, ct,
1797 "unable to open for reading");
1805 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1807 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1808 content_error(NULL, ct, "premature eof");
/* never consume more than the len bytes that remain in this content */
1812 if ((cc = strlen(buffer)) > len)
1816 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1821 for (; cp < ep; cp++) {
1823 /* in an escape sequence */
1825 /* at byte 1 of an escape sequence */
1826 mask = hex2nib[*cp & 0x7f];
1827 /* next is byte 2 */
1830 /* at byte 2 of an escape sequence */
1832 mask |= hex2nib[*cp & 0x7f];
1833 putc(mask, ce->ce_fp);
1834 if (ferror(ce->ce_fp)) {
1835 content_error(ce->ce_file, ct, "error writing to");
1839 ** finished escape sequence; next may
1840 ** be literal or a new escape sequence
1844 /* on to next byte */
1848 /* not in an escape sequence */
1851 ** starting an escape sequence,
1854 if (cp + 1 < ep && cp[1] == '\n') {
1855 /* "=\n" soft line break, eat the \n */
1859 if (cp + 1 >= ep || cp + 2 >= ep) {
1861 ** We don't have 2 bytes left,
1862 ** so this is an invalid escape
1863 ** sequence; just show the raw bytes
1866 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1868 ** Next 2 bytes are hex digits,
1869 ** making this a valid escape
1870 ** sequence; let's decode it (above).
1876 ** One or both of the next 2 is
1877 ** out of range, making this an
1878 ** invalid escape sequence; just
1879 ** show the raw bytes (below).
1884 /* Just show the raw byte. */
1885 putc(*cp, ce->ce_fp);
1886 if (ferror(ce->ce_fp)) {
1887 content_error(ce->ce_file, ct,
1888 "error writing to");
1894 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1898 fseek(ct->c_fp, 0L, SEEK_SET);
1900 if (fflush(ce->ce_fp)) {
1901 content_error(ce->ce_file, ct, "error writing to");
1905 fseek(ce->ce_fp, 0L, SEEK_SET);
1908 *file = ce->ce_file;
1913 return fileno(ce->ce_fp);
1916 free_encoding(ct, 0);
/*
** Presumably the body of Init7Bit (the Init function listed for 8bit,
** 7bit and binary in str2ces); its header is not present in this
** extract.  It installs open7Bit through init_encoding and then clears
** the size handler, since these encodings need no decoding to learn
** the size.
*/
1932 if (init_encoding(ct, open7Bit) == NOTOK)
1935 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** open7Bit: copy the body of ct, which needs no decoding, into a file
** and return a descriptor for it (fileno of ce_fp).  On success *file
** is set to the name of that file.
**
** File selection and suffix handling follow the same visible pattern
** as openBase64 and openQuoted.
**
** For a multipart content a header block is written first: the
** Content-Type line with its parameters (each as name, equals sign
** and quoted value) and comment, folded with a newline and a tab when
** the running length would reach CPERLIN, followed by the ID,
** description and disposition fields and an empty line.
**
** The body between c_begin and c_end is then copied in chunks with
** read() and fwrite(), and the output is flushed and rewound.
**
** NOTE(review): the conditions guarding the ID, description and
** disposition lines, the copy loop bounds and the cleanup paths are
** not present in this extract.
*/
1941 open7Bit(CT ct, char **file)
1943 int cc, fd, len, own_ct_fp = 0;
1944 char buffer[BUFSIZ];
1945 /* sbeck -- handle suffixes */
1952 fseek(ce->ce_fp, 0L, SEEK_SET);
1957 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1958 content_error(ce->ce_file, ct,
1959 "unable to fopen for reading");
1965 if (*file == NULL) {
1966 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1969 ce->ce_file = getcpy(*file);
1973 /* sbeck@cise.ufl.edu -- handle suffixes */
1975 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1976 invo_name, ci->ci_type, ci->ci_subtype);
1977 cp = context_find(buffer);
1978 if (cp == NULL || *cp == '\0') {
1979 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1981 cp = context_find(buffer);
1983 if (cp != NULL && *cp != '\0') {
1984 if (ce->ce_unlink) {
1986 ** Temporary file already exists, so we rename to
1987 ** version with extension.
/* NOTE(review): the strdup result is used by rename without a visible NULL check */
1989 char *file_org = strdup(ce->ce_file);
1990 ce->ce_file = add(cp, ce->ce_file);
1991 if (rename(file_org, ce->ce_file)) {
1992 adios(ce->ce_file, "unable to rename %s to ",
1998 ce->ce_file = add(cp, ce->ce_file);
2002 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
2003 content_error(ce->ce_file, ct,
2004 "unable to fopen for reading/writing");
/* multipart only: regenerate the content headers ahead of the body */
2008 if (ct->c_type == CT_MULTIPART) {
2010 CI ci = &ct->c_ctinfo;
2013 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
/* len counts the characters already written on the current header line */
2015 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
2016 strlen(ci->ci_subtype);
2017 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
2018 putc(';', ce->ce_fp);
2021 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
/* fold the header line when the next parameter would reach CPERLIN */
2024 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
2025 fputs("\n\t", ce->ce_fp);
2028 putc(' ', ce->ce_fp);
2031 fprintf(ce->ce_fp, "%s", buffer);
2035 if (ci->ci_comment) {
2036 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2038 fputs("\n\t", ce->ce_fp);
2041 putc(' ', ce->ce_fp);
2044 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2047 fprintf(ce->ce_fp, "\n");
2049 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2051 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2053 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2054 fprintf(ce->ce_fp, "\n");
2057 if ((len = ct->c_end - ct->c_begin) < 0)
2058 adios(NULL, "internal error(3)");
2061 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2062 content_error(ct->c_file, ct,
2063 "unable to open for reading");
2069 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2071 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2073 content_error(ct->c_file, ct, "error reading from");
2077 content_error(NULL, ct, "premature eof");
/* NOTE(review): the fwrite count is not checked; only ferror is tested afterwards */
2085 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2086 if (ferror(ce->ce_fp)) {
2087 content_error(ce->ce_file, ct,
2088 "error writing to");
2093 fseek(ct->c_fp, 0L, SEEK_SET);
2095 if (fflush(ce->ce_fp)) {
2096 content_error(ce->ce_file, ct, "error writing to");
2100 fseek(ce->ce_fp, 0L, SEEK_SET);
2103 *file = ce->ce_file;
2108 return fileno(ce->ce_fp);
2111 free_encoding(ct, 0);