2 ** mhparse.c -- routines to parse the contents of MIME messages
4 ** This code is Copyright (c) 2002, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/mhparse.h>
24 extern int endian; /* mhmisc.c */
26 extern pid_t xpid; /* mhshowsbr.c */
29 ** Directory to place temp files. This must
30 ** be set before these routines are called.
35 ** Structures for TEXT messages
/*
** Table mapping text subtype names to TEXT_* codes.  The entry with a
** NULL key ends the table and carries TEXT_UNKNOWN.  The scan at
** original line 961 stops on the NULL key and line 964 then reads
** kv_value, so an unmatched subtype presumably ends up as TEXT_UNKNOWN
** (the statement between those lines is elided - confirm).
** NOTE(review): this listing omits some original lines; the closing of
** the initializer is not visible here.
*/
37 struct k2v SubText[] = {
38 { "plain", TEXT_PLAIN },
39 { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */
40 { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */
41 { NULL, TEXT_UNKNOWN } /* this one must be last! */
/*
** Table mapping charset names to CHARSET_* codes.  The NULL-keyed
** entry ends the table and carries CHARSET_UNKNOWN.  It is scanned at
** original line 981, where the keys are compared with mh_strcasecmp
** (presumably a comparison that ignores case - that helper is not
** defined in this listing).
*/
44 struct k2v Charset[] = {
45 { "us-ascii", CHARSET_USASCII },
46 { "iso-8859-1", CHARSET_LATIN },
47 { NULL, CHARSET_UNKNOWN } /* this one must be last! */
51 ** Structures for MULTIPART messages
/*
** Table mapping multipart subtype names to MULTI_* codes; the NULL key
** ends the table and carries MULTI_UNKNOWN.  Scanned at original line
** 1025.  MULTI_DIGEST and MULTI_ALTERNATE get special treatment later
** in this file (lines 1100 and 1144).
*/
53 struct k2v SubMultiPart[] = {
54 { "mixed", MULTI_MIXED },
55 { "alternative", MULTI_ALTERNATE },
56 { "digest", MULTI_DIGEST },
57 { "parallel", MULTI_PARALLEL },
58 { NULL, MULTI_UNKNOWN } /* this one must be last! */
62 ** Structures for MESSAGE messages
/*
** Table mapping message subtype names to MESSAGE_* codes; the NULL key
** ends the table and carries MESSAGE_UNKNOWN.  Scanned at original
** line 1252; the resulting code drives the switch at line 1257.
*/
64 struct k2v SubMessage[] = {
65 { "rfc822", MESSAGE_RFC822 },
66 { "partial", MESSAGE_PARTIAL },
67 { "external-body", MESSAGE_EXTERNAL },
68 { NULL, MESSAGE_UNKNOWN } /* this one must be last! */
72 ** Structure for APPLICATION messages
/*
** Table mapping application subtype names to APPLICATION_* codes; the
** NULL key ends the table and carries APPLICATION_UNKNOWN.  Scanned at
** original line 1361.
*/
74 struct k2v SubApplication[] = {
75 { "octet-stream", APPLICATION_OCTETS },
76 { "postscript", APPLICATION_POSTSCRIPT },
77 { NULL, APPLICATION_UNKNOWN } /* this one must be last! */
/*
** Forward declarations.  The four non-static functions have no
** definition in this listing (presumably they live in other source
** files - confirm).  The static ones are local helpers: the Init*
** functions are referenced from the str2cts and str2ces tables, the
** open* functions are handed to init_encoding.
** NOTE(review): some declarations are elided from this listing, for
** example those of get_ctinfo, add_header and open7Bit.
*/
84 int make_intermediates(char *);
85 void content_error(char *, CT, char *, ...);
88 void free_content(CT);
89 void free_encoding(CT, int);
94 static CT get_content(FILE *, char *, int);
95 static int get_comment(CT, unsigned char **, int);
97 static int InitGeneric(CT);
98 static int InitText(CT);
99 static int InitMultiPart(CT);
100 static void reverse_parts(CT);
101 static int InitMessage(CT);
102 static int InitApplication(CT);
103 static int init_encoding(CT, OpenCEFunc);
104 static unsigned long size_encoding(CT);
105 static int InitBase64(CT);
106 static int openBase64(CT, char **);
107 static int InitQuoted(CT);
108 static int openQuoted(CT, char **);
109 static int Init7Bit(CT);
/*
** Table mapping the major content type name to its CT_* code and to
** the function that initializes content of that type.  audio, image
** and video share InitGeneric.
** The table ends with two NULL-keyed entries.  The scan at original
** line 350 stops on the first one (CT_EXTENSION); the test at line
** 353 for a type that does not start with X- presumably steps on to
** the second one, CT_UNKNOWN (line 354 is elided - confirm).  Both
** sentinels have a NULL init function.
*/
111 struct str2init str2cts[] = {
112 { "application", CT_APPLICATION, InitApplication },
113 { "audio", CT_AUDIO, InitGeneric },
114 { "image", CT_IMAGE, InitGeneric },
115 { "message", CT_MESSAGE, InitMessage },
116 { "multipart", CT_MULTIPART, InitMultiPart },
117 { "text", CT_TEXT, InitText },
118 { "video", CT_VIDEO, InitGeneric },
119 { NULL, CT_EXTENSION, NULL }, /* these two must be last! */
120 { NULL, CT_UNKNOWN, NULL },
/*
** Table mapping the transfer encoding name to its CE_* code and to
** the function that sets up decoding.  8bit, 7bit and binary all use
** Init7Bit.
** As in str2cts there are two NULL-keyed sentinels: the scan at
** original line 387 stops on CE_EXTENSION and the test at line 390
** presumably steps on to CE_UNKNOWN for names without the X- prefix
** (the following line is elided - confirm).  The caller checks
** si_init for NULL before calling it (line 396).
*/
123 struct str2init str2ces[] = {
124 { "base64", CE_BASE64, InitBase64 },
125 { "quoted-printable", CE_QUOTED, InitQuoted },
126 { "8bit", CE_8BIT, Init7Bit },
127 { "7bit", CE_7BIT, Init7Bit },
128 { "binary", CE_BINARY, Init7Bit },
129 { NULL, CE_EXTENSION, NULL }, /* these two must be last! */
130 { NULL, CE_UNKNOWN, NULL },
137 if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
148 ** Main entry point for parsing a MIME message or file.
149 ** It returns the Content structure for the top level
150 ** entity in the file.
/*
** file - path of the message to parse; a single dash selects standard
**        input (line 163).
** For standard input a temporary file is created with m_mktemp2, the
** input is read with fgets (line 172) and the stream is rewound
** (line 186) before parsing; the body of the copy loop is elided here.
** The stream is handed to get_content with toplevel = 1.  If c_end is
** still 0 afterwards it is set to the end offset of the file, and the
** type specific init function, if one was selected, is run.
** NOTE(review): the return statements and the cleanup on the error
** paths are elided from this listing - confirm before relying on the
** above.
*/
153 parse_mime(char *file)
161 ** Check if file is actually standard input
163 if ((is_stdin = (strcmp(file, "-")==0))) {
164 char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
166 advise("mhparse", "unable to create temporary file");
/* from here on file names the temporary copy, not the dash */
169 file = getcpy(tfile);
172 while (fgets(buffer, sizeof(buffer), stdin))
178 advise("stdin", "error reading");
183 advise(file, "error writing");
186 fseek(fp, 0L, SEEK_SET);
187 } else if ((fp = fopen(file, "r")) == NULL) {
188 advise(file, "unable to read");
192 if (!(ct = get_content(fp, file, 1))) {
195 advise(NULL, "unable to decode %s", file);
/* NOTE(review): the condition guarding this flag is elided; presumably is_stdin */
200 ct->c_unlink = 1; /* temp file to remove */
/* c_end == 0 means no end offset was recorded; use the file size */
204 if (ct->c_end == 0L) {
205 fseek(fp, 0L, SEEK_END);
206 ct->c_end = ftell(fp);
209 if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
221 ** Main routine for reading/parsing the headers
222 ** of a message content.
224 ** toplevel = 1 # we are at the top level of the message
225 ** toplevel = 0 # we are inside message type or multipart type
226 ** # other than multipart/digest
227 ** toplevel = -1 # we are inside multipart/digest
228 ** NB: on failure we will fclose(in)!
/*
** Outline, from the visible lines only (this listing elides lines):
**  - a zeroed content structure is allocated and gets a copy of the
**    file name; c_begin is first set from the current stream offset
**  - m_getfld2 is called in a loop; header fields are appended to the
**    header list via add_header, and c_begin is recomputed from
**    ftell(in) in three different ways (lines 260, 264, 268); the
**    case labels that select between them are elided here
**  - the collected headers are walked once and the MIME-Version,
**    Content-Type, Content-Transfer-Encoding, Content-ID,
**    Content-Description and Content-Disposition fields are handled
**  - without a Content-Type field the type defaults to message/rfc822
**    inside multipart/digest and to text/plain otherwise; the
**    transfer encoding falls back to CE_7BIT if necessary
** in       - stream positioned at the start of the headers
** file     - name stored (as a copy) in ct->c_file and used in messages
** toplevel - see the table above
*/
232 get_content(FILE *in, char *file, int toplevel)
235 struct field f = {{0}};
240 /* allocate the content structure */
241 if (!(ct = (CT) mh_xcalloc(1, sizeof(*ct))))
242 adios(EX_OSERR, NULL, "out of memory");
245 ct->c_file = getcpy(file);
246 ct->c_begin = ftell(ct->c_fp) + 1;
249 ** Parse the header fields for this
250 ** content into a linked list.
252 for (compnum = 1, state = FLD2;;) {
253 switch (state = m_getfld2(state, &f, in)) {
257 /* add the header data to the list */
258 add_header(ct, getcpy(f.name), getcpy(f.value));
260 ct->c_begin = ftell(in) + 1;
264 ct->c_begin = ftell(in) - strlen(f.value);
268 ct->c_begin = ftell(in);
274 adios(EX_DATAERR, NULL, "message format error in component #%d",
278 adios(EX_SOFTWARE, NULL, "getfld() returned %d", state);
284 ** Read the content headers. We will parse the
285 ** MIME related header fields into their various
286 ** structures and set internal flags related to
287 ** content type/subtype, etc.
290 hp = ct->c_first_hf; /* start at first header field */
292 /* Get MIME-Version field */
293 if (!mh_strcasecmp(hp->name, VRSN_FIELD)) {
296 unsigned char *cp, *dp;
299 advise(NULL, "message %s has multiple %s: fields", ct->c_file, VRSN_FIELD);
302 ct->c_vrsn = getcpy(hp->value);
304 /* Now, cleanup this field */
309 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
311 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
316 fprintf(stderr, "%s: %s\n", VRSN_FIELD, cp);
318 if (*cp == '(' && get_comment(ct, &cp, 0) == NOTOK)
321 for (dp = cp; istoken(*dp); dp++)
325 ucmp = !mh_strcasecmp(cp, VRSN_VALUE);
/* an unexpected version value is only warned about (admonish) */
328 admonish(NULL, "message %s has unknown value for %s: field (%s)", ct->c_file, VRSN_FIELD, cp);
331 } else if (!mh_strcasecmp(hp->name, TYPE_FIELD)) {
332 /* Get Content-Type field */
333 struct str2init *s2i;
334 CI ci = &ct->c_ctinfo;
336 /* Check if we've already seen a Content-Type header */
338 advise(NULL, "message %s has multiple %s: fields", ct->c_file, TYPE_FIELD);
342 /* Parse the Content-Type field */
343 if (get_ctinfo(hp->value, ct, 0) == NOTOK)
347 ** Set the Init function and the internal
348 ** flag for this content type.
/* the scan ends on a match or on the first NULL key (CT_EXTENSION) */
350 for (s2i = str2cts; s2i->si_key; s2i++)
351 if (!mh_strcasecmp(ci->ci_type, s2i->si_key))
/* NOTE(review): line 354 is elided; presumably it advances to CT_UNKNOWN */
353 if (!s2i->si_key && !uprf(ci->ci_type, "X-"))
355 ct->c_type = s2i->si_val;
356 ct->c_ctinitfnx = s2i->si_init;
358 } else if (!mh_strcasecmp(hp->name, ENCODING_FIELD)) {
359 /* Get Content-Transfer-Encoding field */
361 unsigned char *cp, *dp;
362 struct str2init *s2i;
365 ** Check if we've already seen the
366 ** Content-Transfer-Encoding field
369 advise(NULL, "message %s has multiple %s: fields", ct->c_file, ENCODING_FIELD);
373 /* get copy of this field */
374 ct->c_celine = cp = getcpy(hp->value);
378 for (dp = cp; istoken(*dp); dp++)
384 ** Find the internal flag and Init function
385 ** for this transfer encoding.
387 for (s2i = str2ces; s2i->si_key; s2i++)
388 if (!mh_strcasecmp(cp, s2i->si_key))
390 if (!s2i->si_key && !uprf(cp, "X-"))
393 ct->c_encoding = s2i->si_val;
395 /* Call the Init function for this encoding */
396 if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
399 } else if (!mh_strcasecmp(hp->name, ID_FIELD)) {
400 /* Get Content-ID field */
/* add() rather than getcpy(): a repeated field presumably accumulates - confirm */
401 ct->c_id = add(hp->value, ct->c_id);
403 } else if (!mh_strcasecmp(hp->name, DESCR_FIELD)) {
404 /* Get Content-Description field */
405 ct->c_descr = add(hp->value, ct->c_descr);
407 } else if (!mh_strcasecmp(hp->name, DISPO_FIELD)) {
408 /* Get Content-Disposition field */
409 ct->c_dispo = add(hp->value, ct->c_dispo);
413 hp = hp->next; /* next header field */
417 ** Check if we saw a Content-Type field.
418 ** If not, then assign a default value for
419 ** it, and the Init function.
423 ** If we are inside a multipart/digest message,
424 ** so default type is message/rfc822
427 if (get_ctinfo("message/rfc822", ct, 0) == NOTOK)
429 ct->c_type = CT_MESSAGE;
430 ct->c_ctinitfnx = InitMessage;
433 ** Else default type is text/plain
435 if (get_ctinfo("text/plain", ct, 0) == NOTOK)
437 ct->c_type = CT_TEXT;
438 ct->c_ctinitfnx = InitText;
442 /* Use default Transfer-Encoding, if necessary */
444 ct->c_encoding = CE_7BIT;
457 ** small routine to add header field to list
/*
** Appends one header node to the list kept in ct (c_first_hf is the
** head, c_last_hf the tail).
** name, value - the caller at original line 258 passes copies made by
**               getcpy; presumably the list takes ownership of them
**               (the lines that store them, 469-471, are elided).
** NOTE(review): the update of c_last_hf and the return statement are
** not visible in this listing.
*/
461 add_header(CT ct, char *name, char *value)
465 /* allocate header field structure */
466 hp = mh_xmalloc(sizeof(*hp));
468 /* link data into header structure */
473 /* link header structure into the list */
474 if (ct->c_first_hf == NULL) {
475 ct->c_first_hf = hp; /* this is the first */
478 ct->c_last_hf->next = hp; /* add it to the end */
487 ** Make sure that buf contains at least one appearance of name,
488 ** followed by =. If not, insert both name and value, just after
489 ** first semicolon, if any. Note that name should not contain a
490 ** trailing =. And quotes will be added around the value. Typical
491 ** usage: make sure that a Content-Disposition header contains
492 ** filename="foo". If it doesn't and value does, use value from
/*
** buf   - header value to inspect; the caller at original line 858
**         passes ct->c_dispo and stores the result back into it
** name  - parameter name without the trailing equals sign
** value - parameter value; it is wrapped in double quotes on insertion
** NOTE(review): line 509 computes &buf[strlen(buf) - 1]; for an empty
** buf that index wraps around, so the pointer lies outside the buffer
** before the cp >= buf test is evaluated.
** NOTE(review): line 503 is a plain substring search, so a name that
** occurs as the tail of a longer word, or inside a quoted value, also
** counts as present.
** NOTE(review): the return statement and the frees of prefix, suffix
** and insertion are elided from this listing - confirm who owns buf
** and the returned string.
*/
496 incl_name_value(unsigned char *buf, char *name, char *value) {
499 /* Assume that name is non-null. */
501 char *name_plus_equal = concat(name, "=", NULL);
503 if (!strstr(buf, name_plus_equal)) {
506 char *prefix, *suffix;
508 /* Trim trailing space, esp. newline. */
509 for (cp = &buf[strlen(buf) - 1];
510 cp >= buf && isspace(*cp); --cp) {
514 insertion = concat("; ", name, "=", "\"", value, "\"",
518 ** Insert at first semicolon, if any.
519 ** If none, append to end.
521 prefix = getcpy(buf);
522 if ((cp = strchr(prefix, ';'))) {
/* suffix is a copy of the text from the first semicolon onward */
523 suffix = concat(cp, NULL);
525 newbuf = concat(prefix, insertion, suffix,
530 newbuf = concat(buf, insertion, "\n", NULL);
538 free(name_plus_equal);
545 ** Extract just name_suffix="foo", if any, from value. If there isn't
546 ** one, return the entire value. Note that, for example, a name_suffix
547 ** of name will match filename="foo", and return foo.
/*
** name_suffix - parameter name, or the tail of one, to look for
** value       - text to search
** Returns value itself when name_suffix followed by an equals sign and
** a double quote does not occur in it; otherwise returns a block newly
** allocated with mh_xmalloc that holds the text between the two
** double quotes.
** NOTE(review): the two return cases differ in ownership, so a caller
** cannot tell from the pointer alone whether it has to free it (it
** can only compare the result against value).
** NOTE(review): the scan for the closing double quote at line 565 has
** no test for the end of the string in its loop condition, and the
** loop body is elided; if the body is empty, an unterminated quoted
** value makes the scan run past the end of value - confirm.
*/
550 extract_name_value(char *name_suffix, char *value) {
551 char *extracted_name_value = value;
552 char *name_suffix_plus_quote = concat(name_suffix, "=\"", NULL);
553 char *name_suffix_equals = strstr(value, name_suffix_plus_quote);
556 free(name_suffix_plus_quote);
557 if (name_suffix_equals) {
558 char *name_suffix_begin;
/* the first double quote is known to exist: strstr matched it above */
561 for (cp = name_suffix_equals; *cp != '"'; ++cp)
563 name_suffix_begin = ++cp;
564 /* Find second \". */
565 for (; *cp != '"'; ++cp)
568 extracted_name_value = mh_xmalloc(cp - name_suffix_begin + 1);
569 memcpy(extracted_name_value, name_suffix_begin,
570 cp - name_suffix_begin);
571 extracted_name_value[cp - name_suffix_begin] = '\0';
574 return extracted_name_value;
/*
** cp    - field text; a private copy is stored in ct->c_ctline and all
**         parsing happens inside that copy (line 594)
** ct    - content whose c_ctinfo is filled in: ci_type, ci_subtype,
**         ci_attrs with ci_values, ci_comment and ci_magic
** magic - non-zero enables the <id>, [description] and {disposition}
**         extensions (lines 766, 791 and 819)
** The type, the subtype and the attribute names are converted to
** lower case.  A missing subtype is stored as an empty string (line
** 640); at most NPARMS parameters are accepted (lines 677 and 683).
** Comments in parentheses are skipped through get_comment at every
** position where one may appear.
** i is the length of invo_name plus 2 and is only used as the padding
** width of the continuation lines in the diagnostics.
** Callers compare the result against NOTOK (for example line 343).
** NOTE(review): line 707 prints dp - cp, a pointer difference, with
** the %d conversion; the types do not match where ptrdiff_t is wider
** than int.
** NOTE(review): at line 858 the result of extract_name_value may be a
** fresh allocation; no free of it is visible in this listing.
*/
578 ** Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
579 ** directives. Fills in the information of the CTinfo structure.
582 get_ctinfo(unsigned char *cp, CT ct, int magic)
591 i = strlen(invo_name) + 2;
593 /* store copy of Content-Type line */
594 cp = ct->c_ctline = getcpy(cp);
596 while (isspace(*cp)) /* trim leading spaces */
599 /* change newlines to spaces */
600 for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
603 /* trim trailing spaces */
604 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
610 fprintf(stderr, "%s: %s\n", TYPE_FIELD, cp);
612 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
615 for (dp = cp; istoken(*dp); dp++)
618 ci->ci_type = getcpy(cp); /* store content type */
622 advise(NULL, "invalid %s: field in message %s (empty type)",
623 TYPE_FIELD, ct->c_file);
627 /* down case the content type string */
628 for (dp = ci->ci_type; *dp; dp++)
629 if (isalpha(*dp) && isupper(*dp))
635 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
640 ci->ci_subtype = getcpy("");
648 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
651 for (dp = cp; istoken(*dp); dp++)
654 ci->ci_subtype = getcpy(cp); /* store the content subtype */
657 if (!*ci->ci_subtype) {
658 advise(NULL, "invalid %s: field in message %s (empty subtype for \"%s\")", TYPE_FIELD, ct->c_file, ci->ci_type);
662 /* down case the content subtype string */
663 for (dp = ci->ci_subtype; *dp; dp++)
664 if (isalpha(*dp) && isupper(*dp))
671 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
675 ** Parse attribute/value pairs given with Content-Type
677 ep = (ap = ci->ci_attrs) + NPARMS;
683 advise(NULL, "too many parameters in message %s's %s: field (%d max)", ct->c_file, TYPE_FIELD, NPARMS);
691 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
695 advise (NULL, "extraneous trailing ';' in message %s's %s: parameter list", ct->c_file, TYPE_FIELD);
699 /* down case the attribute name */
700 for (dp = cp; istoken(*dp); dp++)
701 if (isalpha(*dp) && isupper(*dp))
704 for (up = dp; isspace(*dp);)
706 if (dp == cp || *dp != '=') {
707 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*sparameter %s (error detected at offset %d)", ct->c_file, TYPE_FIELD, i, i, "", cp, dp - cp);
/* one copy holds both the name and the value; vp marks the end of the name */
711 vp = (*ap = getcpy(cp)) + (up - cp);
713 for (dp++; isspace(*dp);)
716 /* now add the attribute value */
717 ci->ci_values[ap - ci->ci_attrs] = vp = *ap + (dp - cp);
720 for (cp = ++dp, dp = vp;;) {
724 advise(NULL, "invalid quoted-string in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
729 if ((c = *cp++) == '\0')
744 for (cp = dp, dp = vp; istoken(*cp); cp++, dp++)
749 advise(NULL, "invalid parameter in message %s's %s: field\n%*.*s(parameter %s)", ct->c_file, TYPE_FIELD, i, i, "", *ap);
750 *ci->ci_values[ap - ci->ci_attrs] = '\0';
751 *ci->ci_attrs[ap - ci->ci_attrs] = '\0';
759 if (*cp == '(' && get_comment(ct, &cp, 1) == NOTOK)
764 ** Get any <Content-Id> given in buffer
766 if (magic && *cp == '<') {
771 if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
772 advise(NULL, "invalid ID in message %s", ct->c_file);
778 ct->c_id = concat("<", ct->c_id, ">\n", NULL);
789 ** Get any [Content-Description] given in buffer.
791 if (magic && *cp == '[') {
793 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
797 advise(NULL, "invalid description in message %s",
806 ct->c_descr = concat(ct->c_descr, "\n", NULL);
817 ** Get any {Content-Disposition} given in buffer.
819 if (magic && *cp == '{') {
821 for (dp = cp + strlen(cp) - 1; dp >= cp; dp--)
825 advise(NULL, "invalid disposition in message %s",
834 ct->c_dispo = concat(ct->c_dispo, "\n", NULL);
845 ** Check if anything is left over
849 ci->ci_magic = getcpy(cp);
852 ** If there is a Content-Disposition header and
853 ** it doesn't have a *filename=, extract it from
854 ** the magic contents. The mhbasename call skips
855 ** any leading directory components.
858 ct->c_dispo = incl_name_value(ct->c_dispo, "filename", mhbasename(extract_name_value("name", ci->ci_magic)));
860 advise(NULL, "extraneous information in message %s's %s: field\n%*.*s(%s)", ct->c_file, TYPE_FIELD, i, i, "", cp);
/*
** Consumes a comment in parentheses from a header field.
** ct     - content whose ci_comment receives the comment text
** ap     - address of the parse cursor; the callers pass &cp when *cp
**          is an opening parenthesis and continue from the updated
**          cursor afterwards
** istype - selects the field name used in the diagnostic: TYPE_FIELD
**          if non-zero, VRSN_FIELD otherwise (line 886)
** The text is collected in buffer.  If ci_comment already holds a
** comment the new text is appended after a single space, otherwise a
** copy of buffer becomes the comment.
** Callers compare the result against NOTOK.
** NOTE(review): the collecting loop is elided; whether writes to the
** BUFSIZ sized buffer are bounded cannot be seen here - confirm.
*/
868 get_comment(CT ct, unsigned char **ap, int istype)
873 char c, buffer[BUFSIZ], *dp;
885 advise(NULL, "invalid comment in message %s's %s: field",
886 ct->c_file, istype ? TYPE_FIELD : VRSN_FIELD);
891 if ((c = *cp++) == '\0')
914 if ((dp = ci->ci_comment)) {
915 ci->ci_comment = concat(dp, " ", buffer, NULL);
918 ci->ci_comment = getcpy(buffer);
/*
** NOTE(review): the function header is elided from this listing.  The
** str2cts table maps audio, image and video to InitGeneric, so these
** lines are presumably its comment and body - confirm.
** The only visible statement returns OK.
*/
933 ** Handles content types audio, image, and video.
934 ** There's not much to do right here.
940 return OK; /* not much to do here */
/*
** NOTE(review): the function header is elided from this listing; the
** use of SubText and struct text suggests this is the body of
** InitText - confirm.
** Visible steps:
**  - an empty subtype is replaced by plain
**  - the subtype is looked up in SubText and stored in c_subtype
**  - a zeroed struct text is allocated and attached as c_ctparams
**  - the parameters are searched for charset; when present, c_charset
**    gets a copy of the norm_charmap result and tx_charset the code
**    found in the Charset table; otherwise tx_charset becomes
**    CHARSET_UNSPECIFIED (the branch structure is elided - confirm)
*/
954 CI ci = &ct->c_ctinfo;
956 /* check for missing subtype */
957 if (!*ci->ci_subtype)
958 ci->ci_subtype = add("plain", ci->ci_subtype);
961 for (kv = SubText; kv->kv_key; kv++)
962 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
964 ct->c_subtype = kv->kv_value;
966 /* allocate text character set structure */
967 if ((t = (struct text *) mh_xcalloc(1, sizeof(*t))) == NULL)
968 adios(EX_OSERR, NULL, "out of memory");
969 ct->c_ctparams = (void *) t;
971 /* scan for charset parameter */
/* ap walks the names and ep the values in step, so *ep belongs to *ap */
972 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
973 if (!mh_strcasecmp(*ap, "charset"))
976 /* check if content specified a character set */
979 ct->c_charset = getcpy(norm_charmap(*ep));
980 /* match character set or set to CHARSET_UNKNOWN */
981 for (kv = Charset; kv->kv_key; kv++) {
982 if (!mh_strcasecmp(*ep, kv->kv_key)) {
986 t->tx_charset = kv->kv_value;
988 t->tx_charset = CHARSET_UNSPECIFIED;
/*
** Splits a multipart content into its parts.
** Outline, from the visible lines only (this listing elides lines):
**  - an encoding other than 7bit, 8bit or binary is warned about and
**    replaced by CE_7BIT
**  - the subtype is looked up in SubMultiPart
**  - the boundary parameter is required; it must not be all
**    whitespace, and trailing whitespace is removed from it
**  - mp_start is the boundary plus a newline, mp_stop the boundary
**    plus two dashes and a newline; lines of the body are compared to
**    them after their leading two dashes (buffer + 2)
**  - each part is parsed by get_content, with toplevel -1 inside
**    multipart/digest and 0 otherwise; when the next delimiter is seen
**    the end of the part is set to the stream offset minus the
**    delimiter line length minus one (line 1114)
**  - multipart/alternative has its part list reversed
**  - the parts are numbered from 1 and the init function of each part
**    is run
** NOTE(review): line 1168 writes the part number with sprintf at pp,
** the end of the prefix already stored in partnam; unlike the snprintf
** at line 1157 this write is not bounded by the size of partnam.
*/
1000 InitMultiPart(CT ct)
1004 unsigned char *cp, *dp;
1006 char *bp, buffer[BUFSIZ];
1007 struct multipart *m;
1009 struct part *part, **next;
1010 CI ci = &ct->c_ctinfo;
1015 ** The encoding for multipart messages must be either
1016 ** 7bit, 8bit, or binary (per RFC2045).
1018 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
1019 && ct->c_encoding != CE_BINARY) {
1020 admonish(NULL, "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary", ci->ci_type, ci->ci_subtype, ct->c_file);
1021 ct->c_encoding = CE_7BIT;
1025 for (kv = SubMultiPart; kv->kv_key; kv++)
1026 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1028 ct->c_subtype = kv->kv_value;
1031 ** Check for "boundary" parameter, which is
1032 ** required for multipart messages.
1035 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1036 if (!mh_strcasecmp(*ap, "boundary")) {
1042 /* complain if boundary parameter is missing */
1044 advise (NULL, "a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1048 /* allocate primary structure for multipart info */
1049 if ((m = (struct multipart *) mh_xcalloc(1, sizeof(*m))) == NULL)
1050 adios(EX_OSERR, NULL, "out of memory");
1051 ct->c_ctparams = (void *) m;
1053 /* check if boundary parameter contains only whitespace characters */
1054 for (cp = bp; isspace(*cp); cp++)
1057 advise(NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1061 /* remove trailing whitespace from boundary parameter */
1062 for (cp = bp, dp = cp + strlen(cp) - 1; dp > cp; dp--)
1067 /* record boundary separators */
1068 m->mp_start = concat(bp, "\n", NULL);
1069 m->mp_stop = concat(bp, "--\n", NULL);
1071 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1072 advise(ct->c_file, "unable to open for reading");
1076 fseek(fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1078 next = &m->mp_parts;
1082 while (fgets(buffer, sizeof(buffer) - 1, fp)) {
/* pos tracks the stream offset just after the line that was read */
1086 pos += strlen(buffer);
1087 if (buffer[0] != '-' || buffer[1] != '-')
1090 if (strcmp(buffer + 2, m->mp_start)!=0)
1093 if ((part = (struct part *) mh_xcalloc(1, sizeof(*part)))
1095 adios(EX_OSERR, NULL, "out of memory");
1097 next = &part->mp_next;
1099 if (!(p = get_content(fp, ct->c_file,
1100 ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1107 fseek(fp, pos, SEEK_SET);
1110 if (strcmp(buffer + 2, m->mp_start) == 0) {
1114 p->c_end = ftell(fp) - (strlen(buffer) + 1);
/* an empty part: keep begin from lying behind end */
1115 if (p->c_end < p->c_begin)
1116 p->c_begin = p->c_end;
1121 if (strcmp(buffer + 2, m->mp_stop) == 0)
1127 advise(NULL, "bogus multipart content in message %s", ct->c_file);
1128 if (!inout && part) {
1130 p->c_end = ct->c_end;
1132 if (p->c_begin >= p->c_end) {
1133 for (next = &m->mp_parts; *next != part;
1134 next = &((*next)->mp_next))
1138 free((char *) part);
1143 /* reverse the order of the parts for multipart/alternative */
1144 if (ct->c_subtype == MULTI_ALTERNATE)
1148 ** label all subparts with part number, and
1149 ** then initialize the content of the subpart.
1154 char partnam[BUFSIZ];
1157 snprintf(partnam, sizeof(partnam), "%s.",
1159 pp = partnam + strlen(partnam);
1164 for (part = m->mp_parts, partnum = 1; part;
1165 part = part->mp_next, partnum++) {
1168 sprintf(pp, "%d", partnum);
1169 p->c_partno = getcpy(partnam);
1171 /* initialize the content of the subparts */
1172 if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1187 ** reverse the order of the parts of a multipart
/*
** ct - multipart content; its c_ctparams is read as struct multipart.
** A list with fewer than two parts is left alone (line 1200).
** Otherwise the parts are counted, an array with one spare slot is
** allocated, the part pointers are stored in it, and the list is then
** relinked by walking the array from its last used slot down to base.
** The array is freed at the end.
** NOTE(review): the lines that fill the array, relink the nodes and
** terminate the new list are elided from this listing.
*/
1191 reverse_parts(CT ct)
1194 struct multipart *m;
1195 struct part **base, **bmp, **next, *part;
1197 m = (struct multipart *) ct->c_ctparams;
1199 /* if only one part, just return */
1200 if (!m->mp_parts || !m->mp_parts->mp_next)
1203 /* count number of parts */
1205 for (part = m->mp_parts; part; part = part->mp_next)
1208 /* allocate array of pointers to the parts */
1209 if (!(base = (struct part **) mh_xcalloc((size_t) (i + 1), sizeof(*base))))
1210 adios(EX_OSERR, NULL, "out of memory");
1213 /* point at all the parts */
1214 for (part = m->mp_parts; part; part = part->mp_next)
1218 /* reverse the order of the parts */
1219 next = &m->mp_parts;
/* bmp presumably points one past the last stored pointer here - confirm */
1220 for (bmp--; bmp >= base; bmp--) {
1223 next = &part->mp_next;
1227 /* free array of pointers */
1228 free((char *) base);
/*
** NOTE(review): the function header is elided from this listing; the
** use of SubMessage suggests this is the body of InitMessage -
** confirm.
** Visible steps:
**  - an encoding other than 7bit or 8bit is warned about
**  - an empty subtype is replaced by rfc822
**  - the subtype is looked up in SubMessage and dispatched on:
**    MESSAGE_PARTIAL  - a zeroed struct partial is attached as
**                       c_ctparams and filled from the id, number and
**                       total parameters; id and number are required
**                       and number must not exceed a non-zero total
**    MESSAGE_EXTERNAL - the stream is positioned at c_begin and the
**                       enclosed headers are parsed with get_content;
**                       the end of that content is set equal to its
**                       beginning (line 1321)
**  The handling of MESSAGE_RFC822 is elided.
*/
1240 CI ci = &ct->c_ctinfo;
1242 if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1243 admonish(NULL, "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", ci->ci_type, ci->ci_subtype, ct->c_file);
1247 /* check for missing subtype */
1248 if (!*ci->ci_subtype)
1249 ci->ci_subtype = add("rfc822", ci->ci_subtype);
1252 for (kv = SubMessage; kv->kv_key; kv++)
1253 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1255 ct->c_subtype = kv->kv_value;
1257 switch (ct->c_subtype) {
1258 case MESSAGE_RFC822:
1261 case MESSAGE_PARTIAL:
1266 if ((p = (struct partial *) mh_xcalloc(1, sizeof(*p))) == NULL)
1267 adios(EX_OSERR, NULL, "out of memory");
1268 ct->c_ctparams = (void *) p;
1271 ** scan for parameters "id", "number",
1274 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1275 if (!mh_strcasecmp(*ap, "id")) {
1276 p->pm_partid = getcpy(*ep);
1279 if (!mh_strcasecmp(*ap, "number")) {
/* the part number must parse as an integer and be at least 1 */
1280 if (sscanf(*ep, "%d", &p->pm_partno) != 1 || p->pm_partno < 1) {
1282 advise(NULL, "invalid %s parameter for \"%s/%s\" type in message %s's %s field", *ap, ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1287 if (!mh_strcasecmp(*ap, "total")) {
1288 if (sscanf(*ep, "%d", &p->pm_maxno) != 1 ||
/* pm_maxno == 0 means no total was given, so the range test is skipped */
1295 if (!p->pm_partid || !p->pm_partno
1296 || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1297 advise(NULL, "invalid parameters for \"%s/%s\" type in message %s's %s field", ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1303 case MESSAGE_EXTERNAL:
1308 if (!ct->c_fp && (ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1309 advise(ct->c_file, "unable to open for reading");
1313 fseek(fp = ct->c_fp, ct->c_begin, SEEK_SET);
1315 if (!(p = get_content(fp, ct->c_file, 0))) {
1321 p->c_end = p->c_begin;
1326 switch (p->c_type) {
1331 if (p->c_subtype != MESSAGE_RFC822)
/* the result of the init function is not checked on this line */
1336 (*p->c_ctinitfnx) (p);
/*
** Looks the subtype up in SubApplication and stores the matching code
** in ct->c_subtype.  The loop stops on the NULL key, whose entry
** carries APPLICATION_UNKNOWN.
** NOTE(review): the statement under the comparison and the return
** statement are elided from this listing.
*/
1355 InitApplication(CT ct)
1358 CI ci = &ct->c_ctinfo;
1361 for (kv = SubApplication; kv->kv_key; kv++)
1362 if (!mh_strcasecmp(ci->ci_subtype, kv->kv_key))
1364 ct->c_subtype = kv->kv_value;
1371 ** TRANSFER ENCODINGS
/*
** Common setup for all transfer encodings.
** ct      - content to prepare
** openfnx - decoder to install as ct->c_ceopenfnx
** A zeroed encoding structure is allocated, and the close and size
** callbacks are set to close_encoding and size_encoding.
** NOTE(review): the line that attaches the new structure to ct and the
** return statement are elided; callers compare the result against
** NOTOK (line 1904).
*/
1375 init_encoding(CT ct, OpenCEFunc openfnx)
1379 if ((ce = (CE) mh_xcalloc(1, sizeof(*ce))) == NULL)
1380 adios(EX_OSERR, NULL, "out of memory");
1383 ct->c_ceopenfnx = openfnx;
1384 ct->c_ceclosefnx = close_encoding;
1385 ct->c_cesizefnx = size_encoding;
/*
** Close callback installed by init_encoding.  The visible test shows
** that a content without an encoding structure (c_cefile is NULL) is
** handled as a special case.
** NOTE(review): everything after that test is elided from this
** listing; presumably the decoded stream is closed - confirm.
*/
1392 close_encoding(CT ct)
1396 if (!(ce = ct->c_cefile))
/*
** Size callback installed by init_encoding.  The size is taken from
** the first source that works, in this order:
**  1. no encoding structure: the raw extent c_end - c_begin
**  2. an open decoded stream: its size from fstat
**  3. a decoded file name: its size from stat
**  4. CE_EXTERNAL: the raw extent
**  5. otherwise the content is decoded through c_ceopenfnx, measured
**     with fstat and closed again through c_ceclosefnx; if the open
**     fails the raw extent is returned
** NOTE(review): the function returns unsigned long, but st_size is
** cast to long on the way, and the raw extent is a difference of
** offsets; a negative value would turn into a large unsigned one.
** NOTE(review): the conditions on lines 1421 and 1438 and the final
** return are elided from this listing.
*/
1406 static unsigned long
1407 size_encoding(CT ct)
1415 if (!(ce = ct->c_cefile))
1416 return (ct->c_end - ct->c_begin);
1418 if (ce->ce_fp && fstat(fileno(ce->ce_fp), &st) != NOTOK)
1419 return (long) st.st_size;
1422 if (stat(ce->ce_file, &st) != NOTOK)
1423 return (long) st.st_size;
1428 if (ct->c_encoding == CE_EXTERNAL)
1429 return (ct->c_end - ct->c_begin);
1432 if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1433 return (ct->c_end - ct->c_begin);
1435 if (fstat(fd, &st) != NOTOK)
1436 size = (long) st.st_size;
1440 (*ct->c_ceclosefnx) (ct);
/*
** Base64 decoding table, indexed by a 7-bit character code.
** A to Z give 0x00 to 0x19, a to z give 0x1a to 0x33, the digits 0 to
** 9 give 0x34 to 0x3d, the plus sign gives 0x3e and the slash 0x3f.
** Every other index, including the equals sign, holds 0xff; the
** decoder at line 1589 treats any value above 0x3f as not part of the
** alphabet.  The decoder masks the index with 0x7f, which keeps it
** inside the 0x80 entries.
*/
1449 static unsigned char b642nib[0x80] = {
1450 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1453 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1454 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1455 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
1456 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
1457 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1458 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
1459 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
1460 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1461 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
1462 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
1463 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
1464 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
1465 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
/*
** NOTE(review): the function header is elided from this listing; the
** str2ces table maps base64 to InitBase64, so this is presumably its
** body - confirm.  It installs openBase64 as the decoder.
*/
1472 return init_encoding(ct, openBase64);
/*
** Decodes the base64 body of ct into a file and returns a descriptor
** for it.
** ct   - content to decode; the body lies between c_begin and c_end
** file - in: name to decode into, or a pointer to NULL to get a
**        temporary file; out: set to the name of the decoded file
** Outline, from the visible lines only (this listing elides lines):
**  - an already open decoded stream is rewound, an existing decoded
**    file is reopened for reading
**  - otherwise the target name is chosen and a suffix from the
**    profile entries <invo_name>-suffix-<type>/<subtype> or
**    <invo_name>-suffix-<type> is attached to it; a temporary file
**    that already exists is renamed to the suffixed name
**  - the body is read in blocks; each valid character contributes six
**    bits to bits at the shift bitno, which starts at 18 and drops by
**    6; when it falls below zero three bytes are written through b1,
**    b2 and b3 and the accumulator is reset
**  - both streams are rewound and fileno of the decoded stream is
**    returned; on failure free_encoding(ct, 0) is called
** NOTE(review): b1, b2 and b3 address bytes of bits at offsets that
** depend on endian (1, 2, 3 or 2, 1, 0).  That picks the low three
** bytes only if bits is four bytes wide or the machine is little
** endian; the declaration of bits is elided - confirm.
** NOTE(review): the result of strdup at line 1533 is used at line
** 1535; a check for NULL is not visible in this listing.
*/
1477 openBase64(CT ct, char **file)
1480 int fd, len, skip, own_ct_fp = 0;
1482 unsigned char value, *b, *b1, *b2, *b3;
1483 unsigned char *cp, *ep;
1484 char buffer[BUFSIZ];
1485 /* sbeck -- handle suffixes */
1489 b = (unsigned char *) &bits;
1490 b1 = &b[endian > 0 ? 1 : 2];
1491 b2 = &b[endian > 0 ? 2 : 1];
1492 b3 = &b[endian > 0 ? 3 : 0];
1496 fseek(ce->ce_fp, 0L, SEEK_SET);
1501 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1502 content_error(ce->ce_file, ct,
1503 "unable to fopen for reading");
1509 if (*file == NULL) {
1510 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1513 ce->ce_file = getcpy(*file);
1517 /* sbeck@cise.ufl.edu -- handle suffixes */
1519 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1520 invo_name, ci->ci_type, ci->ci_subtype);
1521 cp = context_find(buffer);
/* no entry for type/subtype: fall back to an entry for the type alone */
1522 if (cp == NULL || *cp == '\0') {
1523 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1525 cp = context_find(buffer);
1527 if (cp != NULL && *cp != '\0') {
1528 if (ce->ce_unlink) {
1530 ** Temporary file already exists, so we rename to
1531 ** version with extension.
1533 char *file_org = strdup(ce->ce_file);
1534 ce->ce_file = add(cp, ce->ce_file);
1535 if (rename(file_org, ce->ce_file)) {
1536 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1542 ce->ce_file = add(cp, ce->ce_file);
1546 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1547 content_error(ce->ce_file, ct,
1548 "unable to fopen for reading/writing");
1552 if ((len = ct->c_end - ct->c_begin) < 0)
1553 adios(EX_SOFTWARE, NULL, "internal error(1)");
1556 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1557 content_error(ct->c_file, ct,
1558 "unable to open for reading");
1568 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1570 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
1572 content_error(ct->c_file, ct, "error reading from");
1576 content_error(NULL, ct, "premature eof");
1584 for (ep = (cp = buffer) + cc; cp < ep; cp++) {
/* reject while skipping, on a byte with the high bit set, or outside the alphabet */
1589 if (skip || (*cp & 0x80) || (value = b642nib[*cp & 0x7f]) > 0x3f) {
1591 fprintf(stderr, "*cp=0x%x pos=%ld skip=%d\n", *cp, (long) (lseek(fd, (off_t) 0, SEEK_CUR) - (ep - cp)), skip);
1593 content_error(NULL, ct, "invalid BASE64 encoding -- continuing");
1597 bits |= value << bitno;
/* four characters collected once bitno drops below zero */
1599 if ((bitno -= 6) < 0) {
1600 putc((char) *b1, ce->ce_fp);
1602 putc((char) *b2, ce->ce_fp);
1604 putc((char) *b3, ce->ce_fp);
1608 if (ferror(ce->ce_fp)) {
1609 content_error(ce->ce_file, ct,
1610 "error writing to");
1613 bitno = 18, bits = 0L, skip = 0;
1619 goto self_delimiting;
1628 fprintf(stderr, "premature ending (bitno %d)\n",
1631 content_error(NULL, ct, "invalid BASE64 encoding");
1636 fseek(ct->c_fp, 0L, SEEK_SET);
1638 if (fflush(ce->ce_fp)) {
1639 content_error(ce->ce_file, ct, "error writing to");
1643 fseek(ce->ce_fp, 0L, SEEK_SET);
1646 *file = ce->ce_file;
1651 return fileno(ce->ce_fp);
1654 free_encoding(ct, 0);
/*
** Hexadecimal digit table, indexed by a 7-bit character code.
** The digits 0 to 9 give 0 to 9; A to F and a to f give 10 to 15.
** Every other index holds 0, so the table cannot tell an invalid
** character from the digit 0; the quoted-printable decoder checks
** both characters with isxdigit (line 1838) before it relies on the
** table.  The decoder masks the index with 0x7f.
*/
1667 static char hex2nib[0x80] = {
1668 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1671 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1672 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1674 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1675 0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1676 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00,
1677 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1678 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1680 0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/*
** NOTE(review): the function header is elided from this listing; the
** str2ces table maps quoted-printable to InitQuoted, so this is
** presumably its body - confirm.  It installs openQuoted as the
** decoder.
*/
1690 return init_encoding(ct, openQuoted);
/*
** Decodes the quoted-printable body of ct into a file and returns a
** descriptor for it.
** ct   - content to decode; the body lies between c_begin and c_end
** file - in: name to decode into, or a pointer to NULL to get a
**        temporary file; out: set to the name of the decoded file
** The selection of the target file, including the suffix lookup in
** the profile, follows the same steps as in openBase64.
** The body is read line by line with fgets.  Within a line:
**  - an equals sign directly before the newline is a soft line break
**  - an equals sign followed by two hexadecimal digits is decoded to
**    one byte: the first digit is looked up in hex2nib into mask, the
**    second is combined into it, and mask is written out
**  - an equals sign followed by anything else is written as it stands
**  - every other byte is copied unchanged
** Content that ends inside an escape sequence is reported as an error
** (line 1866).  At the end both streams are rewound and fileno of the
** decoded stream is returned; on failure free_encoding(ct, 0) is
** called.
** NOTE(review): the shift of the first nibble between lines 1798 and
** 1804, and the handling of trailing whitespace at line 1788, are
** elided from this listing.
** NOTE(review): the test at line 1831 repeats itself: cp + 1 >= ep
** implies cp + 2 >= ep, so the second operand alone is sufficient.
*/
1695 openQuoted(CT ct, char **file)
1697 int cc, len, quoted, own_ct_fp = 0;
1698 unsigned char *cp, *ep;
1699 char buffer[BUFSIZ];
1700 unsigned char mask = 0;
1702 /* sbeck -- handle suffixes */
1707 fseek(ce->ce_fp, 0L, SEEK_SET);
1712 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1713 content_error(ce->ce_file, ct,
1714 "unable to fopen for reading");
1720 if (*file == NULL) {
1721 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1724 ce->ce_file = getcpy(*file);
1728 /* sbeck@cise.ufl.edu -- handle suffixes */
1730 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1731 invo_name, ci->ci_type, ci->ci_subtype);
1732 cp = context_find(buffer);
1733 if (cp == NULL || *cp == '\0') {
1734 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1736 cp = context_find(buffer);
1738 if (cp != NULL && *cp != '\0') {
1739 if (ce->ce_unlink) {
1741 ** Temporary file already exists, so we rename to
1742 ** version with extension.
1744 char *file_org = strdup(ce->ce_file);
1745 ce->ce_file = add(cp, ce->ce_file);
1746 if (rename(file_org, ce->ce_file)) {
1747 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1753 ce->ce_file = add(cp, ce->ce_file);
1757 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1758 content_error(ce->ce_file, ct,
1759 "unable to fopen for reading/writing");
1763 if ((len = ct->c_end - ct->c_begin) < 0)
1764 adios(EX_SOFTWARE, NULL, "internal error(2)");
1767 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
1768 content_error(ct->c_file, ct,
1769 "unable to open for reading");
1777 fseek(ct->c_fp, ct->c_begin, SEEK_SET);
1779 if (fgets(buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) {
1780 content_error(NULL, ct, "premature eof");
/* a line that reaches past the end of the content is cut to len (presumably) */
1784 if ((cc = strlen(buffer)) > len)
1788 for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
1793 for (; cp < ep; cp++) {
1795 /* in an escape sequence */
1797 /* at byte 1 of an escape sequence */
1798 mask = hex2nib[*cp & 0x7f];
1799 /* next is byte 2 */
1802 /* at byte 2 of an escape sequence */
1804 mask |= hex2nib[*cp & 0x7f];
1805 putc(mask, ce->ce_fp);
1806 if (ferror(ce->ce_fp)) {
1807 content_error(ce->ce_file, ct, "error writing to");
1811 ** finished escape sequence; next may
1812 ** be literal or a new escape sequence
1816 /* on to next byte */
1820 /* not in an escape sequence */
1823 ** starting an escape sequence,
1826 if (cp + 1 < ep && cp[1] == '\n') {
1827 /* "=\n" soft line break, eat the \n */
1831 if (cp + 1 >= ep || cp + 2 >= ep) {
1833 ** We don't have 2 bytes left,
1834 ** so this is an invalid escape
1835 ** sequence; just show the raw bytes
1838 } else if (isxdigit(cp[1]) && isxdigit(cp[2])) {
1840 ** Next 2 bytes are hex digits,
1841 ** making this a valid escape
1842 ** sequence; let's decode it (above).
1848 ** One or both of the next 2 is
1849 ** out of range, making this an
1850 ** invalid escape sequence; just
1851 ** show the raw bytes (below).
1856 /* Just show the raw byte. */
1857 putc(*cp, ce->ce_fp);
1858 if (ferror(ce->ce_fp)) {
1859 content_error(ce->ce_file, ct,
1860 "error writing to");
1866 content_error(NULL, ct, "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
1870 fseek(ct->c_fp, 0L, SEEK_SET);
1872 if (fflush(ce->ce_fp)) {
1873 content_error(ce->ce_file, ct, "error writing to");
1877 fseek(ce->ce_fp, 0L, SEEK_SET);
1880 *file = ce->ce_file;
1885 return fileno(ce->ce_fp);
1888 free_encoding(ct, 0);
/*
** NOTE(review): the function header is elided from this listing; the
** str2ces table maps 8bit, 7bit and binary to Init7Bit, so this is
** presumably its body - confirm.
** open7Bit is installed as the decoder, and the size callback set by
** init_encoding is cleared again.
*/
1904 if (init_encoding(ct, open7Bit) == NOTOK)
1907 ct->c_cesizefnx = NULL; /* no need to decode for real size */
/*
** Copies the body of ct, which needs no decoding, into a file and
** returns a descriptor for it.
** ct   - content to copy; the body lies between c_begin and c_end
** file - in: name to write to, or a pointer to NULL to get a
**        temporary file; out: set to the name of the written file
** The selection of the target file, including the suffix lookup in
** the profile, follows the same steps as in openBase64.
** For multipart content a header is written first: the Content-Type
** field with type, subtype and every parameter as name and quoted
** value, with a line break and a tab inserted before an item that
** would bring the line to CPERLIN characters or more, followed by the
** comment in parentheses when there is one; then the Content-ID,
** Content-Description and Content-Disposition fields and an empty
** line.
** The body is then read in blocks and written with fwrite.  At the
** end both streams are rewound and fileno of the written stream is
** returned; on failure free_encoding(ct, 0) is called.
** NOTE(review): the conditions that guard lines 2021, 2023 and 2025
** are elided; presumably each field is written only when it is set.
** NOTE(review): parameter values are written between double quotes as
** they are (line 1993); a value containing a double quote or a
** backslash would not be escaped - confirm against the elided lines.
*/
1913 open7Bit(CT ct, char **file)
1915 int cc, fd, len, own_ct_fp = 0;
1916 char buffer[BUFSIZ];
1917 /* sbeck -- handle suffixes */
1924 fseek(ce->ce_fp, 0L, SEEK_SET);
1929 if ((ce->ce_fp = fopen(ce->ce_file, "r")) == NULL) {
1930 content_error(ce->ce_file, ct,
1931 "unable to fopen for reading");
1937 if (*file == NULL) {
1938 ce->ce_file = getcpy(m_mktemp(tmp, NULL, NULL));
1941 ce->ce_file = getcpy(*file);
1945 /* sbeck@cise.ufl.edu -- handle suffixes */
1947 snprintf(buffer, sizeof(buffer), "%s-suffix-%s/%s",
1948 invo_name, ci->ci_type, ci->ci_subtype);
1949 cp = context_find(buffer);
1950 if (cp == NULL || *cp == '\0') {
1951 snprintf(buffer, sizeof(buffer), "%s-suffix-%s", invo_name,
1953 cp = context_find(buffer);
1955 if (cp != NULL && *cp != '\0') {
1956 if (ce->ce_unlink) {
1958 ** Temporary file already exists, so we rename to
1959 ** version with extension.
1961 char *file_org = strdup(ce->ce_file);
1962 ce->ce_file = add(cp, ce->ce_file);
1963 if (rename(file_org, ce->ce_file)) {
1964 adios(EX_IOERR, ce->ce_file, "unable to rename %s to ",
1970 ce->ce_file = add(cp, ce->ce_file);
1974 if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
1975 content_error(ce->ce_file, ct,
1976 "unable to fopen for reading/writing");
1980 if (ct->c_type == CT_MULTIPART) {
1982 CI ci = &ct->c_ctinfo;
1985 fprintf(ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type,
/* len tracks the length of the current output line for folding */
1987 len += strlen(TYPE_FIELD) + 2 + strlen(ci->ci_type) + 1 +
1988 strlen(ci->ci_subtype);
1989 for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) {
1990 putc(';', ce->ce_fp);
1993 snprintf(buffer, sizeof(buffer), "%s=\"%s\"",
1996 if (len + 1 + (cc = strlen(buffer)) >= CPERLIN) {
1997 fputs("\n\t", ce->ce_fp);
2000 putc(' ', ce->ce_fp);
2003 fprintf(ce->ce_fp, "%s", buffer);
2007 if (ci->ci_comment) {
/* the 2 accounts for the parentheses written around the comment */
2008 if (len + 1 + (cc = 2 + strlen(ci->ci_comment))
2010 fputs("\n\t", ce->ce_fp);
2013 putc(' ', ce->ce_fp);
2016 fprintf(ce->ce_fp, "(%s)", ci->ci_comment);
2019 fprintf(ce->ce_fp, "\n");
2021 fprintf(ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2023 fprintf(ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2025 fprintf(ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2026 fprintf(ce->ce_fp, "\n");
2029 if ((len = ct->c_end - ct->c_begin) < 0)
2030 adios(EX_SOFTWARE, NULL, "internal error(3)");
2033 if ((ct->c_fp = fopen(ct->c_file, "r")) == NULL) {
2034 content_error(ct->c_file, ct,
2035 "unable to open for reading");
2041 lseek(fd = fileno(ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2043 switch (cc = read(fd, buffer, sizeof(buffer) - 1)) {
2045 content_error(ct->c_file, ct, "error reading from");
2049 content_error(NULL, ct, "premature eof");
2057 fwrite(buffer, sizeof(*buffer), cc, ce->ce_fp);
2058 if (ferror(ce->ce_fp)) {
2059 content_error(ce->ce_file, ct,
2060 "error writing to");
2065 fseek(ct->c_fp, 0L, SEEK_SET);
2067 if (fflush(ce->ce_fp)) {
2068 content_error(ce->ce_file, ct, "error writing to");
2072 fseek(ce->ce_fp, 0L, SEEK_SET);
2075 *file = ce->ce_file;
2080 return fileno(ce->ce_fp);
2083 free_encoding(ct, 0);