Merge
authormarkus schnalke <meillo@marmaro.de>
Wed, 21 Oct 2015 15:39:15 +0000 (17:39 +0200)
committermarkus schnalke <meillo@marmaro.de>
Wed, 21 Oct 2015 15:39:15 +0000 (17:39 +0200)
13 files changed:
etc/mhl.format
etc/mhl.headers
h/prototypes.h
man/mhbuild.man1
sbr/Makefile.in
sbr/encode_rfc2047.c [new file with mode: 0644]
sbr/fmt_scan.c
sbr/unquote.c [new file with mode: 0644]
test/tests/mhbuild/test-header-encode [new file with mode: 0644]
test/tests/pick/test-rfc2047 [new file with mode: 0644]
uip/mhbuild.c
uip/pick.c
uip/sendfiles.sh

index 9636dbb..60904de 100644 (file)
@@ -8,8 +8,8 @@ leftadjust,compwidth=9
 ignores=msgid,message-id,received,content-type,content-transfer-encoding,content-id
 Date:formatfield="%<(nodate{text})%{text}%|%(pretty{text})%>"
 From:decode
-To:
-Cc:
+To:decode
+Cc:decode
 Subject:decode
 :
 extras:nocomponent
index 1a01a7f..a398837 100644 (file)
@@ -8,7 +8,7 @@ overflowtext="***",overflowoffset=5
 leftadjust,compwidth=9
 Date:formatfield="%<(nodate{text})%{text}%|%(pretty{text})%>"
 From:decode
-To:
-Cc:
+To:decode
+Cc:decode
 Subject:decode
 :
index 848115f..8575b22 100644 (file)
@@ -39,6 +39,7 @@ void cpydata(int, int, char *, char *);
 void cpydgst(int, int, char *, char *);
 int decode_rfc2047(char *, char *, size_t);
 int default_done(int);
+int encode_rfc2047(const char *name, char **value, const char *charset);
 int execprog(char *, char **);
 int execprogl(char *, char *, ...);
 char *expandfol(char *);
@@ -73,6 +74,7 @@ char *m_mktemp(const char *, int *, FILE **);
 char *m_mktemp2(const char *, const char *, int *, FILE **);
 void thisisanmbox(FILE *);
 int makedir(char *);
+int mh_strcasecmp(const char *s1, const char *s2);
 char *norm_charmap(char *);
 char *new_fs(char *, char *);
 int pidwait(pid_t, int);
@@ -108,11 +110,12 @@ char *toabsdir(char *);
 char *trim(unsigned char *);
 char *trimcpy(unsigned char *);
 int unputenv(char *);
+void unquote_string(const char *input, char *output);
 int uprf(char *, char *);
 int vfgets(FILE *, char **);
 char *write_charset_8bit(void);
 
-int mh_strcasecmp(const char *s1, const char *s2);
+
 
 
 /*
@@ -160,3 +163,4 @@ int is_readonly(struct msgs *);
 void set_readonly(struct msgs *);
 int other_files(struct msgs *);
 void set_other_files(struct msgs *);
+
index 62d01be..98fb39d 100644 (file)
@@ -24,7 +24,7 @@ creates multi-media messages as specified in RFC\-2045
 thru RFC\-2049.  Currently
 .B mhbuild
 only supports encodings in
-message bodies, and does not support the encoding of message headers as
+message bodies, and does support the encoding of message headers as
 specified in RFC\-2047.
 .PP
 If you specify the name of the composition file as `-',
index 61fc96c..304b50b 100644 (file)
@@ -69,7 +69,8 @@ SRCS = addrsbr.c ambigsw.c brkstring.c  \
        seq_setprev.c seq_setunseen.c signals.c  \
        smatch.c snprintb.c strcasecmp.c  \
        strindex.c trim.c trimcpy.c uprf.c vfgets.c fmt_def.c  \
-       mf.c utils.c m_mktemp.c seq_msgstats.c
+       mf.c utils.c m_mktemp.c seq_msgstats.c \
+       unquote.c encode_rfc2047.c
 
 OBJS =  $(SRCS:.c=.o)
 
diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c
new file mode 100644 (file)
index 0000000..852c261
--- /dev/null
@@ -0,0 +1,672 @@
+/*
+** Routines to encode message headers using RFC 2047-encoding.
+**
+** This code is Copyright (c) 2002, by the authors of nmh.  See the
+** COPYRIGHT file in the root directory of the nmh distribution for
+** complete copyright information.
+*/
+
+#include <h/mh.h>
+#include <h/mhparse.h>
+#include <h/addrsbr.h>
+#include <h/utils.h>
+
+#include <ctype.h>
+
+/*
+** List of headers that contain addresses and as a result require special
+** handling
+*/
+
+static char *address_headers[] = {
+       "To",
+       "From",
+       "cc",
+       "Bcc",
+       "Reply-To",
+       "Sender",
+       "Resent-To",
+       "Resent-From",
+       "Resent-cc",
+       "Resent-Bcc",
+       "Resent-Reply-To",
+       "Resent-Sender",
+       NULL,
+};
+
+/*
+** Macros we use for parsing headers
+**
+** Todo: convert the macros to functions
+*/
+
+#define is_fws(c) (c == '\t' || c == ' ' || c == '\n')
+
+#define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \
+                        (c >= 'a' && c <= 'z') || \
+                        c == '!' || c == '*' || c == '+' || c == '-' || \
+                        c == '/' || c == '=' || c == '_')
+#define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_')
+
+#define base64len(n) ((((n) + 2) / 3) * 4)    /* String len to base64 len */
+#define strbase64(n) ((n) / 4 * 3)            /* Chars that fit in base64 */
+
+#define ENCODELINELIMIT        76
+
+static void unfold_header(char **, int);
+static int field_encode_address(const char *, char **, const char *);
+static int field_encode_quoted(const char *, char **, const char *, int,
+               int, int);
+static int scanstring(const char *, int *, int *, int *);
+static int utf8len(const char *);
+/*static int pref_encoding(int, int, int);*/
+
+/*
+** RFC 2047-encode the header `name' whose text is *value, if needed.
+**
+** Bytes below 128 are taken to be ASCII, so a value without any 8-bit
+** byte is left untouched.  If charset is NULL it is obtained from
+** write_charset_8bit(); US-ASCII is refused for 8-bit data.
+**
+** Returns 0 on success (or when there was nothing to do) and non-zero
+** on failure.
+*/
+int
+encode_rfc2047(const char *name, char **value, const char *charset)
+{
+       int nascii = 0, neightbit = 0, nspecial = 0;
+       char **hdr;
+       char *cp;
+
+       /*
+       ** Classify every byte of the value; a header without 8-bit
+       ** bytes does not have to be encoded at all.
+       */
+
+       for (cp = *value; *cp != '\0'; cp++) {
+               if (!isascii((unsigned char) *cp)) {
+                       neightbit++;
+                       continue;
+               }
+               nascii++;
+               if (qpspecial((unsigned char) *cp)) {
+                       nspecial++;
+               }
+       }
+
+       if (neightbit == 0) {
+               return 0;
+       }
+
+       /*
+       ** Constraints imposed by RFC 2047:
+       **
+       ** - An encoded word may not exceed 75 characters.
+       ** - Several "long" encoded words have to go on separate lines.
+       **
+       ** Email addresses themselves must not be encoded, which is why
+       ** address headers are parsed and only partly encoded.
+       */
+
+       /*
+       ** Without an explicit charset fall back to the locale's one,
+       ** but US-ASCII cannot describe 8-bit data.
+       */
+
+       if (charset == NULL) {
+               charset = write_charset_8bit();
+       }
+       if (strcasecmp(charset, "US-ASCII") == 0) {
+               advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+               return 1;
+       }
+
+       /*
+       ** Address headers get their own treatment: only the names and
+       ** comments in them are encoded.
+       */
+
+       for (hdr = address_headers; *hdr != NULL; hdr++) {
+               if (strcasecmp(name, *hdr) == 0) {
+                       return field_encode_address(name, value, charset);
+               }
+       }
+
+       /*
+       ** Everything else is unfolded to a single line and encoded as
+       ** a whole using quoted-printable.  Continuation is not handled
+       ** perfectly here, but it should be pretty close.
+       */
+
+       unfold_header(value, nascii + neightbit);
+
+       return field_encode_quoted(name, value, charset, nascii,
+                       neightbit + nspecial, 0);
+}
+
+/*
+** Encode our specified header (or field) using quoted-printable
+** ("Q" encoding), producing one or more =?charset?Q?...?= encoded words.
+**
+** Arguments:
+**
+** name        - Header name, or NULL.  With a name, the output is wrapped
+**               onto continuation lines indented by strlen(name) + 2
+**               spaces and ends in a newline; with NULL the result is a
+**               single encoded word without a trailing newline.
+** value       - In/out.  On success the old string is freed and replaced
+**               by the newly allocated encoded string.
+** charset     - Character set name written into every encoded word.
+** ascii       - Number of characters expected to be written as one byte.
+** encoded     - Number of characters expected to be written as =XX.
+**               Both counts are only used to size the output buffer.
+** phraserules - If non-zero, only qphrasevalid() characters (and space)
+**               are left unencoded; otherwise everything that is not
+**               qpspecial() is.
+**
+** Returns 0 on success, 1 if no output was produced (empty value).
+*/
+
+static int
+field_encode_quoted(const char *name, char **value, const char *charset,
+               int ascii, int encoded, int phraserules)
+{
+       int prefixlen = name ? strlen(name) + 2: 0;
+       int outlen = 0, column, newline = 1, utf8;
+       int charsetlen = strlen(charset);
+       char *output = NULL, *p, *q = NULL;
+
+       /*
+       ** Right now we just encode the whole thing.  Maybe later on we'll
+       ** only encode things on a per-atom basis.
+       */
+
+       p = *value;
+
+       column = prefixlen + 2;    /* NOTE(review): prefixlen already includes the ": "; column is reset when the first encoded word is opened */
+
+       utf8 = strcasecmp(charset, "UTF-8") == 0;
+
+       while (*p != '\0') {
+               /* Start a new line (and a new encoded word), if it's time */
+               if (newline) {
+                       int tokenlen;
+
+                       /*
+                       ** If it's the start of the header, we don't need
+                       ** to pad it
+                       **
+                       ** The length of the output string is ...
+                       ** =?charset?Q?...?=  so that's
+                       ** 7+strlen(charset) + 2 for \n NUL
+                       **
+                       ** plus 1 for every ASCII character and 3 for
+                       ** every eight bit or special character (eight
+                       ** bit characters are written as =XX).
+                       **
+                       ** ascii and encoded are counted down as input is
+                       ** consumed, so on later passes only the remaining
+                       ** input is added to outlen.
+                       */
+                       outlen += 9 + charsetlen + ascii + 3 * encoded;
+
+                       if (output) {
+                               /*
+                               ** continue the header: close the current
+                               ** encoded word and indent the next line
+                               */
+                               int curlen = q - output, i;
+                               outlen += prefixlen + 1; /* Header plus \n ": " */
+                               output = mh_xrealloc(output, outlen);
+                               q = output + curlen;
+                               *q++ = '?';
+                               *q++ = '=';
+                               *q++ = '\n';
+                               for (i = 0; i < prefixlen; i++) {
+                                       *q++ = ' ';
+                               }
+                       } else {
+                               /* do the initial allocation */
+                               /*
+                               ** A bit of a hack here; the header can
+                               ** contain multiple spaces (probably at
+                               ** least one) until we get to the actual
+                               ** text. Copy until we get to a non-space.
+                               */
+                               output = mh_xmalloc(outlen);
+                               q = output;
+                               while (is_fws(*p)) {
+                                       *q++ = *p++;
+                               }
+                       }
+
+                       tokenlen = snprintf(q, outlen - (q - output),
+                                       "=?%s?Q?", charset);
+                       q += tokenlen;
+                       column = prefixlen + tokenlen;
+                       newline = 0;
+               }
+
+               /*
+               ** Process each character, encoding if necessary
+               **
+               ** Note that we have a different set of rules if we're
+               ** processing RFC 5322 'phrase' (something you'd see in
+               ** an address header).
+               */
+
+               column++;
+
+               if (*p == ' ') {
+                       *q++ = '_';
+                       ascii--;
+               } else if (isascii((unsigned char) *p) && (phraserules ?
+                               qphrasevalid((unsigned char) *p)
+                               : !qpspecial((unsigned char) *p))) {
+                       *q++ = *p;
+                       ascii--;
+               } else {
+                       snprintf(q, outlen - (q - output), "=%02X",
+                                       (unsigned char) *p);
+                       q += 3;
+                       column += 2;   /* column already incremented by 1 above */
+                       encoded--;
+               }
+
+               p++;
+
+               if (prefixlen == 0) {
+                       /*
+                       ** We haven't been passed in a header name,
+                       ** so don't ever wrap the field (we're likely
+                       ** doing an address).
+                       */
+                       continue;
+               }
+               /*
+               ** We're not allowed more than ENCODELINELIMIT characters
+               ** per line, so reserve some room for the final ?=.
+               */
+               if (column >= ENCODELINELIMIT - 2) {
+                       newline = 1;
+               } else if (utf8) {
+                       /*
+                       ** Okay, this is a bit weird, but to explain a
+                       ** bit more ...
+                       **
+                       ** RFC 2047 prohibits the splitting of multibyte
+                       ** characters across encoded words.  Right now
+                       ** we only handle the case of UTF-8, the most
+                       ** common multibyte encoding.
+                       **
+                       ** p is now pointing at the next input character.
+                       ** If we're using UTF-8 _and_ we'd go over
+                       ** ENCODELINELIMIT given the length of the
+                       ** complete character, then trigger a newline now.
+                       ** Note that we check the length * 3 since we
+                       ** have to allow for the encoded output.
+                       */
+                       if (column + (utf8len(p)*3) > ENCODELINELIMIT - 2) {
+                               newline = 1;
+                       }
+               }
+       }
+
+       if (q == NULL) {
+               /*
+               ** This should never happen, but just in case.
+               ** Found by clang static analyzer.
+               ** (q is only NULL if the loop above never ran, i.e. the
+               ** value was empty.)
+               */
+               admonish (NULL, "null output encoding for %s", *value);
+               return 1;
+       }
+       *q++ = '?';
+       *q++ = '=';
+
+       if (prefixlen) {
+               *q++ = '\n';
+       }
+       *q = '\0';
+
+       free(*value);
+       *value = output;
+
+       return 0;
+}
+
+/*
+** Return the number of bytes making up the UTF-8 character that starts
+** at p: the lead byte plus all continuation bytes that follow it.
+**
+** Returns 0 if p points at the end of the string, at an ASCII byte or
+** into the middle of a multibyte character (a continuation byte).
+*/
+static int
+utf8len(const char *p)
+{
+       const unsigned char *s = (const unsigned char *) p;
+       int n;
+
+       /* NUL, ASCII and continuation bytes (10xxxxxx) start nothing */
+       if (*s == '\0' || isascii(*s) || (*s & 0xc0) == 0x80) {
+               return 0;
+       }
+
+       /* one for the lead byte, then one per continuation byte */
+       for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
+               continue;
+       }
+
+       return n;
+}
+
+/*
+** "Unfold" a header so that it becomes a single line.
+**
+** Each newline, together with the whitespace that follows it, is turned
+** into one space; when nothing but whitespace follows a newline, it is
+** dropped entirely (the encoding routine puts the final newline back).
+** The result is never longer than the input, so a buffer of the
+** original length `len' (plus the terminating NUL) is sufficient.
+**
+** The old *value is freed and replaced by the unfolded copy.
+*/
+static void
+unfold_header(char **value, int len)
+{
+       char *flat = mh_xmalloc(len + 1);
+       char *dst = flat;
+       char *src;
+
+       for (src = *value; *src != '\0'; ) {
+               if (*src != '\n') {
+                       *dst++ = *src++;
+                       continue;
+               }
+
+               /* swallow the newline and all whitespace after it */
+               do {
+                       src++;
+               } while (is_fws(*src));
+
+               if (*src == '\0') {
+                       /* trailing newline: nothing to join with */
+                       break;
+               }
+               *dst++ = ' ';
+       }
+       *dst = '\0';
+
+       free(*value);
+       *value = flat;
+}
+
+/*
+** Encode a header containing addresses. This means we have to parse
+** each address and only encode the display-name or comment field.
+**
+** Arguments:
+**
+** name    - Header name; its length determines the indentation of
+**           continuation lines.
+** value   - In/out.  The old string is freed and replaced by the rebuilt
+**           header, unless we bail out early on an encoding error.
+** charset - Character set passed on to field_encode_quoted().
+**
+** Returns 0 on success, 1 if any address failed to parse or encode.
+*/
+static int
+field_encode_address(const char *name, char **value, const char *charset)
+{
+       int prefixlen = strlen(name) + 2;
+       int column = prefixlen, groupflag;
+       int asciichars, specialchars, eightbitchars;
+       int reformat = 0, errflag = 0;
+       size_t len;
+       char *mp, *cp = NULL, *output = NULL;
+       char *tmpbuf = NULL;
+       size_t tmpbufsize = 0;
+       struct mailname *mn;
+       char errbuf[BUFSIZ];
+
+       /*
+       ** Because these are addresses, we need to handle them individually.
+       **
+       ** Break them down and process them one by one.  This means we
+       ** have to rewrite the whole header, but that's unavoidable.
+       */
+
+       /*
+       ** The output headers always have to start with a space first;
+       ** this is just the way the API works right now.
+       */
+
+       output = add(" ", output);
+
+       for (groupflag = 0; (mp = getname(*value)); ) {
+               if ((mn = getm(mp, NULL, 0, AD_HOST, errbuf)) == NULL) {
+                       advise(NULL, "%s: %s", errbuf, mp);
+                       errflag++;
+                       continue;
+               }
+
+               reformat = 0;
+
+               /*
+               ** We only care if the phrase (m_pers) or any trailing
+               ** comment (m_note) have 8-bit characters.  If doing q-p,
+               ** we also need to encode anything marked as qspecial().
+               ** Unquote it first so the specialchars count is right.
+               */
+
+               if (! mn->m_pers) {
+                       goto check_note;
+               }
+
+               if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
+                       tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+               }
+
+               unquote_string(mn->m_pers, tmpbuf);
+
+               if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+                               &specialchars)) {
+                       /*
+                       ** If we have 8-bit characters, encode it.
+                       */
+
+                       /*
+                       ** This is okay, because the output of
+                       ** unquote_string will be either equal or shorter
+                       ** than the original.
+                       */
+                       strcpy(mn->m_pers, tmpbuf);
+
+                       if (field_encode_quoted(NULL, &mn->m_pers, charset,
+                                       asciichars,
+                                       eightbitchars + specialchars, 1)) {
+                               errflag++;
+                               goto out;
+                       }
+
+                       reformat++;
+               }
+
+               check_note:
+
+               /*
+               ** The "note" field is generally a comment at the end
+               ** of the address, at least as how it's implemented here.
+               ** Notes are always surrounded by parenthesis (since they're
+               ** comments).  Strip them out and then put them back when
+               ** we format the final field, but they do not get encoded.
+               */
+
+               if (! mn->m_note) {
+                       goto do_reformat;
+               }
+
+               if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
+                       tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+               }
+
+               if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
+                       advise(NULL, "Internal error: Invalid note field \"%s\"",
+                                       mn->m_note);
+                       errflag++;
+                       goto out;
+               }
+
+               /* copy the note without its surrounding parentheses */
+               strncpy(tmpbuf, mn->m_note + 1, len - 1);
+               tmpbuf[len - 2] = '\0';
+
+               if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+                               &specialchars)) {
+                       /*
+                       ** If we have 8-bit characters, encode it.
+                       */
+
+                       if (field_encode_quoted(NULL, &tmpbuf, charset,
+                                       asciichars,
+                                       eightbitchars + specialchars, 1)) {
+                               errflag++;
+                               goto out;
+                       }
+
+                       reformat++;
+
+                       /*
+                       ** Make sure the size of tmpbuf is correct (it
+                       ** always gets reallocated in the above functions).
+                       */
+
+                       tmpbufsize = strlen(tmpbuf) + 1;
+
+                       /*
+                       ** Put the note field back surrounded by
+                       ** parenthesis.
+                       */
+
+                       mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
+
+                       snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
+               }
+
+do_reformat:
+
+               /*
+               ** So, some explanation is in order.
+               **
+               ** We know we need to rewrite at least one address in the
+               ** header, otherwise we wouldn't be here.  If we had to
+               ** reformat this particular address, then run it through
+               ** adrformat().  Otherwise we can use m_text directly.
+               */
+
+               /*
+               ** If we were in a group but are no longer, make sure we
+               ** add a semicolon (which needs to be FIRST, as it needs
+               ** to be at the end of the last address).
+               */
+
+               if (groupflag && ! mn->m_ingrp) {
+                       output = add(";", output);
+                       column += 1;
+               }
+
+               groupflag = mn->m_ingrp;
+
+               if (mn->m_gname) {
+                       cp = add(mn->m_gname, NULL);
+               }
+
+               if (reformat) {
+                       cp = add(adrformat(mn), cp);
+               } else {
+                       cp = add(mn->m_text, cp);
+               }
+
+               len = strlen(cp);
+
+               /*
+               ** If we're not at the beginning of the line, add a
+               ** comma and either a space or a newline.
+               */
+
+               if (column != prefixlen) {
+                       if (len + column + 2 > OUTPUTLINELEN) {
+
+                               /*
+                               ** The separator is ",\n" followed by
+                               ** prefixlen spaces and a NUL.  Grow the
+                               ** scratch buffer if it is too small for
+                               ** that (it may still be unallocated when
+                               ** no address so far had a phrase or note).
+                               */
+                               if ((size_t) (prefixlen + 3) > tmpbufsize) {
+                                       tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
+                               }
+
+                               snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
+                               output = add(tmpbuf, output);
+                       } else {
+                               output = add(", ", output);
+                               column += 2;
+                       }
+               }
+
+               /*
+               ** Finally add the address
+               */
+
+               output = add(cp, output);
+               column += len;
+               free(cp);
+               cp = NULL;
+       }
+
+       /*
+       ** Just in case we're at the end of a list
+       */
+
+       if (groupflag) {
+               output = add(";", output);
+       }
+
+       output = add("\n", output);
+
+       free(*value);
+       *value = output;
+       output = NULL;
+
+out:
+
+       /* free(NULL) is a no-op, so no guards are needed here */
+       free(tmpbuf);
+       free(output);
+
+       return errflag > 0;
+}
+
+/*
+** Scan a string and count the characters by the way they would have to
+** be written in a Q-encoded phrase.
+**
+** asciilen      - receives the number of ASCII characters
+** eightbitchars - receives the number of non-ASCII characters
+** specialchars  - receives the number of ASCII characters that are
+**                 neither valid in a phrase nor a space
+**
+** Returns non-zero if the string contains any non-ASCII character.
+*/
+
+static int
+scanstring(const char *string, int *asciilen, int *eightbitchars,
+               int *specialchars)
+{
+       int nascii = 0, nhigh = 0, nspecial = 0;
+       const unsigned char *s;
+
+       for (s = (const unsigned char *) string; *s != '\0'; s++) {
+               if (!isascii(*s)) {
+                       nhigh++;
+                       continue;
+               }
+               nascii++;
+               /*
+               ** A space is no valid phrase character, yet it is not
+               ** counted as special, because q-p can write it directly
+               ** as an underscore.
+               */
+               if (*s != ' ' && !qphrasevalid(*s)) {
+                       nspecial++;
+               }
+       }
+
+       *asciilen = nascii;
+       *eightbitchars = nhigh;
+       *specialchars = nspecial;
+
+       return nhigh > 0;
+}
+
+#if 0
+
+/*
+** Decide which encoding algorithm to use if none is given: whichever
+** of the two yields the shorter result.
+**
+** Arguments are:
+**
+** ascii       - Number of ASCII characters in to-be-encoded string.
+** specials    - Number of ASCII characters in to-be-encoded string that
+**                still require encoding under quoted-printable.  Note that
+**                these are included in the "ascii" total.
+** eightbits   - Eight-bit characters in the to-be-encoded string.
+**
+** Returns one of CE_BASE64 or CE_QUOTED.
+*/
+static int
+pref_encoding(int ascii, int specials, int eightbits)
+{
+       /* q-p: one byte per plain character, three per encoded one */
+       int qplen = ascii - specials + (specials + eightbits) * 3;
+       /* base64: see the base64len() macro for details */
+       int b64len = base64len(ascii + eightbits);
+
+       if (b64len < qplen) {
+               return CE_BASE64;
+       }
+       return CE_QUOTED;
+}
+
+#endif
index a8e773a..6cfc176 100644 (file)
@@ -688,32 +688,10 @@ fmt_scan(struct format *format, char *scanl, int width, int *dat)
                /* UNQUOTEs RFC-2822 quoted-string and quoted-pair */
                case FT_LS_UNQUOTE:
                        if (str) {
-                               int m;
                                strncpy(buffer, str, sizeof(buffer));
                                /* strncpy doesn't NUL-terminate if it fills the buffer */
                                buffer[sizeof(buffer)-1] = '\0';
-                               str = buffer;
-
-                               /* we will parse from buffer to buffer2 */
-                               n = 0; /* n is the input position in str */
-                               m = 0; /* m is the ouput position in buffer2 */
-
-                               while ( str[n] != '\0') {
-                                       switch ( str[n] ) {
-                                       case '\\':
-                                               n++;
-                                               if ( str[n] != '\0')
-                                                       buffer2[m++] = str[n++];
-                                               break;
-                                       case '"':
-                                               n++;
-                                               break;
-                                       default:
-                                               buffer2[m++] = str[n++];
-                                               break;
-                                       }
-                               }
-                               buffer2[m] = '\0';
+                               unquote_string(buffer, buffer2);
                                str = buffer2;
                        }
                        break;
diff --git a/sbr/unquote.c b/sbr/unquote.c
new file mode 100644 (file)
index 0000000..aa40a0c
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+** unquote.c: Handle quote removal and quoted-pair strings on
+** RFC 2822-5322 atoms.
+**
+** This code is Copyright (c) 2013, by the authors of nmh.  See the
+** COPYRIGHT file in the root directory of the nmh distribution for
+** complete copyright information.
+*/
+
+#include <h/mh.h>
+
+/*
+** Remove quotes and quoted-pair sequences from RFC-5322 atoms.
+**
+** The algorithm is simpler than it technically should be: a double
+** quote is dropped, and a backslash is dropped while the character
+** following it is copied verbatim (so an escaped quote survives).  A
+** backslash at the very end of the input is dropped.
+**
+** Arguments:
+**
+** input      - The input string
+** output     - The output string; it must have at least as much room as
+**              the input string.  The result is never longer than the
+**              input; it might be shorter.
+*/
+void
+unquote_string(const char *input, char *output)
+{
+       const char *src = input;
+       char *dst = output;
+
+       while (*src != '\0') {
+               if (*src == '"') {
+                       /* bare quote: eat it */
+                       src++;
+                       continue;
+               }
+               if (*src == '\\') {
+                       /* quoted-pair: skip the escape character ... */
+                       src++;
+                       if (*src == '\0') {
+                               break;
+                       }
+               }
+               /* ... and copy whatever character we are at now */
+               *dst++ = *src++;
+       }
+       *dst = '\0';
+}
diff --git a/test/tests/mhbuild/test-header-encode b/test/tests/mhbuild/test-header-encode
new file mode 100644 (file)
index 0000000..422f970
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/sh
+######################################################
+#
+# Test encoding headers according to RFC 2047
+#
+######################################################
+
+# TODO: Move to a common file tests can source; need more framework...
+failed=0
+export MM_CHARSET=UTF-8
+check() {
+    diff -u $expected $actual
+    if [ $? -ne 0 ]; then
+        failed=$((failed + 1))
+    fi
+}
+
+# Make a draft whose Subject header contains an 8-bit (UTF-8) character.
+mkdraft() {
+    cat > $draft <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä test
+
+This is a test
+EOF
+}
+
+# Munge the Content-Id from the draft after mhbuild, so we have a reliable
+# value to check.
+mungedraft() {
+    sed 's/\(Content-ID:\) <[^>][^>]*>/\1 <TESTID>/' $draft > $actual
+}
+
+draft=$MH_TEST_DIR/$$.draft
+expected=$MH_TEST_DIR/$$.expected
+actual=$MH_TEST_DIR/$$.actual
+
+# check mhbuild
+cat > $expected <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?Q?This_is_=C3=A4_test?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+Content-ID: <TESTID>
+
+This is a test
+EOF
+mkdraft
+mhbuild $draft
+mungedraft
+check
+
+exit $failed
diff --git a/test/tests/pick/test-rfc2047 b/test/tests/pick/test-rfc2047
new file mode 100644 (file)
index 0000000..b89a71a
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+######################################################
+#
+# Test pick parse rfc2047-header
+#
+######################################################
+
+set -e
+
+expected_err=$MH_TEST_DIR/$$.expected_err
+expected_out=$MH_TEST_DIR/$$.expected_out
+actual_err=$MH_TEST_DIR/$$.actual_err
+actual_out=$MH_TEST_DIR/$$.actual_out
+
+# Test MIME-encoded header.
+cat >"$MH_TEST_DIR/Mail/inbox/13" <<EOF
+From: Test13 <test13@example.com>
+To: Some User <user@example.com>
+Date: Fri, 29 Sep 2006 00:00:00
+Message-Id: 13@test.nmh
+Subject: =?us-ascii?q?=66=6f=6f?=
+ =?utf-8?q?=62=61=72?=
+
+This is message number 13, with MIME-encoded Subject "foobar".
+EOF
+
+echo 13 >"$expected_out"
+cat /dev/null > $expected_err
+
+pick -subject foobar 13 > $actual_out 2> $actual_err
+diff -u $expected_err $actual_err
+diff -u $expected_out $actual_out
index 140cfdb..1fdedb6 100644 (file)
@@ -319,6 +319,7 @@ build_mime(char *infile)
        struct part **pp;
        CT ct;
        FILE *in;
+       HF hp;
 
        umask(~m_gmprot());
 
@@ -411,6 +412,16 @@ finish_field:
        }
 
        /*
+       ** Iterate through the list of headers and call the function to
+       ** MIME-ify them if required.
+       */
+       for (hp = ct->c_first_hf; hp != NULL; hp = hp->next) {
+               if (encode_rfc2047(hp->name, &hp->value, NULL)) {
+                       adios(EX_DATAERR, NULL, "Unable to encode header \"%s\"", hp->name);
+               }
+       }
+
+       /*
        ** Now add the MIME-Version header field
        ** to the list of header fields.
        */
@@ -1529,6 +1540,9 @@ build_headers(CT ct)
        if (ct->c_descr) {
                np = getcpy(DESCR_FIELD);
                vp = concat(" ", ct->c_descr, NULL);
+               if (encode_rfc2047(DESCR_FIELD, &vp, NULL)) {
+                       adios(EX_DATAERR, NULL, "Unable to encode %s header", DESCR_FIELD);
+               }
                add_header(ct, np, vp);
        }
 
index 819d81a..8ae58a4 100644 (file)
@@ -381,6 +381,7 @@ static struct swit parswit[] = {
 
 
 static char linebuf[LBSIZE + 1];
+static char decoded_linebuf[LBSIZE + 1];
 
 /* the magic array for case-independence */
 static char cc[] = {
@@ -1016,8 +1017,17 @@ plist
                p1 = linebuf;
                p2 = n->n_expbuf;
 
+               /*
+               ** Attempt to decode as a MIME header.  If it's the
+               ** last header, body will be 1 and lf will be at least 1.
+               */
+               if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
+                               decoded_linebuf, sizeof decoded_linebuf)) {
+                       p1 = decoded_linebuf;
+               }
+
                if (n->n_circf) {
-                       if (advance(p1, p2))
+                       if (advance(p1, p2)) 
                                return 1;
                        continue;
                }
index 40d8f72..5e4cd67 100755 (executable)
@@ -3,7 +3,7 @@
 # Send multiples files non-interactively
 
 # adjust if needed
-attachment_header='Attach'
+attachment_header=`mhparam Attachment-Header`
 
 
 if [ $# -lt 3 ]; then
@@ -23,13 +23,12 @@ Files:     $*
 !
 
 draft=`mktemp /tmp/sendfiles.XXXXXX`
-trap 'rm -f "$draft"' 1 2 3 15
-cat >"$draft" <<!
-To: $rcpt
-Subject: $subject
-!
+trap 'rm -f "$draft"' 1 2 15
+
+anno "$draft" -component To -text "$rcpt" -nodate
+anno "$draft" -component Subject -text "$subject" -nodate
 for i in "$@" ; do
-       echo "$attachment_header: $i" >>"$draft"
+       anno "$draft" -component  "$attachment_header" -text "$i" -nodate
 done
 
 send "$draft"