use iconv to convert RFC-2047 encoded headers to the character set used
author Oliver Kiddle <okiddle@yahoo.co.uk>
Wed, 23 Feb 2005 16:20:47 +0000 (16:20 +0000)
committer Oliver Kiddle <okiddle@yahoo.co.uk>
Wed, 23 Feb 2005 16:20:47 +0000 (16:20 +0000)
by the current locale

ChangeLog
configure.in
h/prototypes.h
sbr/fmt_rfc2047.c
sbr/fmt_scan.c

index 9a6c974..d835a07 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2005-02-23  Oliver Kiddle  <okiddle@yahoo.co.uk>
 
+       * use iconv to convert RFC-2047 encoded headers to the
+       character set used by the current locale
+
        * sbr/folder_read.c fix Debian bug #202667: crash when a
        message's filename overflows an int when converted
 
index 85e5181..1b1df6b 100644 (file)
@@ -445,7 +445,7 @@ AC_HEADER_STAT
 AC_CHECK_HEADERS(string.h memory.h stdlib.h unistd.h errno.h fcntl.h \
                  limits.h crypt.h termcap.h termio.h termios.h locale.h \
                  langinfo.h netdb.h sys/param.h sys/time.h sys/utsname.h \
-                 arpa/inet.h arpa/ftp.h)
+                 iconv.h arpa/inet.h arpa/ftp.h)
 
 
 AC_CACHE_CHECK(POSIX termios, nmh_cv_sys_posix_termios,
@@ -547,6 +547,46 @@ for lib in $termcap_curses_order; do
 done
 AC_SUBST(TERMLIB)dnl
 
+dnl ---------------
+dnl CHECK FOR ICONV
+dnl ---------------
+
+dnl Find iconv. It may be in libiconv and may be iconv() or libiconv()
+if test "x$ac_cv_header_iconv_h" = "xyes"; then
+  AC_CHECK_FUNC(iconv, ac_found_iconv=yes, ac_found_iconv=no)
+  if test "x$ac_found_iconv" = "xno"; then
+    AC_CHECK_LIB(iconv, iconv, ac_found_iconv=yes)
+    if test "x$ac_found_iconv" = "xno"; then
+      AC_CHECK_LIB(iconv, libiconv, ac_found_iconv=yes)
+    fi
+    if test "x$ac_found_iconv" != "xno"; then
+      LIBS="-liconv $LIBS"
+    fi
+  fi
+fi
+if test "x$ac_found_iconv" = xyes; then
+  AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.])
+fi
+
+dnl Check if iconv uses const in prototype declaration
+if test "x$ac_found_iconv" = "xyes"; then
+  AC_CACHE_CHECK(for iconv declaration, ac_cv_iconv_const,
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h>
+        #include <iconv.h>]],
+        [[#ifdef __cplusplus
+          "C"
+          #endif
+          #if defined(__STDC__) || defined(__cplusplus)
+          size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);
+          #else
+          size_t iconv();
+          #endif]])],
+      [ac_cv_iconv_const=],
+      [ac_cv_iconv_const=const])])
+  AC_DEFINE_UNQUOTED([ICONV_CONST], $ac_cv_iconv_const,
+    [Define as const if the declaration of iconv() needs const.])
+fi
+
 dnl --------------
 dnl CHECK FOR NDBM
 dnl --------------
index c2a876f..edddc47 100644 (file)
@@ -61,6 +61,7 @@ int gans (char *, struct swit *);
 char **getans (char *, struct swit *);
 int getanswer (char *);
 char **getarguments (char *, int, char **, int);
+char *get_charset(void);
 char *getcpy (char *);
 char *getfolder(int);
 int lkclose(int, char*);
index b4bf4eb..a87fc0e 100644 (file)
  */
 
 #include <h/mh.h>
+#ifdef HAVE_ICONV
+#  include <iconv.h>
+#  include <errno.h>
+#endif
 
 static signed char hexindex[] = {
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
@@ -61,6 +65,12 @@ decode_rfc2047 (char *str, char *dst)
     int between_encodings = 0; /* are we between two encodings?          */
     int equals_pending = 0;    /* is there a '=' pending?                */
     int whitespace = 0;                /* how much whitespace between encodings? */
+#ifdef HAVE_ICONV
+    int use_iconv = 0;          /* are we converting encoding with iconv? */
+    iconv_t cd;
+    int fromutf8;
+    char *saveq, *convbuf;
+#endif
 
     if (!str)
        return 0;
@@ -73,6 +83,14 @@ decode_rfc2047 (char *str, char *dst)
        return 0;
 
     for (p = str, q = dst; *p; p++) {
+
+        /* reset iconv */
+#ifdef HAVE_ICONV
+        if (use_iconv) {
+           iconv_close(cd);
+           use_iconv = 0;
+        }
+#endif
        /*
         * If we had an '=' character pending from
         * last iteration, then add it first.
@@ -106,9 +124,20 @@ decode_rfc2047 (char *str, char *dst)
            if (!*pp)
                continue;
 
-           /* Check if character set is OK */
-           if (!check_charset(startofmime, pp - startofmime))
+           /* Check if character set can be handled natively */
+           if (!check_charset(startofmime, pp - startofmime)) {
+#ifdef HAVE_ICONV
+               /* .. it can't. We'll use iconv then. */
+               *pp = '\0';
+               cd = iconv_open(get_charset(), startofmime);
+               fromutf8 = !strcasecmp(startofmime, "UTF-8");
+               *pp = '?';
+                if (cd == (iconv_t)-1) continue;
+               use_iconv = 1;
+#else
                continue;
+#endif
+           }
 
            startofmime = pp + 1;
 
@@ -159,6 +188,14 @@ decode_rfc2047 (char *str, char *dst)
            if (between_encodings)
                q -= whitespace;
 
+#ifdef HAVE_ICONV
+           if (use_iconv) {    /* decode into a scratch buffer first */
+               if (!(convbuf = (char *)malloc(endofmime - startofmime)))
+                   continue;   /* q is untouched, so it is still a valid output pointer */
+               saveq = q, q = convbuf;
+           }
+#endif
+
            /* Now decode the text */
            if (quoted_printable) {
                for (pp = startofmime; pp < endofmime; pp++) {
@@ -218,6 +255,35 @@ decode_rfc2047 (char *str, char *dst)
                }
            }
 
+#ifdef HAVE_ICONV
+            /* Convert the decoded bytes to the native character set */
+           if (use_iconv) {
+               size_t inbytes = q - convbuf;
+               size_t outbytes = BUFSIZ;
+               ICONV_CONST char *start = convbuf;
+
+               while (inbytes) {
+                   if (iconv(cd, &start, &inbytes, &saveq, &outbytes) ==
+                           (size_t)-1) {
+                       if (errno != EILSEQ) break;
+                       /* character couldn't be converted. we output a `?'
+                        * and try to carry on which won't work if
+                        * either encoding was stateful */
+                       iconv (cd, NULL, NULL, &saveq, &outbytes);
+                       if (!outbytes) break;
+                       *saveq++ = '?', outbytes--;
+                       /* skip the bad byte, and for UTF-8 its continuation
+                        * bytes too, keeping inbytes in step with start */
+                       start++, inbytes--;
+                       while (fromutf8 && inbytes && (*start & 192) == 128)
+                           start++, inbytes--;
+                   }
+               }
+               q = saveq;
+               free(convbuf);
+           }
+#endif
+
            /*
             * Now that we are done decoding this particular
             * encoded word, advance string to trailing '='.
@@ -229,6 +295,9 @@ decode_rfc2047 (char *str, char *dst)
            whitespace = 0;             /* re-initialize amount of whitespace */
        }
     }
+#ifdef HAVE_ICONV
+    if (use_iconv) iconv_close(cd);
+#endif
 
     /* If an equals was pending at end of string, add it now. */
     if (equals_pending)
index c9b882d..357484b 100644 (file)
@@ -130,7 +130,7 @@ match (char *str, char *sub)
                                sp++;\
                        }\
                        while ((c = (unsigned char) *sp++) && --i >= 0 && cp < ep)\
-                               if (isgraph(c)) \
+                               if (!iscntrl(c) && !isspace(c)) \
                                    *cp++ = c;\
                                else {\
                                        while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\
@@ -148,7 +148,7 @@ match (char *str, char *sub)
                    while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\
                        sp++;\
                    while((c = (unsigned char) *sp++) && cp < ep)\
-                       if (isgraph(c)) \
+                       if (!iscntrl(c) && !isspace(c)) \
                            *cp++ = c;\
                        else {\
                            while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\