From: Oliver Kiddle Date: Wed, 23 Feb 2005 16:20:47 +0000 (+0000) Subject: use iconv to convert RFC-2047 encoded headers to the character set used X-Git-Tag: RELEASE_1_2~44 X-Git-Url: http://git.marmaro.de/?p=mmh;a=commitdiff_plain;h=4c24408bdff496a631709326b0d07a4e12fa9277 use iconv to convert RFC-2047 encoded headers to the character set used by the current locale --- diff --git a/ChangeLog b/ChangeLog index 9a6c974..d835a07 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2005-02-23 Oliver Kiddle + * use iconv to convert RFC-2047 encoded headers to the + character set used by the current locale + * sbr/folder_read.c fix Debian bug #202667: crash when a message's filename overflows an int when converted diff --git a/configure.in b/configure.in index 85e5181..1b1df6b 100644 --- a/configure.in +++ b/configure.in @@ -445,7 +445,7 @@ AC_HEADER_STAT AC_CHECK_HEADERS(string.h memory.h stdlib.h unistd.h errno.h fcntl.h \ limits.h crypt.h termcap.h termio.h termios.h locale.h \ langinfo.h netdb.h sys/param.h sys/time.h sys/utsname.h \ - arpa/inet.h arpa/ftp.h) + iconv.h arpa/inet.h arpa/ftp.h) AC_CACHE_CHECK(POSIX termios, nmh_cv_sys_posix_termios, @@ -547,6 +547,46 @@ for lib in $termcap_curses_order; do done AC_SUBST(TERMLIB)dnl +dnl --------------- +dnl CHECK FOR ICONV +dnl --------------- + +dnl Find iconv. 
It may be in libiconv and may be iconv() or libiconv() +if test "x$ac_cv_header_iconv_h" = "xyes"; then + AC_CHECK_FUNC(iconv, ac_found_iconv=yes, ac_found_iconv=no) + if test "x$ac_found_iconv" = "xno"; then + AC_CHECK_LIB(iconv, iconv, ac_found_iconv=yes) + if test "x$ac_found_iconv" = "xno"; then + AC_CHECK_LIB(iconv, libiconv, ac_found_iconv=yes) + fi + if test "x$ac_found_iconv" != "xno"; then + LIBS="-liconv $LIBS" + fi + fi +fi +if test "x$ac_found_iconv" = xyes; then + AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.]) +fi + +dnl Check if iconv uses const in prototype declaration +if test "x$ac_found_iconv" = "xyes"; then + AC_CACHE_CHECK(for iconv declaration, ac_cv_iconv_const, + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h> + #include <iconv.h>]], + [[#ifdef __cplusplus + "C" + #endif + #if defined(__STDC__) || defined(__cplusplus) + size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft); + #else + size_t iconv(); + #endif]])], + [ac_cv_iconv_const=], + [ac_cv_iconv_const=const])]) + AC_DEFINE_UNQUOTED([ICONV_CONST], $ac_cv_iconv_const, + [Define as const if the declaration of iconv() needs const.]) +fi + dnl -------------- dnl CHECK FOR NDBM dnl -------------- diff --git a/h/prototypes.h b/h/prototypes.h index c2a876f..edddc47 100644 --- a/h/prototypes.h +++ b/h/prototypes.h @@ -61,6 +61,7 @@ int gans (char *, struct swit *); char **getans (char *, struct swit *); int getanswer (char *); char **getarguments (char *, int, char **, int); +char *get_charset(); char *getcpy (char *); char *getfolder(int); int lkclose(int, char*); diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index b4bf4eb..a87fc0e 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -10,6 +10,10 @@ */ #include <h/mh.h> +#ifdef HAVE_ICONV +# include <iconv.h> +# include <errno.h> +#endif static signed char hexindex[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, @@ -61,6 +65,12 @@ decode_rfc2047 (char *str, char *dst) int between_encodings = 0; /* 
are we between two encodings? */ int equals_pending = 0; /* is there a '=' pending? */ int whitespace = 0; /* how much whitespace between encodings? */ +#ifdef HAVE_ICONV + int use_iconv = 0; /* are we converting encoding with iconv? */ + iconv_t cd; + int fromutf8; + char *saveq, *convbuf; +#endif if (!str) return 0; @@ -73,6 +83,14 @@ decode_rfc2047 (char *str, char *dst) return 0; for (p = str, q = dst; *p; p++) { + + /* reset iconv */ +#ifdef HAVE_ICONV + if (use_iconv) { + iconv_close(cd); + use_iconv = 0; + } +#endif /* * If we had an '=' character pending from * last iteration, then add it first. @@ -106,9 +124,20 @@ decode_rfc2047 (char *str, char *dst) if (!*pp) continue; - /* Check if character set is OK */ - if (!check_charset(startofmime, pp - startofmime)) + /* Check if character set can be handled natively */ + if (!check_charset(startofmime, pp - startofmime)) { +#ifdef HAVE_ICONV + /* .. it can't. We'll use iconv then. */ + *pp = '\0'; + cd = iconv_open(get_charset(), startofmime); + fromutf8 = !strcasecmp(startofmime, "UTF-8"); + *pp = '?'; + if (cd == (iconv_t)-1) continue; + use_iconv = 1; +#else continue; +#endif + } startofmime = pp + 1; @@ -159,6 +188,14 @@ decode_rfc2047 (char *str, char *dst) if (between_encodings) q -= whitespace; +#ifdef HAVE_ICONV + if (use_iconv) { + saveq = q; + if (!(q = convbuf = (char *)malloc(endofmime - startofmime))) + continue; + } +#endif + /* Now decode the text */ if (quoted_printable) { for (pp = startofmime; pp < endofmime; pp++) { @@ -218,6 +255,35 @@ decode_rfc2047 (char *str, char *dst) } } +#ifdef HAVE_ICONV + /* Convert to native character set */ + if (use_iconv) { + size_t inbytes = q - convbuf; + size_t outbytes = BUFSIZ; + ICONV_CONST char *start = convbuf; + + while (inbytes) { + if (iconv(cd, &start, &inbytes, &saveq, &outbytes) == + (size_t)-1) { + if (errno != EILSEQ) break; + /* character couldn't be converted. we output a `?' 
+ * and try to carry on which won't work if + * either encoding was stateful */ + iconv (cd, 0, 0, &saveq, &outbytes); + *saveq++ = '?'; + /* skip to next input character */ + if (fromutf8) { + for (start++;(*start & 192) == 128;start++) + inbytes--; + } else + start++, inbytes--; + } + } + q = saveq; + free(convbuf); + } +#endif + /* * Now that we are done decoding this particular * encoded word, advance string to trailing '='. @@ -229,6 +295,9 @@ decode_rfc2047 (char *str, char *dst) whitespace = 0; /* re-initialize amount of whitespace */ } } +#ifdef HAVE_ICONV + if (use_iconv) iconv_close(cd); +#endif /* If an equals was pending at end of string, add it now. */ if (equals_pending) diff --git a/sbr/fmt_scan.c b/sbr/fmt_scan.c index c9b882d..357484b 100644 --- a/sbr/fmt_scan.c +++ b/sbr/fmt_scan.c @@ -130,7 +130,7 @@ match (char *str, char *sub) sp++;\ }\ while ((c = (unsigned char) *sp++) && --i >= 0 && cp < ep)\ - if (isgraph(c)) \ + if (!iscntrl(c) && !isspace(c)) \ *cp++ = c;\ else {\ while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\ @@ -148,7 +148,7 @@ match (char *str, char *sub) while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\ sp++;\ while((c = (unsigned char) *sp++) && cp < ep)\ - if (isgraph(c)) \ + if (!iscntrl(c) && !isspace(c)) \ *cp++ = c;\ else {\ while ((c = (unsigned char) *sp) && (iscntrl(c) || isspace(c)))\