Rework in charset code. (Includes renaming.)
author: markus schnalke <meillo@marmaro.de>
Mon, 30 Apr 2012 16:52:19 +0000 (18:52 +0200)
committer: markus schnalke <meillo@marmaro.de>
Mon, 30 Apr 2012 16:52:19 +0000 (18:52 +0200)
Renamed check_charset() to is_native_charset(), and sbr/check_charset.c to
sbr/charset.c. Removed caching in favor of simpler code.
is_native_charset() does not check for prefixes anymore. The code didn't
match the comments anyway and it matched when comparing the first 10 [sic!]
chars of `ISO-8859-1whatever' with `ISO-8859-11', which is not wanted.

h/prototypes.h
sbr/Makefile.in
sbr/charset.c [new file with mode: 0644]
sbr/check_charset.c [deleted file]
sbr/fmt_rfc2047.c
uip/mhshowsbr.c

index f2c4652..d8231f5 100644 (file)
@@ -28,7 +28,7 @@ void advertise(char *, char *, char *, va_list);
 void advise(char *, char *, ...);
 void ambigsw(char *, struct swit *);
 char **brkstring(char *, char *, char *);
-int check_charset(char *, int);
+int is_native_charset(char *);
 char *concat(char *, ...);
 int context_del(char *);
 char *context_find(char *);
index 6c3d2c2..ca49c46 100644 (file)
@@ -44,7 +44,7 @@ SIGNAL_H = @SIGNAL_H@
 
 # source for library functions
 SRCS = addrsbr.c ambigsw.c brkstring.c  \
-       check_charset.c concat.c context_del.c  \
+       charset.c concat.c context_del.c  \
        context_find.c context_foil.c context_read.c  \
        context_replace.c context_save.c \
        cpydata.c cpydgst.c crawl_folders.c  \
diff --git a/sbr/charset.c b/sbr/charset.c
new file mode 100644 (file)
index 0000000..56f085c
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+** charset.c -- routines for character sets
+**
+** This code is Copyright (c) 2002, by the authors of nmh.  See the
+** COPYRIGHT file in the root directory of the nmh distribution for
+** complete copyright information.
+*/
+
+#include <h/mh.h>
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
+
+/*
+** Get the current character set.
+**
+** The MM_CHARSET environment variable takes precedence. Only if it is
+** unset, and only when the build defines both HAVE_NL_LANGINFO and
+** CODESET, the value of nl_langinfo(CODESET) is used instead, after
+** being passed through norm_charmap().
+**
+** The result may be NULL (e.g. MM_CHARSET is unset and the nl_langinfo
+** fallback is not compiled in); callers have to supply their own
+** default in that case. Nothing is allocated here: the pointer comes
+** straight from getenv() or norm_charmap().
+*/
+char *
+get_charset()
+{
+       char *charset = getenv("MM_CHARSET");
+#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
+       if (!charset) {
+               charset = norm_charmap(nl_langinfo(CODESET));
+       }
+#endif
+       return charset;
+}
+
+
+/*
+** Check if we can display a given character set natively.
+**
+** str is the name of the character set to test. It is compared, using
+** mh_strcasecmp(), against the current character set as reported by
+** get_charset(); if get_charset() returns NULL, "US-ASCII" is assumed.
+** The comparison is on the complete name -- unlike the former
+** check_charset(), no length is passed and no prefix of str is matched.
+**
+** Returns 1 if str names the current character set, or if str is
+** "US-ASCII" while the current character set is a superset of it
+** (see below). Returns 0 otherwise.
+*/
+int
+is_native_charset(char *str)
+{
+       char *mm_charset = NULL;
+
+       if (!(mm_charset = get_charset())) {
+               mm_charset = "US-ASCII";
+       }
+       if (mh_strcasecmp(str, mm_charset)==0) {
+               return 1;
+       }
+
+       /*
+       ** US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets,
+       ** so US-ASCII text is native as well whenever the current character
+       ** set starts with "ISO-8859-" (first 9 characters) or is "UTF-8".
+       */
+       if (strncasecmp("ISO-8859-", mm_charset, 9)==0 ||
+                       mh_strcasecmp("UTF-8", mm_charset)==0) {
+               if (mh_strcasecmp(str, "US-ASCII")==0) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+
+/*
+** Return the name of the character set we are
+** using for 8bit text.
+**
+** This is whatever get_charset() reports, or the placeholder
+** "x-unknown" if get_charset() returns NULL. The result is therefore
+** never NULL. It is looked up anew on every call; nothing is cached.
+*/
+char *
+write_charset_8bit(void)
+{
+       char *mm_charset = NULL;
+
+       if (!(mm_charset = get_charset())) {
+               mm_charset = "x-unknown";
+       }
+       return mm_charset;
+}
diff --git a/sbr/check_charset.c b/sbr/check_charset.c
deleted file mode 100644 (file)
index d11d772..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-** check_charset.c -- routines for character sets
-**
-** This code is Copyright (c) 2002, by the authors of nmh.  See the
-** COPYRIGHT file in the root directory of the nmh distribution for
-** complete copyright information.
-*/
-
-#include <h/mh.h>
-#ifdef HAVE_LANGINFO_H
-# include <langinfo.h>
-#endif
-
-
-/*
-** Get the current character set
-*/
-char *
-get_charset()
-{
-       char *charset = getenv("MM_CHARSET");
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
-       if (!charset)
-               charset = norm_charmap(nl_langinfo(CODESET));
-#endif
-       return charset;
-}
-
-
-/*
-** Check if we can display a given character set natively.
-** We are passed the length of the initial part of the
-** string to check, since we want to allow the name of the
-** character set to be a substring of a larger string.
-*/
-
-int
-check_charset(char *str, int len)
-{
-       static char *mm_charset = NULL;
-       static char *alt_charset = NULL;
-       static int mm_len;
-       static int alt_len;
-
-       /* Cache the name of our default character set */
-       if (!mm_charset) {
-               if (!(mm_charset = get_charset()))
-                       mm_charset = "US-ASCII";
-               mm_len = strlen(mm_charset);
-
-               /* US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets */
-               if (!strncasecmp("ISO-8859-", mm_charset, 9) ||
-                       !mh_strcasecmp("UTF-8", mm_charset)) {
-                       alt_charset = "US-ASCII";
-                       alt_len = strlen(alt_charset);
-               }
-       }
-
-       /* Check if character set is OK */
-       if ((len == mm_len) && !strncasecmp(str, mm_charset, mm_len))
-               return 1;
-       if (alt_charset && (len == alt_len) && !strncasecmp(str, alt_charset, alt_len))
-               return 1;
-
-       return 0;
-}
-
-
-/*
-** Return the name of the character set we are
-** using for 8bit text.
-*/
-char *
-write_charset_8bit(void)
-{
-       static char *mm_charset = NULL;
-
-       /*
-       ** Cache the name of the character set to
-       ** use for 8bit text.
-       */
-       if (!mm_charset && !(mm_charset = get_charset()))
-               mm_charset = "x-unknown";
-
-       return mm_charset;
-}
index dc123e5..4e8231f 100644 (file)
@@ -125,22 +125,25 @@ decode_rfc2047(char *str, char *dst, size_t dstlen)
 
                        if (!*pp)
                                continue;
+                       *pp = '\0';
 
                        /* Check if character set can be handled natively */
-                       if (!check_charset(startofmime, pp - startofmime)) {
+                       if (!is_native_charset(startofmime)) {
 #ifdef HAVE_ICONV
                                /* .. it can't. We'll use iconv then. */
-                               *pp = '\0';
                                cd = iconv_open(get_charset(), startofmime);
                                fromutf8 = !mh_strcasecmp(startofmime, "UTF-8");
                                *pp = '?';
-                               if (cd == (iconv_t)-1) continue;
+                               if (cd == (iconv_t)-1)
+                                       continue;
                                use_iconv = 1;
 #else
+                               *pp = '?';
                                continue;
 #endif
                        }
 
+                       *pp = '?';
                        startofmime = pp + 1;
 
                        /* Check for valid encoding type */
index 2a8bbf9..9b7e7b0 100644 (file)
@@ -519,15 +519,14 @@ show_text(CT ct, int alternate)
        ** if it is not a text part of a multipart/alternative
        */
        if (!alternate || ct->c_subtype == TEXT_PLAIN) {
-               if (ct->c_charset && !check_charset(ct->c_charset,
-                               strlen(ct->c_charset))) {
+               if (ct->c_charset && !is_native_charset(ct->c_charset)) {
                        snprintf(buffer, sizeof(buffer), "%%liconv -f '%s'",
                                        ct->c_charset);
                } else {
                        snprintf(buffer, sizeof(buffer), "%%lcat");
                }
-               cp = (ct->c_showproc = getcpy(buffer));
-               return show_content_aux(ct, alternate, cp, NULL);
+               ct->c_showproc = getcpy(buffer);
+               return show_content_aux(ct, alternate, ct->c_showproc, NULL);
        }
 
        return NOTOK;