From: markus schnalke <meillo@marmaro.de>
Date: Mon, 30 Apr 2012 16:52:19 +0000 (+0200)
Subject: Rework in charset code. (Includes renaming.)
X-Git-Tag: mmh-thesis-end~73
X-Git-Url: http://git.marmaro.de/?a=commitdiff_plain;h=8d77b48284c58c135a6b2787e721597346ab056d;p=mmh

Rework in charset code. (Includes renaming.)
Renamed check_charset() to is_native_charset(), and sbr/check_charset.c to
sbr/charset.c. Removed caching in favor of simpler code.
is_native_charset() does not check for prefixes anymore. The code didn't
match the comments anyway and it matched when comparing the first 10 [sic!]
chars of `ISO-8859-1whatever' with `ISO-8859-11', which is not wanted.
---

diff --git a/h/prototypes.h b/h/prototypes.h
index f2c4652a..d8231f5a 100644
--- a/h/prototypes.h
+++ b/h/prototypes.h
@@ -28,7 +28,7 @@ void advertise(char *, char *, char *, va_list);
 void advise(char *, char *, ...);
 void ambigsw(char *, struct swit *);
 char **brkstring(char *, char *, char *);
-int check_charset(char *, int);
+int is_native_charset(char *);
 char *concat(char *, ...);
 int context_del(char *);
 char *context_find(char *);
diff --git a/sbr/Makefile.in b/sbr/Makefile.in
index 6c3d2c2b..ca49c469 100644
--- a/sbr/Makefile.in
+++ b/sbr/Makefile.in
@@ -44,7 +44,7 @@ SIGNAL_H = @SIGNAL_H@
 
 # source for library functions
 SRCS = addrsbr.c ambigsw.c brkstring.c  \
-	check_charset.c concat.c context_del.c  \
+	charset.c concat.c context_del.c  \
 	context_find.c context_foil.c context_read.c  \
 	context_replace.c context_save.c \
 	cpydata.c cpydgst.c crawl_folders.c  \
diff --git a/sbr/charset.c b/sbr/charset.c
new file mode 100644
index 00000000..56f085cc
--- /dev/null
+++ b/sbr/charset.c
@@ -0,0 +1,70 @@
+/*
+** charset.c -- routines for character sets
+**
+** This code is Copyright (c) 2002, by the authors of nmh.  See the
+** COPYRIGHT file in the root directory of the nmh distribution for
+** complete copyright information.
+*/
+
+#include <h/mh.h>
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
+
+/*
+** Get the current character set: $MM_CHARSET, else the locale's codeset.
+*/
+char *
+get_charset()
+{
+	char *charset = getenv("MM_CHARSET");  /* environment setting wins */
+#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
+	if (!charset) {
+		charset = norm_charmap(nl_langinfo(CODESET));  /* locale fallback */
+	}
+#endif
+	return charset;  /* may be NULL, e.g. unset and fallback compiled out */
+}
+
+
+/*
+** Return 1 if charset `str' can be displayed natively, 0 otherwise.
+*/
+int
+is_native_charset(char *str)
+{
+	char *mm_charset = NULL;
+
+	if (!(mm_charset = get_charset())) {
+		mm_charset = "US-ASCII";  /* default if get_charset() has none */
+	}
+	if (mh_strcasecmp(str, mm_charset)==0) {
+		return 1;  /* str names our own charset */
+	}
+
+	/* US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets */
+	if (strncasecmp("ISO-8859-", mm_charset, 9)==0 ||  /* 9: prefix only */
+			mh_strcasecmp("UTF-8", mm_charset)==0) {
+		if (mh_strcasecmp(str, "US-ASCII")==0) {
+			return 1;
+		}
+	}
+	return 0;  /* neither our charset nor an accepted US-ASCII subset */
+}
+
+
+/*
+** Return the name of the character set we are using for 8bit text,
+** or "x-unknown" if get_charset() does not provide one.
+*/
+char *
+write_charset_8bit(void)
+{
+	char *mm_charset = NULL;
+
+	if (!(mm_charset = get_charset())) {
+		mm_charset = "x-unknown";  /* placeholder name when none is known */
+	}
+	return mm_charset;  /* never NULL */
+}
diff --git a/sbr/check_charset.c b/sbr/check_charset.c
deleted file mode 100644
index d11d7729..00000000
--- a/sbr/check_charset.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-** check_charset.c -- routines for character sets
-**
-** This code is Copyright (c) 2002, by the authors of nmh.  See the
-** COPYRIGHT file in the root directory of the nmh distribution for
-** complete copyright information.
-*/
-
-#include <h/mh.h>
-#ifdef HAVE_LANGINFO_H
-# include <langinfo.h>
-#endif
-
-
-/*
-** Get the current character set
-*/
-char *
-get_charset()
-{
-	char *charset = getenv("MM_CHARSET");
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
-	if (!charset)
-		charset = norm_charmap(nl_langinfo(CODESET));
-#endif
-	return charset;
-}
-
-
-/*
-** Check if we can display a given character set natively.
-** We are passed the length of the initial part of the
-** string to check, since we want to allow the name of the
-** character set to be a substring of a larger string.
-*/
-
-int
-check_charset(char *str, int len)
-{
-	static char *mm_charset = NULL;
-	static char *alt_charset = NULL;
-	static int mm_len;
-	static int alt_len;
-
-	/* Cache the name of our default character set */
-	if (!mm_charset) {
-		if (!(mm_charset = get_charset()))
-			mm_charset = "US-ASCII";
-		mm_len = strlen(mm_charset);
-
-		/* US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets */
-		if (!strncasecmp("ISO-8859-", mm_charset, 9) ||
-			!mh_strcasecmp("UTF-8", mm_charset)) {
-			alt_charset = "US-ASCII";
-			alt_len = strlen(alt_charset);
-		}
-	}
-
-	/* Check if character set is OK */
-	if ((len == mm_len) && !strncasecmp(str, mm_charset, mm_len))
-		return 1;
-	if (alt_charset && (len == alt_len) && !strncasecmp(str, alt_charset, alt_len))
-		return 1;
-
-	return 0;
-}
-
-
-/*
-** Return the name of the character set we are
-** using for 8bit text.
-*/
-char *
-write_charset_8bit(void)
-{
-	static char *mm_charset = NULL;
-
-	/*
-	** Cache the name of the character set to
-	** use for 8bit text.
-	*/
-	if (!mm_charset && !(mm_charset = get_charset()))
-		mm_charset = "x-unknown";
-
-	return mm_charset;
-}
diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c
index dc123e53..4e8231fc 100644
--- a/sbr/fmt_rfc2047.c
+++ b/sbr/fmt_rfc2047.c
@@ -125,22 +125,25 @@ decode_rfc2047(char *str, char *dst, size_t dstlen)
 
 			if (!*pp)
 				continue;
+			*pp = '\0';
 
 			/* Check if character set can be handled natively */
-			if (!check_charset(startofmime, pp - startofmime)) {
+			if (!is_native_charset(startofmime)) {
 #ifdef HAVE_ICONV
 				/* .. it can't. We'll use iconv then. */
-				*pp = '\0';
 				cd = iconv_open(get_charset(), startofmime);
 				fromutf8 = !mh_strcasecmp(startofmime, "UTF-8");
 				*pp = '?';
-				if (cd == (iconv_t)-1) continue;
+				if (cd == (iconv_t)-1)
+					continue;
 				use_iconv = 1;
 #else
+				*pp = '?';
 				continue;
 #endif
 			}
 
+			*pp = '?';
 			startofmime = pp + 1;
 
 			/* Check for valid encoding type */
diff --git a/uip/mhshowsbr.c b/uip/mhshowsbr.c
index 2a8bbf92..9b7e7b05 100644
--- a/uip/mhshowsbr.c
+++ b/uip/mhshowsbr.c
@@ -519,15 +519,14 @@ show_text(CT ct, int alternate)
 	** if it is not a text part of a multipart/alternative
 	*/
 	if (!alternate || ct->c_subtype == TEXT_PLAIN) {
-		if (ct->c_charset && !check_charset(ct->c_charset,
-				strlen(ct->c_charset))) {
+		if (ct->c_charset && !is_native_charset(ct->c_charset)) {
 			snprintf(buffer, sizeof(buffer), "%%liconv -f '%s'",
 					ct->c_charset);
 		} else {
 			snprintf(buffer, sizeof(buffer), "%%lcat");
 		}
-		cp = (ct->c_showproc = getcpy(buffer));
-		return show_content_aux(ct, alternate, cp, NULL);
+		ct->c_showproc = getcpy(buffer);
+		return show_content_aux(ct, alternate, ct->c_showproc, NULL);
 	}
 
 	return NOTOK;