From: markus schnalke Date: Mon, 30 Apr 2012 16:52:19 +0000 (+0200) Subject: Rework in charset code. (Includes renaming.) X-Git-Tag: mmh-thesis-end~73 X-Git-Url: http://git.marmaro.de/?a=commitdiff_plain;h=8d77b48284c58c135a6b2787e721597346ab056d;p=mmh Rework in charset code. (Includes renaming.) Renamed check_charset() to is_native_charset(), and sbr/check_charset.c to sbr/charset.c. Removed caching in favor of simpler code. is_native_charset() does not check for prefixes anymore. The code didn't match the comments anyway and it matched when comparing the first 10 [sic!] chars of `ISO-8859-1whatever' with `ISO-8859-11', which is not wanted. --- diff --git a/h/prototypes.h b/h/prototypes.h index f2c4652..d8231f5 100644 --- a/h/prototypes.h +++ b/h/prototypes.h @@ -28,7 +28,7 @@ void advertise(char *, char *, char *, va_list); void advise(char *, char *, ...); void ambigsw(char *, struct swit *); char **brkstring(char *, char *, char *); -int check_charset(char *, int); +int is_native_charset(char *); char *concat(char *, ...); int context_del(char *); char *context_find(char *); diff --git a/sbr/Makefile.in b/sbr/Makefile.in index 6c3d2c2..ca49c46 100644 --- a/sbr/Makefile.in +++ b/sbr/Makefile.in @@ -44,7 +44,7 @@ SIGNAL_H = @SIGNAL_H@ # source for library functions SRCS = addrsbr.c ambigsw.c brkstring.c \ - check_charset.c concat.c context_del.c \ + charset.c concat.c context_del.c \ context_find.c context_foil.c context_read.c \ context_replace.c context_save.c \ cpydata.c cpydgst.c crawl_folders.c \ diff --git a/sbr/charset.c b/sbr/charset.c new file mode 100644 index 0000000..56f085c --- /dev/null +++ b/sbr/charset.c @@ -0,0 +1,70 @@ +/* +** charset.c -- routines for character sets +** +** This code is Copyright (c) 2002, by the authors of nmh. See the +** COPYRIGHT file in the root directory of the nmh distribution for +** complete copyright information. 
+*/ + +#include <h/mh.h> +#ifdef HAVE_LANGINFO_H +# include <langinfo.h> +#endif + + +/* +** Get the current character set +*/ +char * +get_charset() +{ + char *charset = getenv("MM_CHARSET"); +#if defined(HAVE_NL_LANGINFO) && defined(CODESET) + if (!charset) { + charset = norm_charmap(nl_langinfo(CODESET)); + } +#endif + return charset; +} + + +/* +** Check if we can display a given character set natively. +*/ +int +is_native_charset(char *str) +{ + char *mm_charset = NULL; + + if (!(mm_charset = get_charset())) { + mm_charset = "US-ASCII"; + } + if (mh_strcasecmp(str, mm_charset)==0) { + return 1; + } + + /* US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets */ + if (strncasecmp("ISO-8859-", mm_charset, 9)==0 || + mh_strcasecmp("UTF-8", mm_charset)==0) { + if (mh_strcasecmp(str, "US-ASCII")==0) { + return 1; + } + } + return 0; +} + + +/* +** Return the name of the character set we are +** using for 8bit text. +*/ +char * +write_charset_8bit(void) +{ + char *mm_charset = NULL; + + if (!(mm_charset = get_charset())) { + mm_charset = "x-unknown"; + } + return mm_charset; +} diff --git a/sbr/check_charset.c b/sbr/check_charset.c deleted file mode 100644 index d11d772..0000000 --- a/sbr/check_charset.c +++ /dev/null @@ -1,86 +0,0 @@ -/* -** check_charset.c -- routines for character sets -** -** This code is Copyright (c) 2002, by the authors of nmh. See the -** COPYRIGHT file in the root directory of the nmh distribution for -** complete copyright information. -*/ - -#include <h/mh.h> -#ifdef HAVE_LANGINFO_H -# include <langinfo.h> -#endif - - -/* -** Get the current character set -*/ -char * -get_charset() -{ - char *charset = getenv("MM_CHARSET"); -#if defined(HAVE_NL_LANGINFO) && defined(CODESET) - if (!charset) - charset = norm_charmap(nl_langinfo(CODESET)); -#endif - return charset; -} - - -/* -** Check if we can display a given character set natively. 
-** We are passed the length of the initial part of the -** string to check, since we want to allow the name of the -** character set to be a substring of a larger string. -*/ - -int -check_charset(char *str, int len) -{ - static char *mm_charset = NULL; - static char *alt_charset = NULL; - static int mm_len; - static int alt_len; - - /* Cache the name of our default character set */ - if (!mm_charset) { - if (!(mm_charset = get_charset())) - mm_charset = "US-ASCII"; - mm_len = strlen(mm_charset); - - /* US-ASCII is a subset of the ISO-8859-X and UTF-8 character sets */ - if (!strncasecmp("ISO-8859-", mm_charset, 9) || - !mh_strcasecmp("UTF-8", mm_charset)) { - alt_charset = "US-ASCII"; - alt_len = strlen(alt_charset); - } - } - - /* Check if character set is OK */ - if ((len == mm_len) && !strncasecmp(str, mm_charset, mm_len)) - return 1; - if (alt_charset && (len == alt_len) && !strncasecmp(str, alt_charset, alt_len)) - return 1; - - return 0; -} - - -/* -** Return the name of the character set we are -** using for 8bit text. -*/ -char * -write_charset_8bit(void) -{ - static char *mm_charset = NULL; - - /* - ** Cache the name of the character set to - ** use for 8bit text. - */ - if (!mm_charset && !(mm_charset = get_charset())) - mm_charset = "x-unknown"; - - return mm_charset; -} diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index dc123e5..4e8231f 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -125,22 +125,25 @@ decode_rfc2047(char *str, char *dst, size_t dstlen) if (!*pp) continue; + *pp = '\0'; /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + if (!is_native_charset(startofmime)) { #ifdef HAVE_ICONV /* .. it can't. We'll use iconv then. 
*/ - *pp = '\0'; cd = iconv_open(get_charset(), startofmime); fromutf8 = !mh_strcasecmp(startofmime, "UTF-8"); *pp = '?'; - if (cd == (iconv_t)-1) continue; + if (cd == (iconv_t)-1) + continue; use_iconv = 1; #else + *pp = '?'; continue; #endif } + *pp = '?'; startofmime = pp + 1; /* Check for valid encoding type */ diff --git a/uip/mhshowsbr.c b/uip/mhshowsbr.c index 2a8bbf9..9b7e7b0 100644 --- a/uip/mhshowsbr.c +++ b/uip/mhshowsbr.c @@ -519,15 +519,14 @@ show_text(CT ct, int alternate) ** if it is not a text part of a multipart/alternative */ if (!alternate || ct->c_subtype == TEXT_PLAIN) { - if (ct->c_charset && !check_charset(ct->c_charset, - strlen(ct->c_charset))) { + if (ct->c_charset && !is_native_charset(ct->c_charset)) { snprintf(buffer, sizeof(buffer), "%%liconv -f '%s'", ct->c_charset); } else { snprintf(buffer, sizeof(buffer), "%%lcat"); } - cp = (ct->c_showproc = getcpy(buffer)); - return show_content_aux(ct, alternate, cp, NULL); + ct->c_showproc = getcpy(buffer); + return show_content_aux(ct, alternate, ct->c_showproc, NULL); } return NOTOK;