2 ** The Single Unix Specification function nl_langinfo(CODESET)
3 ** returns the name of the encoding used by the currently selected locale:
6 ** http://www.opengroup.org/onlinepubs/7908799/xsh/langinfo.h.html
8 ** Unfortunately the encoding names are not yet standardized.
9 ** This function knows about the encoding names used on many
10 ** different systems and converts them where possible into
11 ** the corresponding MIME charset name registered in
13 ** http://www.iana.org/assignments/character-sets
15 ** Please extend it as needed and suggest improvements to the author.
17 ** Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11
18 ** Permission to use, copy, modify, and distribute this software
19 ** for any purpose and without fee is hereby granted. The author
20 ** disclaims all warranties with regard to this software.
24 ** http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c
/* Nonzero when x is one of the characters '0'..'9'; x may be evaluated twice. */
#define digit(x) ('0' <= (x) && (x) <= '9')
34 norm_charmap(char *name)
42 ** Many need no remapping, but they are listed here so you
43 ** can see what output to expect, and modify for your needs
46 if (strcmp(name, "UTF-8")==0)
48 if (strcmp(name, "EUC-JP")==0)
50 if (strcmp(name, "EUC-KR")==0)
52 if (strcmp(name, "EUC-TW")==0)
54 if (strcmp(name, "KOI8-R")==0)
56 if (strcmp(name, "KOI8-U")==0)
58 if (strcmp(name, "GBK")==0)
60 if (strcmp(name, "GB2312")==0)
62 if (strcmp(name, "GB18030")==0)
64 if (strcmp(name, "VSCII")==0)
67 /* ASCII comes in many names */
68 if (strcmp(name, "ASCII")==0 ||
69 strcmp(name, "US-ASCII")==0 ||
70 strcmp(name, "ANSI_X3.4-1968")==0 ||
71 strcmp(name, "646")==0 ||
72 strcmp(name, "ISO646")==0 ||
73 strcmp(name, "ISO_646.IRV")==0)
76 /* ISO 8859 will be converted to "ISO-8859-x" */
77 if ((p = strstr(name, "8859-"))) {
78 memcpy(buf, "ISO-8859-\0\0", 12);
88 /* Windows code pages will be converted to "WINDOWS-12xx" */
89 if ((p = strstr(name, "CP12"))) {
90 memcpy(buf, "WINDOWS-12\0\0", 13);
100 /* TIS-620 comes in at least the following two forms */
101 if (strcmp(name, "TIS-620")==0 ||
102 strcmp(name, "TIS620.2533")==0)
103 return "ISO-8859-11";
105 /* For some, uppercase/lowercase might differ */
106 if (strcmp(name, "Big5")==0 || strcmp(name, "BIG5")==0)
108 if (strcmp(name, "Big5HKSCS")==0 || strcmp(name, "BIG5HKSCS")==0)
112 ** I don't know of any implementation of nl_langinfo(CODESET) out
113 ** there that returns anything else (and I'm not even certain all of
114 ** the above occur in the wild), but just in case, as a fallback,
115 ** return the unmodified name.