2 ** The Single Unix Specification function nl_langinfo(CODESET)
3 ** returns the name of the encoding used by the currently selected
6 ** http://www.opengroup.org/onlinepubs/7908799/xsh/langinfo.h.html
8 ** Unfortunately the encoding names are not yet standardized.
9 ** This function knows about the encoding names used on many
10 ** different systems and converts them where possible into
11 ** the corresponding MIME charset name registered in
13 ** http://www.iana.org/assignments/character-sets
15 ** Please extend it as needed and suggest improvements to the author.
17 ** Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11
18 ** Permission to use, copy, modify, and distribute this software
19 ** for any purpose and without fee is hereby granted. The author
20 ** disclaims all warranties with regard to this software.
24 ** http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c
/*
** digit(x): nonzero when x is one of the decimal digit characters
** '0'..'9', zero otherwise.
**
** The argument is expanded exactly once, so expressions with side
** effects (for example digit(*p++)) are safe.  Both operands are
** converted to unsigned before subtracting: any value below '0'
** wraps around to a huge number, which lets a single "<= 9"
** comparison cover the lower and the upper bound at the same time.
*/
#define digit(x) ((unsigned)(x) - (unsigned)'0' <= 9u)
34 norm_charmap(char *name)
42 ** Many need no remapping, but they are listed here so you
43 ** can see what output to expect, and modify for your needs
46 if (!strcmp(name, "UTF-8"))
48 if (!strcmp(name, "EUC-JP"))
50 if (!strcmp(name, "EUC-KR"))
52 if (!strcmp(name, "EUC-TW"))
54 if (!strcmp(name, "KOI8-R"))
56 if (!strcmp(name, "KOI8-U"))
58 if (!strcmp(name, "GBK"))
60 if (!strcmp(name, "GB2312"))
62 if (!strcmp(name, "GB18030"))
64 if (!strcmp(name, "VSCII"))
67 /* ASCII comes in many names */
68 if (!strcmp(name, "ASCII") ||
69 !strcmp(name, "US-ASCII") ||
70 !strcmp(name, "ANSI_X3.4-1968") ||
71 !strcmp(name, "646") ||
72 !strcmp(name, "ISO646") ||
73 !strcmp(name, "ISO_646.IRV"))
76 /* ISO 8859 will be converted to "ISO-8859-x" */
77 if ((p = strstr(name, "8859-"))) {
78 memcpy(buf, "ISO-8859-\0\0", 12);
88 /* Windows code pages will be converted to "WINDOWS-12xx" */
89 if ((p = strstr(name, "CP12"))) {
90 memcpy(buf, "WINDOWS-12\0\0", 13);
100 /* TIS-620 comes in at least the following two forms */
101 if (!strcmp(name, "TIS-620") ||
102 !strcmp(name, "TIS620.2533"))
103 return "ISO-8859-11";
105 /* For some, uppercase/lowercase might differ */
106 if (!strcmp(name, "Big5") || !strcmp(name, "BIG5"))
108 if (!strcmp(name, "Big5HKSCS") || !strcmp(name, "BIG5HKSCS"))
112 ** I don't know of any implementation of nl_langinfo(CODESET) out
113 ** there that returns anything else (and I'm not even certain all of
114 ** the above occur in the wild), but just in case, as a fallback,
115 ** return the unmodified name.