diff options
Diffstat (limited to 'lib/localcharset.c')
-rw-r--r-- | lib/localcharset.c | 81 |
1 files changed, 68 insertions, 13 deletions
diff --git a/lib/localcharset.c b/lib/localcharset.c index 7f09567ce..7ce17e69e 100644 --- a/lib/localcharset.c +++ b/lib/localcharset.c @@ -1,6 +1,6 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2014 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2016 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -34,6 +34,7 @@ #if defined _WIN32 || defined __WIN32__ # define WINDOWS_NATIVE +# include <locale.h> #endif #if defined __EMX__ @@ -127,7 +128,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2) const char *dir; const char *base = "charset.alias"; char *file_name; @@ -341,6 +342,36 @@ get_charset_aliases (void) "CP54936" "\0" "GB18030" "\0" "CP65001" "\0" "UTF-8" "\0"; # endif +# if defined OS2 + /* To avoid the troubles of installing a separate file in the same + directory as the DLL and of retrieving the DLL's directory at + runtime, simply inline the aliases here. */ + + /* The list of encodings is taken from "List of OS/2 Codepages" + by Alex Taylor: + <http://altsan.org/os2/toolkits/uls/index.html#codepages>. + See also "IBM Globalization - Code page identifiers": + <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */ + cp = "CP813" "\0" "ISO-8859-7" "\0" + "CP878" "\0" "KOI8-R" "\0" + "CP819" "\0" "ISO-8859-1" "\0" + "CP912" "\0" "ISO-8859-2" "\0" + "CP913" "\0" "ISO-8859-3" "\0" + "CP914" "\0" "ISO-8859-4" "\0" + "CP915" "\0" "ISO-8859-5" "\0" + "CP916" "\0" "ISO-8859-8" "\0" + "CP920" "\0" "ISO-8859-9" "\0" + "CP921" "\0" "ISO-8859-13" "\0" + "CP923" "\0" "ISO-8859-15" "\0" + "CP954" "\0" "EUC-JP" "\0" + "CP964" "\0" "EUC-TW" "\0" + "CP970" "\0" "EUC-KR" "\0" + "CP1089" "\0" "ISO-8859-6" "\0" + "CP1208" "\0" "UTF-8" "\0" + "CP1381" "\0" "GB2312" "\0" + "CP1386" "\0" "GBK" "\0" + "CP3372" "\0" "EUC-JP" "\0"; +# endif #endif charset_aliases = cp; @@ -461,14 +492,34 @@ locale_charset (void) static char buf[2 + 10 + 1]; - /* The Windows API has a function returning the locale's codepage as a - number: GetACP(). - When the output goes to a console window, it needs to be provided in - GetOEMCP() encoding if the console is using a raster font, or in - GetConsoleOutputCP() encoding if it is using a TrueType font. - But in GUI programs and for output sent to files and pipes, GetACP() - encoding is the best bet. */ - sprintf (buf, "CP%u", GetACP ()); + /* The Windows API has a function returning the locale's codepage as + a number, but the value doesn't change according to what the + 'setlocale' call specified. So we use it as a last resort, in + case the string returned by 'setlocale' doesn't specify the + codepage. */ + char *current_locale = setlocale (LC_ALL, NULL); + char *pdot; + + /* If they set different locales for different categories, + 'setlocale' will return a semi-colon separated list of locale + values. To make sure we use the correct one, we choose LC_CTYPE. */ + if (strchr (current_locale, ';')) + current_locale = setlocale (LC_CTYPE, NULL); + + pdot = strrchr (current_locale, '.'); + if (pdot) + sprintf (buf, "CP%s", pdot + 1); + else + { + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ + sprintf (buf, "CP%u", GetACP ()); + } codeset = buf; #elif defined OS2 @@ -478,6 +529,8 @@ locale_charset (void) ULONG cp[3]; ULONG cplen; + codeset = NULL; + /* Allow user to override the codeset, as set in the operating system, with standard language environment variables. */ locale = getenv ("LC_ALL"); @@ -509,10 +562,12 @@ locale_charset (void) } } - /* Resolve through the charset.alias file. */ - codeset = locale; + /* For the POSIX locale, don't use the system's codepage. */ + if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) + codeset = ""; } - else + + if (codeset == NULL) { /* OS/2 has a function returning the locale's codepage as a number. */ if (DosQueryCp (sizeof (cp), cp, &cplen)) |