diff options
Diffstat (limited to 'lib/localcharset.c')
-rw-r--r-- | lib/localcharset.c | 130 |
1 files changed, 33 insertions, 97 deletions
diff --git a/lib/localcharset.c b/lib/localcharset.c index 5bbe2c8b..29de23d6 100644 --- a/lib/localcharset.c +++ b/lib/localcharset.c @@ -1,6 +1,6 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -13,7 +13,8 @@ GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* Written by Bruno Haible <bruno@clisp.org>. */ @@ -29,12 +30,11 @@ #include <stdlib.h> #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET -# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ +# define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */ #endif #if defined _WIN32 || defined __WIN32__ -# define WINDOWS_NATIVE -# include <locale.h> +# define WIN32_NATIVE #endif #if defined __EMX__ @@ -44,7 +44,7 @@ # endif #endif -#if !defined WINDOWS_NATIVE +#if !defined WIN32_NATIVE # include <unistd.h> # if HAVE_LANGINFO_CODESET # include <langinfo.h> @@ -57,7 +57,7 @@ # define WIN32_LEAN_AND_MEAN # include <windows.h> # endif -#elif defined WINDOWS_NATIVE +#elif defined WIN32_NATIVE # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -66,11 +66,6 @@ # include <os2.h> #endif -/* For MB_CUR_MAX_L */ -#if defined DARWIN7 -# include <xlocale.h> -#endif - #if ENABLE_RELOCATABLE # include "relocatable.h" #else @@ -88,7 +83,7 @@ #endif #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Native Windows, Cygwin, OS/2, DOS */ + /* Win32, Cygwin, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') #endif @@ -128,7 +123,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2) +#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) const char *dir; const char *base = "charset.alias"; char *file_name; @@ -233,7 +228,8 @@ get_charset_aliases (void) { /* Out of memory. */ res_size = 0; - free (old_res_ptr); + if (old_res_ptr != NULL) + free (old_res_ptr); break; } strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); @@ -313,7 +309,7 @@ get_charset_aliases (void) "DECKOREAN" "\0" "EUC-KR" "\0"; # endif -# if defined WINDOWS_NATIVE || defined __CYGWIN__ +# if defined WIN32_NATIVE || defined __CYGWIN__ /* To avoid the troubles of installing a separate file in the same directory as the DLL and of retrieving the DLL's directory at runtime, simply inline the aliases here. */ @@ -342,36 +338,6 @@ get_charset_aliases (void) "CP54936" "\0" "GB18030" "\0" "CP65001" "\0" "UTF-8" "\0"; # endif -# if defined OS2 - /* To avoid the troubles of installing a separate file in the same - directory as the DLL and of retrieving the DLL's directory at - runtime, simply inline the aliases here. */ - - /* The list of encodings is taken from "List of OS/2 Codepages" - by Alex Taylor: - <http://altsan.org/os2/toolkits/uls/index.html#codepages>. - See also "IBM Globalization - Code page identifiers": - <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */ - cp = "CP813" "\0" "ISO-8859-7" "\0" - "CP878" "\0" "KOI8-R" "\0" - "CP819" "\0" "ISO-8859-1" "\0" - "CP912" "\0" "ISO-8859-2" "\0" - "CP913" "\0" "ISO-8859-3" "\0" - "CP914" "\0" "ISO-8859-4" "\0" - "CP915" "\0" "ISO-8859-5" "\0" - "CP916" "\0" "ISO-8859-8" "\0" - "CP920" "\0" "ISO-8859-9" "\0" - "CP921" "\0" "ISO-8859-13" "\0" - "CP923" "\0" "ISO-8859-15" "\0" - "CP954" "\0" "EUC-JP" "\0" - "CP964" "\0" "EUC-TW" "\0" - "CP970" "\0" "EUC-KR" "\0" - "CP1089" "\0" "ISO-8859-6" "\0" - "CP1208" "\0" "UTF-8" "\0" - "CP1381" "\0" "GB2312" "\0" - "CP1386" "\0" "GBK" "\0" - "CP3372" "\0" "EUC-JP" "\0"; -# endif #endif charset_aliases = cp; @@ -395,7 +361,7 @@ locale_charset (void) const char *codeset; const char *aliases; -#if !(defined WINDOWS_NATIVE || defined OS2) +#if !(defined WIN32_NATIVE || defined OS2) # if HAVE_LANGINFO_CODESET @@ -403,9 +369,10 @@ locale_charset (void) codeset = nl_langinfo (CODESET); # ifdef __CYGWIN__ - /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always - returns "US-ASCII". Return the suffix of the locale name from the - environment variables (if present) or the codepage as a number. */ + /* Cygwin 1.5.x does not have locales. nl_langinfo (CODESET) always + returns "US-ASCII". As long as this is not fixed, return the suffix + of the locale name from the environment variables (if present) or + the codepage as a number. */ if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) { const char *locale; @@ -442,10 +409,10 @@ locale_charset (void) } } - /* The Windows API has a function returning the locale's codepage as a - number: GetACP(). This encoding is used by Cygwin, unless the user - has set the environment variable CYGWIN=codepage:oem (which very few - people do). + /* Woe32 has a function returning the locale's codepage as a number: + GetACP(). This encoding is used by Cygwin, unless the user has set + the environment variable CYGWIN=codepage:oem (which very few people + do). Output directed to console windows needs to be converted (to GetOEMCP() if the console is using a raster font, or to GetConsoleOutputCP() if it is using a TrueType font). Cygwin does @@ -488,38 +455,18 @@ locale_charset (void) # endif -#elif defined WINDOWS_NATIVE +#elif defined WIN32_NATIVE static char buf[2 + 10 + 1]; - /* The Windows API has a function returning the locale's codepage as - a number, but the value doesn't change according to what the - 'setlocale' call specified. So we use it as a last resort, in - case the string returned by 'setlocale' doesn't specify the - codepage. */ - char *current_locale = setlocale (LC_ALL, NULL); - char *pdot; - - /* If they set different locales for different categories, - 'setlocale' will return a semi-colon separated list of locale - values. To make sure we use the correct one, we choose LC_CTYPE. */ - if (strchr (current_locale, ';')) - current_locale = setlocale (LC_CTYPE, NULL); - - pdot = strrchr (current_locale, '.'); - if (pdot) - sprintf (buf, "CP%s", pdot + 1); - else - { - /* The Windows API has a function returning the locale's codepage as a - number: GetACP(). - When the output goes to a console window, it needs to be provided in - GetOEMCP() encoding if the console is using a raster font, or in - GetConsoleOutputCP() encoding if it is using a TrueType font. - But in GUI programs and for output sent to files and pipes, GetACP() - encoding is the best bet. */ - sprintf (buf, "CP%u", GetACP ()); - } + /* Woe32 has a function returning the locale's codepage as a number: + GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ + sprintf (buf, "CP%u", GetACP ()); codeset = buf; #elif defined OS2 @@ -529,8 +476,6 @@ locale_charset (void) ULONG cp[3]; ULONG cplen; - codeset = NULL; - /* Allow user to override the codeset, as set in the operating system, with standard language environment variables. */ locale = getenv ("LC_ALL"); @@ -562,12 +507,10 @@ locale_charset (void) } } - /* For the POSIX locale, don't use the system's codepage. */ - if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) - codeset = ""; + /* Resolve through the charset.alias file. */ + codeset = locale; } - - if (codeset == NULL) + else { /* OS/2 has a function returning the locale's codepage as a number. */ if (DosQueryCp (sizeof (cp), cp, &cplen)) @@ -602,12 +545,5 @@ locale_charset (void) if (codeset[0] == '\0') codeset = "ASCII"; -#ifdef DARWIN7 - /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" - (the default codeset) does not work when MB_CUR_MAX is 1. */ - if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1) - codeset = "ASCII"; -#endif - return codeset; } |