Bug #1813: make codec-name and locale-name normalization independent of the
current LC_CTYPE locale by lowercasing only the ASCII letters 'A'-'Z'
(under e.g. a Turkish locale, the C library lowercases 'I' differently).

Changes in this patch:
  * Lib/locale.py: normalize() lowercases via translate() with ASCII-only
    tables (one for byte strings, one for unicode strings).
  * Lib/test/test_codecs.py: new test; it restores the previous LC_CTYPE
    setting on exit because the locale is process-wide.
  * Python/codecs.c: normalizestring() uses a local ascii_tolower().

Note: the line breaks and indentation of this patch were reconstructed from
a copy whose whitespace had been collapsed.  If a hunk is rejected because
of whitespace differences in context lines, apply with `patch -l`.

diff -r 385b871cffae Lib/locale.py
--- a/Lib/locale.py	Sat Feb 16 15:34:57 2008 +0100
+++ b/Lib/locale.py	Sat Feb 16 21:01:15 2008 +0100
@@ -278,6 +278,18 @@ def _test():
 # overridden below)
 _setlocale = setlocale
 
+# Avoid relying on the locale-dependent .lower() method
+# (see bug #1813).
+_ascii_lower_map = ''.join(
+    chr(x + 32 if x >= ord('A') and x <= ord('Z') else x)
+    for x in range(256)
+)
+# unicode.translate() cannot use the 256-character str table above; it
+# needs a mapping of ordinals, so keep an equivalent one for unicode names.
+_ascii_lower_umap = dict(
+    (x, x + 32) for x in range(ord('A'), ord('Z') + 1)
+)
+
 def normalize(localename):
 
     """ Returns a normalized locale code for the given locale
@@ -295,7 +307,10 @@ def normalize(localename):
 
     """
     # Normalize the locale name and extract the encoding
-    fullname = localename.lower()
+    if isinstance(localename, unicode):
+        fullname = localename.translate(_ascii_lower_umap)
+    else:
+        fullname = localename.translate(_ascii_lower_map)
     if ':' in fullname:
         # ':' is sometimes used as encoding delimiter.
         fullname = fullname.replace(':', '.')
diff -r 385b871cffae Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Sat Feb 16 15:34:57 2008 +0100
+++ b/Lib/test/test_codecs.py	Sat Feb 16 21:01:15 2008 +0100
@@ -1,6 +1,7 @@ from test import test_support
 from test import test_support
 import unittest
 import codecs
+import locale
 import sys, StringIO, _testcapi
 
 class Queue(object):
@@ -1078,6 +1079,21 @@ class CodecsModuleTest(unittest.TestCase
         self.assertRaises(LookupError, codecs.lookup, "__spam__")
         self.assertRaises(LookupError, codecs.lookup, " ")
 
+    def test_lookup_with_locale(self):
+        # Bug #1813: when normalizing codec name, lowercasing must be locale
+        # agnostic, otherwise the looked up codec name might end up wrong.
+        oldlocale = locale.setlocale(locale.LC_CTYPE)
+        try:
+            try:
+                locale.setlocale(locale.LC_CTYPE, 'tr')
+            except locale.Error:
+                # The 'tr' locale is not available here: skip the test.
+                return
+            codecs.lookup('ISO8859_1')
+        finally:
+            # The locale is process-wide; restore it for the other tests.
+            locale.setlocale(locale.LC_CTYPE, oldlocale)
+
     def test_getencoder(self):
         self.assertRaises(TypeError, codecs.getencoder)
         self.assertRaises(LookupError, codecs.getencoder, "__spam__")
diff -r 385b871cffae Python/codecs.c
--- a/Python/codecs.c	Sat Feb 16 15:34:57 2008 +0100
+++ b/Python/codecs.c	Sat Feb 16 21:01:15 2008 +0100
@@ -45,6 +45,22 @@ int PyCodec_Register(PyObject *search_fu
     return -1;
 }
 
+/* A locale-agnostic version of tolower(), to avoid codec lookup failures
+   when the current locale's case folding behaviour differs from latin-based
+   languages (see bug #1813).
+   XXX: do we want to export this function and make it available to other
+   modules?
+ */
+
+static
+int ascii_tolower(int c)
+{
+    if (c >= 'A' && c <= 'Z')
+        return c + 32;
+    return c;
+}
+
+
 /* Convert a string to a normalized Python string: all characters are
    converted to lower case, spaces are replaced with underscores. */
 
@@ -70,7 +86,7 @@ PyObject *normalizestring(const char *st
         if (ch == ' ')
             ch = '-';
         else
-            ch = tolower(Py_CHARMASK(ch));
+            ch = ascii_tolower(Py_CHARMASK(ch));
         p[i] = ch;
     }
     return v;