Update locale.locale_alias targets and add tests for locale.normalize().

Lib/locale.py: aliases that resolved to 'zh_CN.eucCN', 'iw_IL.ISO8859-8',
'sr_CS.*', 'sh_HR.ISO8859-2' and 'en_US.utf' now resolve to 'zh_CN.gb2312',
'he_IL.ISO8859-8', 'sr_RS.*', 'hr_HR.ISO8859-2' and 'en_US.UTF-8'; a new
'sr_rs.cp1251' alias is added.
Lib/test/test_locale.py: a NormalizeTest class is added.

NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed.  Tokens and hunk ranges are unchanged, but
leading indentation and the column padding of the locale_alias entries are
reconstructed -- confirm against the target tree, or apply with
"patch -p1 -l" so whitespace differences in context lines are tolerated.

diff -r c7fdb0637d0b Lib/locale.py
--- a/Lib/locale.py	Fri Sep 13 11:46:24 2013 +0300
+++ b/Lib/locale.py	Fri Sep 13 15:12:23 2013 +0300
@@ -861,7 +897,7 @@
     'catalan':                              'ca_ES.ISO8859-1',
     'cextend':                              'en_US.ISO8859-1',
     'cextend.en':                           'en_US.ISO8859-1',
-    'chinese-s':                            'zh_CN.eucCN',
+    'chinese-s':                            'zh_CN.gb2312',
     'chinese-t':                            'zh_TW.eucTW',
     'croatian':                             'hr_HR.ISO8859-2',
     'cs':                                   'cs_CZ.ISO8859-2',
@@ -1170,8 +1206,8 @@
     'he_il.cp1255':                         'he_IL.CP1255',
     'he_il.iso88598':                       'he_IL.ISO8859-8',
     'he_il.microsoftcp1255':                'he_IL.CP1255',
-    'hebrew':                               'iw_IL.ISO8859-8',
-    'hebrew.iso88598':                      'iw_IL.ISO8859-8',
+    'hebrew':                               'he_IL.ISO8859-8',
+    'hebrew.iso88598':                      'he_IL.ISO8859-8',
     'hi':                                   'hi_IN.ISCII-DEV',
     'hi_in':                                'hi_IN.ISCII-DEV',
     'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
@@ -1427,10 +1463,10 @@
     'se_no':                                'se_NO.UTF-8',
     'serbocroatian':                        'sr_RS.UTF-8@latin',
     'sh':                                   'sr_RS.UTF-8@latin',
-    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
-    'sh_hr':                                'sh_HR.ISO8859-2',
+    'sh_ba.iso88592@bosnia':                'sr_RS.ISO8859-2',
+    'sh_hr':                                'hr_HR.ISO8859-2',
     'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
-    'sh_sp':                                'sr_CS.ISO8859-2',
+    'sh_sp':                                'sr_RS.ISO8859-2',
     'sh_yu':                                'sr_RS.UTF-8@latin',
     'si':                                   'si_LK.UTF-8',
     'si_lk':                                'si_LK.UTF-8',
@@ -1445,8 +1481,8 @@
     'slovak':                               'sk_SK.ISO8859-2',
     'slovene':                              'sl_SI.ISO8859-2',
     'slovenian':                            'sl_SI.ISO8859-2',
-    'sp':                                   'sr_CS.ISO8859-5',
-    'sp_yu':                                'sr_CS.ISO8859-5',
+    'sp':                                   'sr_RS.ISO8859-5',
+    'sp_yu':                                'sr_RS.ISO8859-5',
     'spanish':                              'es_ES.ISO8859-1',
     'spanish.iso88591':                     'es_ES.ISO8859-1',
     'spanish_spain':                        'es_ES.ISO8859-1',
@@ -1459,23 +1495,24 @@
     'sr@latin':                             'sr_RS.UTF-8@latin',
     'sr@latn':                              'sr_RS.UTF-8@latin',
     'sr_cs':                                'sr_RS.UTF-8',
-    'sr_cs.iso88592':                       'sr_CS.ISO8859-2',
-    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
-    'sr_cs.iso88595':                       'sr_CS.ISO8859-5',
+    'sr_cs.iso88592':                       'sr_RS.ISO8859-2',
+    'sr_cs.iso88592@latn':                  'sr_RS.ISO8859-2',
+    'sr_cs.iso88595':                       'sr_RS.ISO8859-5',
     'sr_cs.utf8@latn':                      'sr_RS.UTF-8@latin',
     'sr_cs@latn':                           'sr_RS.UTF-8@latin',
     'sr_me':                                'sr_ME.UTF-8',
     'sr_rs':                                'sr_RS.UTF-8',
+    'sr_rs.cp1251':                         'sr_RS.CP1251',
     'sr_rs.utf8@latn':                      'sr_RS.UTF-8@latin',
     'sr_rs@latin':                          'sr_RS.UTF-8@latin',
     'sr_rs@latn':                           'sr_RS.UTF-8@latin',
-    'sr_sp':                                'sr_CS.ISO8859-2',
+    'sr_sp':                                'sr_RS.ISO8859-2',
     'sr_yu':                                'sr_RS.UTF-8@latin',
-    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
-    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
-    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
-    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
-    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
+    'sr_yu.cp1251@cyrillic':                'sr_RS.CP1251',
+    'sr_yu.iso88592':                       'sr_RS.ISO8859-2',
+    'sr_yu.iso88595':                       'sr_RS.ISO8859-5',
+    'sr_yu.iso88595@cyrillic':              'sr_RS.ISO8859-5',
+    'sr_yu.microsoftcp1251@cyrillic':       'sr_RS.CP1251',
     'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
     'sr_yu@cyrillic':                       'sr_RS.UTF-8',
     'ss':                                   'ss_ZA.ISO8859-1',
@@ -1537,8 +1574,8 @@
     'uk_ua.iso88595':                       'uk_UA.ISO8859-5',
     'uk_ua.koi8u':                          'uk_UA.KOI8-U',
     'uk_ua.microsoftcp1251':                'uk_UA.CP1251',
-    'univ':                                 'en_US.utf',
-    'universal':                            'en_US.utf',
+    'univ':                                 'en_US.UTF-8',
+    'universal':                            'en_US.UTF-8',
     'universal.utf8@ucs4':                  'en_US.UTF-8',
     'ur':                                   'ur_PK.CP1256',
     'ur_pk':                                'ur_PK.CP1256',
@@ -1570,10 +1607,10 @@
     'yi_us':                                'yi_US.CP1255',
     'yi_us.cp1255':                         'yi_US.CP1255',
     'yi_us.microsoftcp1255':                'yi_US.CP1255',
-    'zh':                                   'zh_CN.eucCN',
+    'zh':                                   'zh_CN.gb2312',
     'zh_cn':                                'zh_CN.gb2312',
     'zh_cn.big5':                           'zh_TW.big5',
-    'zh_cn.euc':                            'zh_CN.eucCN',
+    'zh_cn.euc':                            'zh_CN.gb2312',
     'zh_cn.gb18030':                        'zh_CN.gb18030',
     'zh_cn.gb2312':                         'zh_CN.gb2312',
     'zh_cn.gbk':                            'zh_CN.gbk',
diff -r c7fdb0637d0b Lib/test/test_locale.py
--- a/Lib/test/test_locale.py	Fri Sep 13 11:46:24 2013 +0300
+++ b/Lib/test/test_locale.py	Fri Sep 13 15:12:23 2013 +0300
@@ -365,6 +365,64 @@
         self.assertLess(locale.strxfrm('à'), locale.strxfrm('b'))
 
 
+class NormalizeTest(unittest.TestCase):
+    def check(self, loc, expected):
+        self.assertEqual(locale.normalize(loc), expected, msg=loc)
+
+    def test_locale_alias(self):
+        for loc, alias in locale.locale_alias.items():
+            with self.subTest(locale=(loc, alias)):
+                self.check(loc, alias)
+            with self.subTest(locale=(loc, alias)):
+                self.check(alias, alias)
+
+    def test_english(self):
+        self.check('en', 'en_US.ISO8859-1')
+        self.check('EN', 'en_US.ISO8859-1')
+        self.check('en_US', 'en_US.ISO8859-1')
+        self.check('en_us', 'en_US.ISO8859-1')
+        self.check('en_GB', 'en_GB.ISO8859-1')
+        self.check('en_US.UTF-8', 'en_US.UTF-8')
+        self.check('en_US.utf8', 'en_US.UTF-8')
+        self.check('en_US:UTF-8', 'en_US.UTF-8')
+        self.check('en_US.ISO8859-1', 'en_US.ISO8859-1')
+        self.check('en_US.US-ASCII', 'en_US.ISO8859-1')
+        self.check('english', 'en_EN.ISO8859-1')
+
+    def test_ukrainian(self):
+        self.check('uk', 'uk_UA.KOI8-U')
+        self.check('uk_UA', 'uk_UA.KOI8-U')
+        self.check('uk_UA.utf8', 'uk_UA.UTF-8')
+        self.check('uk_UA.cp1251', 'uk_UA.CP1251')
+        self.check('uk_UA.koi8u', 'uk_UA.KOI8-U')
+        self.check('uk_ua.iso88595', 'uk_UA.ISO8859-5')
+
+    def test_euro(self):
+        self.check('de_DE@euro', 'de_DE.ISO8859-15')
+        self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
+
+    def test_valencia(self):
+        self.check('es_CA.UTF-8@valencia', 'es_CA.UTF-8@valencia')
+        self.check('ca_ES.UTF-8@valencia', 'ca_ES.UTF-8@valencia')
+        self.check('ca_ES@valencia', 'ca_ES.ISO8859-1@valencia')
+        self.check('ca@valencia', 'ca_ES.ISO8859-1@valencia')
+
+    def test_devanagari(self):
+        self.check('ks_in@devanagari', 'ks_IN@devanagari.UTF-8')
+        self.check('ks_IN@devanagari.UTF-8', 'ks_IN@devanagari.UTF-8')
+        self.check('sd', 'sd_IN@devanagari.UTF-8')
+        self.check('sd_IN@devanagari.UTF-8', 'sd_IN@devanagari.UTF-8')
+
+    def test_microsoftcp(self):
+        self.check('uk_ua.microsoftcp1251', 'uk_UA.CP1251')
+        self.check('uk_ua.microsoft-cp1251', 'uk_UA.CP1251')
+
+    def test_georgianacademy(self):
+        self.check('ka_ge.georgianacademy', 'ka_GE.GEORGIAN-ACADEMY')
+        self.check('ka_GE.georgian_academy', 'ka_GE.GEORGIAN-ACADEMY')
+        self.check('ka_GE.GEORGIAN-ACADEMY', 'ka_GE.GEORGIAN-ACADEMY')
+
+
 class TestMiscellaneous(unittest.TestCase):
     def test_getpreferredencoding(self):
         # Invoke getpreferredencoding to make sure it does not cause exceptions.