From 7cd03bed1962e1c46a2d25edd7348091a75b945d Mon Sep 17 00:00:00 2001 From: Mike FABIAN Date: Thu, 7 Nov 2013 17:47:26 +0100 Subject: [PATCH] Issue #19534: fix normalize() in locale.py to make it work for sr_RS.UTF-8@latin --- Lib/locale.py | 81 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index 7ddfdb7..84abc9e 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -355,7 +355,7 @@ def normalize(localename): If normalization fails, the original name is returned unchanged. - If the given encoding is not known, the function defaults to + If the encoding is not given, the function adds the default encoding for the locale code just like setlocale() does. @@ -363,54 +363,60 @@ def normalize(localename): # Normalize the locale name and extract the encoding if isinstance(localename, _unicode): localename = localename.encode('ascii') - fullname = localename.translate(_ascii_lower_map) - if ':' in fullname: + langterrname = localename.translate(_ascii_lower_map) + if '@' in langterrname: + langterrname, modifier = langterrname.split('@')[:2] + else: + modifier = '' + if ':' in langterrname: # ':' is sometimes used as encoding delimiter. - fullname = fullname.replace(':', '.') - if '.' in fullname: - langname, encoding = fullname.split('.')[:2] - fullname = langname + '.' + encoding + langterrname = langterrname.replace(':', '.') + if '.' in langterrname: + langterrname, encoding = langterrname.split('.')[:2] else: - langname = fullname + langterrname = langterrname encoding = '' - # First lookup: fullname (possibly with encoding) + # First lookup: complete locale name, possibly with encoding and/or modifier norm_encoding = encoding.replace('-', '') norm_encoding = norm_encoding.replace('_', '') - lookup_name = langname + '.' + encoding - code = locale_alias.get(lookup_name, None) - if code is not None: - return code - #print 'first lookup failed' - - # Second try: langname (without encoding) - code = locale_alias.get(langname, None) - if code is not None: - #print 'langname lookup succeeded' - if '.' in code: - langname, defenc = code.split('.') + lookup_name = langterrname + if encoding: + lookup_name += '.' + norm_encoding + if modifier: + lookup_name += '@' + modifier + normalized_locale_name = locale_alias.get(lookup_name, None) + if normalized_locale_name is not None: + return normalized_locale_name + + # Second lookup: (without encoding and without modifier) + normalized_locale_name = locale_alias.get(langterrname, None) + if normalized_locale_name is not None: + if '@' in normalized_locale_name: + normalized_locale_name, defmodifier = normalized_locale_name.split('@') + else: + defmodifier = '' + if '.' in normalized_locale_name: + normalized_locale_name, defencoding = normalized_locale_name.split('.') else: - langname = code - defenc = '' + defencoding = '' if encoding: # Convert the encoding to a C lib compatible encoding string norm_encoding = encodings.normalize_encoding(encoding) - #print 'norm encoding: %r' % norm_encoding norm_encoding = encodings.aliases.aliases.get(norm_encoding, norm_encoding) - #print 'aliased encoding: %r' % norm_encoding - encoding = locale_encoding_alias.get(norm_encoding, + norm_encoding = locale_encoding_alias.get(norm_encoding, norm_encoding) - else: - encoding = defenc - #print 'found encoding %r' % encoding - if encoding: - return langname + '.' + encoding - else: - return langname + normalized_locale_name += '.' + norm_encoding + elif defencoding: + normalized_locale_name += '.' + defencoding + if modifier: + normalized_locale_name += '@' + modifier + elif defmodifier: + normalized_locale_name += '@' + defmodifier + return normalized_locale_name - else: - return localename + return localename def _parse_localename(localename): @@ -1254,7 +1260,7 @@ locale_alias = { 'korean': 'ko_KR.eucKR', 'korean.euc': 'ko_KR.eucKR', 'ks': 'ks_IN.UTF-8', - 'ks_in@devanagari': 'ks_IN@devanagari.UTF-8', + 'ks_in@devanagari': 'ks_IN.UTF-8@devanagari', 'kw': 'kw_GB.ISO8859-1', 'kw_gb': 'kw_GB.ISO8859-1', 'kw_gb.iso88591': 'kw_GB.ISO8859-1', @@ -1414,7 +1420,7 @@ locale_alias = { 'rw': 'rw_RW.ISO8859-1', 'rw_rw': 'rw_RW.ISO8859-1', 'rw_rw.iso88591': 'rw_RW.ISO8859-1', - 'sd': 'sd_IN@devanagari.UTF-8', + 'sd': 'sd_IN.UTF-8@devanagari', 'se_no': 'se_NO.UTF-8', 'serbocroatian': 'sr_RS.UTF-8@latin', 'sh': 'sr_RS.UTF-8@latin', @@ -1457,6 +1463,7 @@ locale_alias = { 'sr_cs@latn': 'sr_RS.UTF-8@latin', 'sr_me': 'sr_ME.UTF-8', 'sr_rs': 'sr_RS.UTF-8', + 'sr_rs.utf8@latin': 'sr_RS.UTF-8@latin', 'sr_rs.utf8@latn': 'sr_RS.UTF-8@latin', 'sr_rs@latin': 'sr_RS.UTF-8@latin', 'sr_rs@latn': 'sr_RS.UTF-8@latin', -- 1.8.4.2