diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e7bbd80..02b4823 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1419,34 +1419,30 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
     return v;
 }
 
-/* Convert encoding to lower case and replace '_' with '-' in order to
-   catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1),
-   1 on success. */
+/* Convert encoding to lower case and skip characters different than a-z and
+   0-9. Return 0 on error (encoding is longer than lower_len-1), 1 on success.
+ */
 static int
 normalize_encoding(const char *encoding,
                    char *lower,
                    size_t lower_len)
 {
     const char *e;
-    char *l;
+    char *l, c;
     char *l_end;
 
     e = encoding;
     l = lower;
     l_end = &lower[lower_len - 1];
-    while (*e) {
+    for (; *e; e++) {
         if (l == l_end)
             return 0;
-        if (Py_ISUPPER(*e)) {
-            *l++ = Py_TOLOWER(*e++);
-        }
-        else if (*e == '_') {
-            *l++ = '-';
-            e++;
-        }
-        else {
-            *l++ = *e++;
-        }
+        c = *e;
+        if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9'))
+            *l++ = c;
+        else if ('A' <= c && c <= 'Z')
+            *l++ = c + ('a' - 'A');
+        /* else: ignore the character */
     }
     *l = '\0';
     return 1;
@@ -1466,10 +1462,10 @@ PyObject *PyUnicode_Decode(const char *s,
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if (strcmp(lower, "utf8") == 0)
             return PyUnicode_DecodeUTF8(s, size, errors);
-        else if ((strcmp(lower, "latin-1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+        else if ((strcmp(lower, "latin1") == 0) ||
+                 (strcmp(lower, "iso88591") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
         else if (strcmp(lower, "mbcs") == 0)
@@ -1477,9 +1473,9 @@ PyObject *PyUnicode_Decode(const char *s,
 #endif
         else if (strcmp(lower, "ascii") == 0)
             return PyUnicode_DecodeASCII(s, size, errors);
-        else if (strcmp(lower, "utf-16") == 0)
+        else if (strcmp(lower, "utf16") == 0)
             return PyUnicode_DecodeUTF16(s, size, errors, 0);
-        else if (strcmp(lower, "utf-32") == 0)
+        else if (strcmp(lower, "utf32") == 0)
             return PyUnicode_DecodeUTF32(s, size, errors, 0);
     }
 
@@ -1674,12 +1670,12 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if (strcmp(lower, "utf8") == 0)
             return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                         PyUnicode_GET_SIZE(unicode),
                                         errors);
-        else if ((strcmp(lower, "latin-1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+        else if ((strcmp(lower, "latin1") == 0) ||
+                 (strcmp(lower, "iso88591") == 0))
             return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                           PyUnicode_GET_SIZE(unicode),
                                           errors);