diff -r a1e032dbcf86 Include/unicodeobject.h --- a/Include/unicodeobject.h Wed Sep 07 00:32:06 2016 +0200 +++ b/Include/unicodeobject.h Tue Sep 06 15:35:53 2016 -0700 @@ -1663,7 +1663,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_Transla PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */ const char *errors /* error handling */ ); diff -r a1e032dbcf86 Lib/encodings/__init__.py --- a/Lib/encodings/__init__.py Wed Sep 07 00:32:06 2016 +0200 +++ b/Lib/encodings/__init__.py Tue Sep 06 15:35:53 2016 -0700 @@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lembu """#" import codecs +import sys from . import aliases _cache = {} @@ -151,3 +152,12 @@ def search_function(encoding): # Register the search_function in the Python codec registry codecs.register(search_function) + +if sys.platform == 'win32': + def _alias_mbcs(encoding): + import _bootlocale + if encoding == _bootlocale.getpreferredencoding(False): + import encodings.mbcs + return encodings.mbcs.getregentry() + + codecs.register(_alias_mbcs) diff -r a1e032dbcf86 Lib/encodings/aliases.py --- a/Lib/encodings/aliases.py Wed Sep 07 00:32:06 2016 +0200 +++ b/Lib/encodings/aliases.py Tue Sep 06 15:35:53 2016 -0700 @@ -458,6 +458,7 @@ aliases = { 'macturkish' : 'mac_turkish', # mbcs codec + 'ansi' : 'mbcs', 'dbcs' : 'mbcs', # ptcp154 codec diff -r a1e032dbcf86 Lib/site.py --- a/Lib/site.py Wed Sep 07 00:32:06 2016 +0200 +++ b/Lib/site.py Tue Sep 06 15:35:53 2016 -0700 @@ -423,21 +423,6 @@ def enablerlcompleter(): sys.__interactivehook__ = register_readline -def aliasmbcs(): - """On Windows, some default encodings are not provided by Python, - while they are always available as "mbcs" in each locale. Make - them usable by aliasing to "mbcs" in such a case.""" - if sys.platform == 'win32': - import _bootlocale, codecs - enc = _bootlocale.getpreferredencoding(False) - if enc.startswith('cp'): # "cp***" ? - try: - codecs.lookup(enc) - except LookupError: - import encodings - encodings._cache[enc] = encodings._unknown - encodings.aliases.aliases[enc] = 'mbcs' - CONFIG_LINE = r'^(?P(\w|[-_])+)\s*=\s*(?P.*)\s*$' def venv(known_paths): @@ -560,7 +545,6 @@ def main(): setcopyright() sethelper() enablerlcompleter() - aliasmbcs() execsitecustomize() if ENABLE_USER_SITE: execusercustomize() diff -r a1e032dbcf86 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Wed Sep 07 00:32:06 2016 +0200 +++ b/Lib/test/test_codecs.py Tue Sep 06 15:35:53 2016 -0700 @@ -8,11 +8,6 @@ import encodings from test import support -if sys.platform == 'win32': - VISTA_OR_LATER = (sys.getwindowsversion().major >= 6) -else: - VISTA_OR_LATER = False - try: import ctypes except ImportError: @@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.Tes ('abc', 'strict', b'abc'), ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'), ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'), + ('\udc80', 'strict', None), + ('\udc80', 'ignore', b''), + ('\udc80', 'replace', b'?'), + ('\udc80', 'backslashreplace', b'\\udc80'), + ('\udc80', 'namereplace', b'\\udc80'), + ('\udc80', 'surrogatepass', b'\xed\xb2\x80'), ] - if VISTA_OR_LATER: - tests.extend(( - ('\udc80', 'strict', None), - ('\udc80', 'ignore', b''), - ('\udc80', 'replace', b'?'), - ('\udc80', 'backslashreplace', b'\\udc80'), - ('\udc80', 'namereplace', b'\\udc80'), - ('\udc80', 'surrogatepass', b'\xed\xb2\x80'), - )) - else: - tests.append(('\udc80', 'strict', b'\xed\xb2\x80')) for text, errors, expected in tests: if expected is not None: try: @@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.Tes (b'[\xff]', 'ignore', '[]'), (b'[\xff]', 'replace', '[\ufffd]'), (b'[\xff]', 'surrogateescape', '[\udcff]'), + (b'[\xed\xb2\x80]', 'strict', None), + (b'[\xed\xb2\x80]', 'ignore', '[]'), + (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'), ] - if VISTA_OR_LATER: - tests.extend(( - (b'[\xed\xb2\x80]', 'strict', None), - (b'[\xed\xb2\x80]', 'ignore', '[]'), - (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'), - )) - else: - tests.extend(( - (b'[\xed\xb2\x80]', 'strict', '[\udc80]'), - )) for raw, errors, expected in tests: if expected is not None: try: @@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.Tes self.assertRaises(UnicodeDecodeError, raw.decode, 'cp65001', errors) - @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') def test_lone_surrogates(self): self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001") self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001") @@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.Tes self.assertEqual("[\uDC80]".encode("cp65001", "replace"), b'[?]') - @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') def test_surrogatepass_handler(self): self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"), b"abc\xed\xa0\x80def") @@ -1951,6 +1932,8 @@ all_unicode_encodings = [ if hasattr(codecs, "mbcs_encode"): all_unicode_encodings.append("mbcs") +if hasattr(codecs, "oem_encode"): + all_unicode_encodings.append("oem") # The following encoding is not tested, because it's not supposed # to work: @@ -2667,6 +2650,8 @@ else: bytes_transform_encodings.append("bz2_codec") transform_aliases["bz2_codec"] = ["bz2"] +if hasattr(codecs, "mbcs_encode"): + transform_aliases["mbcs"] = ["dbcs", "ansi"] class TransformCodecTest(unittest.TestCase): @@ -3119,11 +3104,10 @@ class CodePageTest(unittest.TestCase): (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'), (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'), )) - if VISTA_OR_LATER: - self.check_encode(self.CP_UTF8, ( - ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'), - ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), - )) + self.check_encode(self.CP_UTF8, ( + ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'), + ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), + )) def test_incremental(self): decoded = codecs.code_page_decode(932, b'\x82', 'strict', False) @@ -3144,6 +3128,20 @@ class CodePageTest(unittest.TestCase): False) self.assertEqual(decoded, ('abc', 3)) + def test_mbcs_alias(self): + # Check that looking up our 'default' codepage will return + # mbcs when we don't have a more specific one available + import _bootlocale + def _get_fake_codepage(*a): + return 'cp123' + old_getpreferredencoding = _bootlocale.getpreferredencoding + _bootlocale.getpreferredencoding = _get_fake_codepage + try: + codec = codecs.lookup('cp123') + self.assertEqual(codec.name, 'mbcs') + finally: + _bootlocale.getpreferredencoding = old_getpreferredencoding + class ASCIITest(unittest.TestCase): def test_encode(self): diff -r a1e032dbcf86 Modules/_codecsmodule.c --- a/Modules/_codecsmodule.c Wed Sep 07 00:32:06 2016 +0200 +++ b/Modules/_codecsmodule.c Tue Sep 06 15:35:53 2016 -0700 @@ -626,6 +626,25 @@ static PyObject * } /*[clinic input] +_codecs.oem_decode + data: Py_buffer + errors: str(accept={str, NoneType}) = NULL + final: int(c_default="0") = False + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, + const char *errors, int final) +/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/ +{ + Py_ssize_t consumed = data->len; + PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, + data->buf, data->len, errors, final ? NULL : &consumed); + return codec_tuple(decoded, consumed); +} + +/*[clinic input] _codecs.code_page_decode codepage: int data: Py_buffer @@ -971,6 +990,21 @@ static PyObject * } /*[clinic input] +_codecs.oem_encode + str: unicode + errors: str(accept={str, NoneType}) = NULL + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) +/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/ +{ + return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), + PyUnicode_GET_LENGTH(str)); +} + +/*[clinic input] _codecs.code_page_encode code_page: int str: unicode @@ -1075,6 +1109,8 @@ static PyMethodDef _codecs_functions[] = _CODECS_READBUFFER_ENCODE_METHODDEF _CODECS_MBCS_ENCODE_METHODDEF _CODECS_MBCS_DECODE_METHODDEF + _CODECS_OEM_ENCODE_METHODDEF + _CODECS_OEM_DECODE_METHODDEF _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF diff -r a1e032dbcf86 Modules/clinic/_codecsmodule.c.h --- a/Modules/clinic/_codecsmodule.c.h Wed Sep 07 00:32:06 2016 +0200 +++ b/Modules/clinic/_codecsmodule.c.h Tue Sep 06 15:35:53 2016 -0700 @@ -805,6 +805,45 @@ exit: #if defined(HAVE_MBCS) +PyDoc_STRVAR(_codecs_oem_decode__doc__, +"oem_decode($module, data, errors=None, final=False, /)\n" +"--\n" +"\n"); + +#define _CODECS_OEM_DECODE_METHODDEF \ + {"oem_decode", (PyCFunction)_codecs_oem_decode, METH_VARARGS, _codecs_oem_decode__doc__}, + +static PyObject * +_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, + const char *errors, int final); + +static PyObject * +_codecs_oem_decode(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + Py_buffer data = {NULL, NULL}; + const char *errors = NULL; + int final = 0; + + if (!PyArg_ParseTuple(args, "y*|zi:oem_decode", + &data, &errors, &final)) { + goto exit; + } + return_value = _codecs_oem_decode_impl(module, &data, errors, final); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +#endif /* defined(HAVE_MBCS) */ + +#if defined(HAVE_MBCS) + PyDoc_STRVAR(_codecs_code_page_decode__doc__, "code_page_decode($module, codepage, data, errors=None, final=False, /)\n" "--\n" @@ -1346,6 +1385,38 @@ exit: #if defined(HAVE_MBCS) +PyDoc_STRVAR(_codecs_oem_encode__doc__, +"oem_encode($module, str, errors=None, /)\n" +"--\n" +"\n"); + +#define _CODECS_OEM_ENCODE_METHODDEF \ + {"oem_encode", (PyCFunction)_codecs_oem_encode, METH_VARARGS, _codecs_oem_encode__doc__}, + +static PyObject * +_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors); + +static PyObject * +_codecs_oem_encode(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + PyObject *str; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "U|z:oem_encode", + &str, &errors)) { + goto exit; + } + return_value = _codecs_oem_encode_impl(module, str, errors); + +exit: + return return_value; +} + +#endif /* defined(HAVE_MBCS) */ + +#if defined(HAVE_MBCS) + PyDoc_STRVAR(_codecs_code_page_encode__doc__, "code_page_encode($module, code_page, str, errors=None, /)\n" "--\n" @@ -1446,6 +1517,10 @@ exit: #define _CODECS_MBCS_DECODE_METHODDEF #endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */ +#ifndef _CODECS_OEM_DECODE_METHODDEF + #define _CODECS_OEM_DECODE_METHODDEF +#endif /* !defined(_CODECS_OEM_DECODE_METHODDEF) */ + #ifndef _CODECS_CODE_PAGE_DECODE_METHODDEF #define _CODECS_CODE_PAGE_DECODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_DECODE_METHODDEF) */ @@ -1454,7 +1529,11 @@ exit: #define _CODECS_MBCS_ENCODE_METHODDEF #endif /* !defined(_CODECS_MBCS_ENCODE_METHODDEF) */ +#ifndef _CODECS_OEM_ENCODE_METHODDEF + #define _CODECS_OEM_ENCODE_METHODDEF +#endif /* !defined(_CODECS_OEM_ENCODE_METHODDEF) */ + #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=0221e4eece62c905 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7874e2d559d49368 input=a9049054013a1b77]*/ diff -r a1e032dbcf86 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Sep 07 00:32:06 2016 +0200 +++ b/Objects/unicodeobject.c Tue Sep 06 15:35:53 2016 -0700 @@ -7774,6 +7774,12 @@ PyUnicode_AsMBCSString(PyObject *unicode return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); } +PyObject * +_PyUnicode_AsOEMString(PyObject *unicode) +{ + return PyUnicode_EncodeCodePage(CP_OEMCP, unicode, NULL); +} + #undef NEED_RETRY #endif /* HAVE_MBCS */