# HG changeset patch # User Steve Dower # Date 1473121679 25200 # Mon Sep 05 17:27:59 2016 -0700 # Node ID 7748abe35dcc03c3e014d2c49f484620e4d16fa7 # Parent d52f10a0f10d157e74205922c2f0a90b7004022f Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to codec lookup diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1659,11 +1659,11 @@ #ifdef HAVE_MBCS -/* --- MBCS codecs for Windows -------------------------------------------- */ +/* --- MBCS and OEM codecs for Windows ------------------------------------ */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */ const char *errors /* error handling */ ); @@ -1674,6 +1674,19 @@ Py_ssize_t *consumed /* bytes consumed */ ); +PyAPI_FUNC(PyObject*) PyUnicode_DecodeOEM( + const char *string, /* OEM encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ +); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeOEMStateful( + const char *string, /* OEM encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ +); + PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( int code_page, /* code page number */ const char *string, /* encoded string */ @@ -1686,12 +1699,22 @@ PyObject *unicode /* Unicode object */ ); +PyAPI_FUNC(PyObject*) PyUnicode_AsOEMString( + PyObject *unicode /* Unicode object */ +); + #ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ const char *errors /* error handling */ ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeOEM( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char
*errors /* error handling */ + ); #endif PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -29,6 +29,7 @@ """#" import codecs +import sys from . import aliases _cache = {} @@ -151,3 +152,16 @@ # Register the search_function in the Python codec registry codecs.register(search_function) + +if sys.platform == 'win32': + def _alias_mbcs(encoding): + # Resolve the preferred encoding reported by _bootlocale to the mbcs codec. + try: + import _bootlocale + if encoding == _bootlocale.getpreferredencoding(False): + return search_function('mbcs') + except ImportError: + # Imports may fail while we are shutting down + pass + + codecs.register(_alias_mbcs) diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -458,6 +458,7 @@ 'macturkish' : 'mac_turkish', # mbcs codec + 'ansi' : 'mbcs', 'dbcs' : 'mbcs', # ptcp154 codec diff --git a/Lib/site.py b/Lib/site.py --- a/Lib/site.py +++ b/Lib/site.py @@ -423,21 +423,6 @@ sys.__interactivehook__ = register_readline -def aliasmbcs(): - """On Windows, some default encodings are not provided by Python, - while they are always available as "mbcs" in each locale. Make - them usable by aliasing to "mbcs" in such a case.""" - if sys.platform == 'win32': - import _bootlocale, codecs - enc = _bootlocale.getpreferredencoding(False) - if enc.startswith('cp'): # "cp***" ?
- try: - codecs.lookup(enc) - except LookupError: - import encodings - encodings._cache[enc] = encodings._unknown - encodings.aliases.aliases[enc] = 'mbcs' - CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$' def venv(known_paths): @@ -560,7 +545,6 @@ setcopyright() sethelper() enablerlcompleter() - aliasmbcs() execsitecustomize() if ENABLE_USER_SITE: execusercustomize() diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -626,6 +626,25 @@ } /*[clinic input] +_codecs.oem_decode + data: Py_buffer + errors: str(accept={str, NoneType}) = NULL + final: int(c_default="0") = False + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, + const char *errors, int final) +/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/ +{ + Py_ssize_t consumed = data->len; + PyObject *decoded = PyUnicode_DecodeOEMStateful(data->buf, data->len, + errors, final ?
NULL : &consumed); + return codec_tuple(decoded, consumed); +} + +/*[clinic input] _codecs.code_page_decode codepage: int data: Py_buffer @@ -971,6 +990,21 @@ } /*[clinic input] +_codecs.oem_encode + str: unicode + errors: str(accept={str, NoneType}) = NULL + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) +/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/ +{ + return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), + PyUnicode_GET_LENGTH(str)); +} + +/*[clinic input] _codecs.code_page_encode code_page: int str: unicode @@ -1075,6 +1109,8 @@ _CODECS_READBUFFER_ENCODE_METHODDEF _CODECS_MBCS_ENCODE_METHODDEF _CODECS_MBCS_DECODE_METHODDEF + _CODECS_OEM_ENCODE_METHODDEF + _CODECS_OEM_DECODE_METHODDEF _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -805,6 +805,45 @@ #if defined(HAVE_MBCS) +PyDoc_STRVAR(_codecs_oem_decode__doc__, +"oem_decode($module, data, errors=None, final=False, /)\n" +"--\n" +"\n"); + +#define _CODECS_OEM_DECODE_METHODDEF \ + {"oem_decode", (PyCFunction)_codecs_oem_decode, METH_VARARGS, _codecs_oem_decode__doc__}, + +static PyObject * +_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, + const char *errors, int final); + +static PyObject * +_codecs_oem_decode(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + Py_buffer data = {NULL, NULL}; + const char *errors = NULL; + int final = 0; + + if (!PyArg_ParseTuple(args, "y*|zi:oem_decode", + &data, &errors, &final)) { + goto exit; + } + return_value = _codecs_oem_decode_impl(module, &data, errors, final); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +#endif /* 
defined(HAVE_MBCS) */ + +#if defined(HAVE_MBCS) + PyDoc_STRVAR(_codecs_code_page_decode__doc__, "code_page_decode($module, codepage, data, errors=None, final=False, /)\n" "--\n" @@ -1346,6 +1385,38 @@ #if defined(HAVE_MBCS) +PyDoc_STRVAR(_codecs_oem_encode__doc__, +"oem_encode($module, str, errors=None, /)\n" +"--\n" +"\n"); + +#define _CODECS_OEM_ENCODE_METHODDEF \ + {"oem_encode", (PyCFunction)_codecs_oem_encode, METH_VARARGS, _codecs_oem_encode__doc__}, + +static PyObject * +_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors); + +static PyObject * +_codecs_oem_encode(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + PyObject *str; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "U|z:oem_encode", + &str, &errors)) { + goto exit; + } + return_value = _codecs_oem_encode_impl(module, str, errors); + +exit: + return return_value; +} + +#endif /* defined(HAVE_MBCS) */ + +#if defined(HAVE_MBCS) + PyDoc_STRVAR(_codecs_code_page_encode__doc__, "code_page_encode($module, code_page, str, errors=None, /)\n" "--\n" @@ -1446,6 +1517,10 @@ #define _CODECS_MBCS_DECODE_METHODDEF #endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */ +#ifndef _CODECS_OEM_DECODE_METHODDEF + #define _CODECS_OEM_DECODE_METHODDEF +#endif /* !defined(_CODECS_OEM_DECODE_METHODDEF) */ + #ifndef _CODECS_CODE_PAGE_DECODE_METHODDEF #define _CODECS_CODE_PAGE_DECODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_DECODE_METHODDEF) */ @@ -1454,7 +1529,11 @@ #define _CODECS_MBCS_ENCODE_METHODDEF #endif /* !defined(_CODECS_MBCS_ENCODE_METHODDEF) */ +#ifndef _CODECS_OEM_ENCODE_METHODDEF + #define _CODECS_OEM_ENCODE_METHODDEF +#endif /* !defined(_CODECS_OEM_ENCODE_METHODDEF) */ + #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=0221e4eece62c905 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7874e2d559d49368 
input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7386,6 +7386,23 @@ return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); } +PyObject * +PyUnicode_DecodeOEMStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + return decode_code_page_stateful(CP_OEMCP, s, size, errors, consumed); +} + +PyObject * +PyUnicode_DecodeOEM(const char *s, + Py_ssize_t size, + const char *errors) +{ + return PyUnicode_DecodeOEMStateful(s, size, errors, NULL); +} + static DWORD encode_code_page_flags(UINT code_page, const char *errors) { @@ -7767,6 +7784,20 @@ } PyObject * +PyUnicode_EncodeOEM(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *unicode, *res; + unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + res = encode_code_page(CP_OEMCP, unicode, errors); + Py_DECREF(unicode); + return res; +} + +PyObject * PyUnicode_EncodeCodePage(int code_page, PyObject *unicode, const char *errors) @@ -7780,6 +7811,12 @@ return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); } +PyObject * +PyUnicode_AsOEMString(PyObject *unicode) +{ + return PyUnicode_EncodeCodePage(CP_OEMCP, unicode, NULL); +} + #undef NEED_RETRY #endif /* HAVE_MBCS */