Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (révision 81927) +++ Objects/unicodeobject.c (copie de travail) @@ -1767,6 +1767,33 @@ return 0; } +/* create or adjust a UnicodeDecodeError */ +static void +make_decode_exception(PyObject **exceptionObject, + const char *encoding, + const char *input, Py_ssize_t length, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) +{ + if (*exceptionObject == NULL) { + *exceptionObject = PyUnicodeDecodeError_Create( + encoding, input, length, startpos, endpos, reason); + } + else { + if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos)) + goto onError; + if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos)) + goto onError; + if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) + goto onError; + } + return; + +onError: + Py_DECREF(*exceptionObject); + *exceptionObject = NULL; +} + /* error handling callback helper: build arguments, call the callback and check the arguments, if no exception occurred, copy the replacement to the output @@ -1800,20 +1827,13 @@ goto onError; } - if (*exceptionObject == NULL) { - *exceptionObject = PyUnicodeDecodeError_Create( - encoding, *input, *inend-*input, *startinpos, *endinpos, reason); - if (*exceptionObject == NULL) - goto onError; - } - else { - if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos)) - goto onError; - if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos)) - goto onError; - if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) - goto onError; - } + make_decode_exception(exceptionObject, + encoding, + *input, *inend - *input, + *startinpos, *endinpos, + reason); + if (*exceptionObject == NULL) + goto onError; restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); if (restuple == NULL) @@ -4551,32 +4571,47 @@ static int decode_mbcs(PyUnicodeObject **v, const char *s, /* MBCS string */ int size, /* sizeof MBCS string */ - int final) + int final, + const char *errors) { Py_UNICODE *p; - Py_ssize_t n = 0; - int usize = 0; + Py_ssize_t n; + DWORD usize; + DWORD dwFlags; assert(size >= 0); + /* check and handle 'errors' arg */ + if (errors==NULL || strcmp(errors, "strict")==0) + dwFlags = MB_ERR_INVALID_CHARS; + else if (strcmp(errors, "replace")==0) + dwFlags = 0; + else { + PyErr_Format(PyExc_ValueError, + "mbcs encoding does not support errors='%s'", + errors); + return -1; + } + /* Skip trailing lead-byte unless 'final' is set */ if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1)) --size; /* First get the size of the result */ if (size > 0) { - usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0); - if (usize == 0) { - PyErr_SetFromWindowsErrWithFilename(0, NULL); - return -1; - } - } + /* get the size of the result */ + usize = MultiByteToWideChar(CP_ACP, dwFlags, s, size, NULL, 0); + if (usize==0) + goto mbcs_decode_error; + } else + usize = 0; if (*v == NULL) { /* Create unicode object */ *v = _PyUnicode_New(usize); if (*v == NULL) return -1; + n = 0; } else { /* Extend unicode object */ @@ -4586,15 +4621,35 @@ } /* Do the conversion */ - if (size > 0) { + if (usize > 0) { p = PyUnicode_AS_UNICODE(*v) + n; - if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) { - PyErr_SetFromWindowsErrWithFilename(0, NULL); - return -1; + if (0 == MultiByteToWideChar(CP_ACP, dwFlags, s, size, p, usize)) { + goto mbcs_decode_error; } } + return size; - return size; +mbcs_decode_error: + /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then + we raise a UnicodeDecodeError - else it is a 'generic' + windows error + */ + if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) { + /* Ideally, we should get reason from FormatMessage - this + is the Windows 2000 English version of the message + */ + PyObject *exc = NULL; + const char *reason = "No mapping for the Unicode character exists " + "in the target multi-byte code page."; + make_decode_exception(&exc, "mbcs", s, size, 0, 0, reason); + if (exc != NULL) { + PyCodec_StrictErrors(exc); + Py_DECREF(exc); + } + return -1; + } + PyErr_SetFromWindowsErrWithFilename(0, NULL); + return -1; } PyObject *PyUnicode_DecodeMBCSStateful(const char *s, @@ -4611,10 +4666,10 @@ #ifdef NEED_RETRY retry: if (size > INT_MAX) - done = decode_mbcs(&v, s, INT_MAX, 0); + done = decode_mbcs(&v, s, INT_MAX, 0, errors); else #endif - done = decode_mbcs(&v, s, (int)size, !consumed); + done = decode_mbcs(&v, s, (int)size, !consumed, errors); if (done < 0) { Py_XDECREF(v); @@ -4648,20 +4703,42 @@ */ static int encode_mbcs(PyObject **repr, const Py_UNICODE *p, /* unicode */ - int size) /* size of unicode */ + int size, /* size of unicode */ + const char* errors) { - int mbcssize = 0; - Py_ssize_t n = 0; + int mbcssize; + Py_ssize_t n; + BOOL usedDefaultChar = FALSE; + BOOL *pusedDefaultChar = NULL; + PyObject *exc = NULL; assert(size >= 0); + /* check and handle 'errors' arg */ + if (errors==NULL || strcmp(errors, "strict")==0) + pusedDefaultChar = &usedDefaultChar; + else if (strcmp(errors, "replace")==0) { + ; /* pusedDefaultChar remains NULL */ + } else { + PyErr_Format(PyExc_ValueError, + "mbcs encoding does not support errors='%s'", + errors); + return -1; + } + /* First get the size of the result */ if (size > 0) { - mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL); + mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, + NULL, pusedDefaultChar); if (mbcssize == 0) { PyErr_SetFromWindowsErrWithFilename(0, NULL); return -1; } + /* If we used a default char, then we failed! */ + if (pusedDefaultChar && *pusedDefaultChar) + goto mbcs_encode_error; + } else { + mbcssize = 0; } if (*repr == NULL) { @@ -4669,6 +4746,7 @@ *repr = PyBytes_FromStringAndSize(NULL, mbcssize); if (*repr == NULL) return -1; + n = 0; } else { /* Extend string object */ @@ -4680,13 +4758,24 @@ /* Do the conversion */ if (size > 0) { char *s = PyBytes_AS_STRING(*repr) + n; - if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { + if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, + NULL, pusedDefaultChar)) { PyErr_SetFromWindowsErrWithFilename(0, NULL); return -1; } + if (pusedDefaultChar && *pusedDefaultChar) { + Py_DECREF(*repr); + goto mbcs_encode_error; + } } + return 0; - return 0; +mbcs_encode_error: + raise_encode_exception(&exc, "mbcs", p, size, + 0, 0, "invalid character"); + Py_XDECREF(exc); + *repr = NULL; + return -1; } PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, @@ -4699,10 +4788,10 @@ #ifdef NEED_RETRY retry: if (size > INT_MAX) - ret = encode_mbcs(&repr, p, INT_MAX); + ret = encode_mbcs(&repr, p, INT_MAX, errors); else #endif - ret = encode_mbcs(&repr, p, (int)size); + ret = encode_mbcs(&repr, p, (int)size, errors); if (ret < 0) { Py_XDECREF(repr); Index: Lib/ctypes/__init__.py =================================================================== --- Lib/ctypes/__init__.py (révision 81910) +++ Lib/ctypes/__init__.py (copie de travail) @@ -265,7 +265,7 @@ pass else: if _os.name in ("nt", "ce"): - set_conversion_mode("mbcs", "ignore") + set_conversion_mode("mbcs", "strict") else: set_conversion_mode("ascii", "strict") Index: Lib/test/test_codecs.py =================================================================== --- Lib/test/test_codecs.py (révision 81910) +++ Lib/test/test_codecs.py (copie de travail) @@ -1358,11 +1358,6 @@ "idna", ] -# The following encodings only support "strict" mode -only_strict_mode = [ - "idna", -] - class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): def test_basics(self): s = "abc123" # all codecs should be able to encode these @@ -1437,7 +1432,7 @@ result = "".join(codecs.iterdecode(codecs.iterencode("", encoding), encoding)) self.assertEqual(result, "") - if encoding not in only_strict_mode: + if encoding not in ("idna", "mbcs"): # check incremental decoder/encoder with errors argument try: encoder = codecs.getincrementalencoder(encoding)("ignore")