Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.202 diff -u -r2.202 unicodeobject.c --- unicodeobject.c 26 Nov 2003 08:21:35 -0000 2.202 +++ unicodeobject.c 29 Nov 2003 01:29:14 -0000 @@ -2591,24 +2591,52 @@ { PyUnicodeObject *v; Py_UNICODE *p; + DWORD usize; + DWORD dwFlags; + + /* check and handle 'errors' arg */ + if (errors==NULL || strcmp(errors, "strict")==0) + dwFlags = MB_ERR_INVALID_CHARS; + else if (strcmp(errors, "ignore")==0 || strcmp(errors, "replace")==0) + dwFlags = 0; + else + return PyErr_Format(PyExc_ValueError, + "mbcs encoding does not support errors='%s'", + errors); + + /* get the size of the result */ + usize = MultiByteToWideChar(CP_ACP, dwFlags, s, size, NULL, 0); - /* First get the size of the result */ - DWORD usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0); if (size > 0 && usize==0) - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + goto mbcs_decode_error; v = _PyUnicode_New(usize); if (v == NULL) return NULL; if (usize == 0) - return (PyObject *)v; + return (PyObject *)v; p = PyUnicode_AS_UNICODE(v); - if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) { + if (0 == MultiByteToWideChar(CP_ACP, dwFlags, s, size, p, usize)) { Py_DECREF(v); - return PyErr_SetFromWindowsErrWithFilename(0, NULL); + goto mbcs_decode_error; } return (PyObject *)v; +mbcs_decode_error: + /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then + we raise a UnicodeDecodeError - else it is a 'generic' + windows error + */ + if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) { + /* Ideally, we should get reason from FormatMessage - this + is the Windows 2000 English version of the message + */ + const char *reason = "No mapping for the Unicode character exists " + "in the target multi-byte code page."; + /* what should we use for startinpos and endinpos? */ + return PyUnicodeDecodeError_Create("mbcs", s, size, 0, 0, reason); + } + return PyErr_SetFromWindowsErrWithFilename(0, NULL); } PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, @@ -2619,14 +2647,31 @@ char *s; DWORD mbcssize; + BOOL usedDefaultChar = FALSE; + BOOL *pusedDefaultChar = NULL; + + /* check and handle 'errors' arg */ + if (errors==NULL || strcmp(errors, "strict")==0) + pusedDefaultChar = &usedDefaultChar; + else if (strcmp(errors, "ignore")==0 || strcmp(errors, "replace")==0) { + ; /* pusedDefaultChar remains NULL */ + } else + return PyErr_Format(PyExc_ValueError, + "mbcs encoding does not support errors='%s'", + errors); + /* If there are no characters, bail now! */ if (size==0) - return PyString_FromString(""); + return PyString_FromString(""); /* First get the size of the result */ - mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL); - if (mbcssize==0) + mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, + NULL, pusedDefaultChar); + if (mbcssize==0) /* general failure */ return PyErr_SetFromWindowsErrWithFilename(0, NULL); + /* If we used a default char, then we failed! */ + if (pusedDefaultChar && *pusedDefaultChar) + goto mbcs_encode_error; repr = PyString_FromStringAndSize(NULL, mbcssize); if (repr == NULL) @@ -2636,11 +2681,27 @@ /* Do the conversion */ s = PyString_AS_STRING(repr); - if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { + if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, + NULL, pusedDefaultChar)) { Py_DECREF(repr); return PyErr_SetFromWindowsErrWithFilename(0, NULL); } + if (pusedDefaultChar && *pusedDefaultChar) { + Py_DECREF(repr); + goto mbcs_encode_error; + } return repr; +mbcs_encode_error: + /* This is only for encoding error, not Windows errors */ + { + PyObject *exc = NULL; + /* Not sure what to use here? Again, what size to use? */ + const char *reason = "invalid character"; + raise_encode_exception(&exc, "mbcs", p, size, + 0, 0, reason); + Py_XDECREF(exc); + } + return NULL; } PyObject *PyUnicode_AsMBCSString(PyObject *unicode)