Summary of changes in this patch:

* Lib/tarfile.py: the default ENCODING is 'utf-8' when os.name is 'nt';
  otherwise it is sys.getfilesystemencoding(), falling back to "ascii".
* Lib/test/test_ssl.py: bytes filenames are only built and exercised when
  os.name is not 'nt'.
* Objects/unicodeobject.c: PyUnicode_EncodeFSDefault() and the mbcs branch
  that calls PyUnicode_DecodeMBCS() pass errors=NULL to the mbcs codec.
  decode_mbcs() and encode_mbcs() take an 'errors' argument: NULL/"strict"
  and "replace" are accepted, anything else raises ValueError.

Index: Lib/tarfile.py
===================================================================
--- Lib/tarfile.py	(revision 81506)
+++ Lib/tarfile.py	(working copy)
@@ -159,9 +159,12 @@
 #---------------------------------------------------------
 # initialization
 #---------------------------------------------------------
-ENCODING = sys.getfilesystemencoding()
-if ENCODING is None:
-    ENCODING = "ascii"
+if os.name != 'nt':
+    ENCODING = sys.getfilesystemencoding()
+    if ENCODING is None:
+        ENCODING = "ascii"
+else:
+    ENCODING = 'utf-8'
 
 #---------------------------------------------------------
 # Some useful functions
Index: Lib/test/test_ssl.py
===================================================================
--- Lib/test/test_ssl.py	(revision 81506)
+++ Lib/test/test_ssl.py	(working copy)
@@ -33,16 +33,16 @@
 HOST = support.HOST
 
 data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
-fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
 
 CERTFILE = data_file("keycert.pem")
-BYTES_CERTFILE = fsencode(CERTFILE)
 ONLYCERT = data_file("ssl_cert.pem")
 ONLYKEY = data_file("ssl_key.pem")
-BYTES_ONLYCERT = fsencode(ONLYCERT)
-BYTES_ONLYKEY = fsencode(ONLYKEY)
 CAPATH = data_file("capath")
-BYTES_CAPATH = fsencode(CAPATH)
+if os.name != "nt":
+    BYTES_CERTFILE = os.fsencode(CERTFILE)
+    BYTES_ONLYCERT = os.fsencode(ONLYCERT)
+    BYTES_ONLYKEY = os.fsencode(ONLYKEY)
+    BYTES_CAPATH = os.fsencode(CAPATH)
 
 SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
 
@@ -264,8 +264,9 @@
         ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
         ctx.load_verify_locations(CERTFILE)
         ctx.load_verify_locations(cafile=CERTFILE, capath=None)
-        ctx.load_verify_locations(BYTES_CERTFILE)
-        ctx.load_verify_locations(cafile=BYTES_CERTFILE, capath=None)
+        if os.name != 'nt':
+            ctx.load_verify_locations(BYTES_CERTFILE)
+            ctx.load_verify_locations(cafile=BYTES_CERTFILE, capath=None)
         self.assertRaises(TypeError, ctx.load_verify_locations)
         self.assertRaises(TypeError, ctx.load_verify_locations, None, None)
         with self.assertRaisesRegexp(ssl.SSLError, "system lib"):
@@ -273,7 +274,8 @@
         with self.assertRaisesRegexp(ssl.SSLError, "PEM lib"):
             ctx.load_verify_locations(BADCERT)
         ctx.load_verify_locations(CERTFILE, CAPATH)
-        ctx.load_verify_locations(CERTFILE, capath=BYTES_CAPATH)
+        if os.name != 'nt':
+            ctx.load_verify_locations(CERTFILE, capath=BYTES_CAPATH)
 
 
 class NetworkedTests(unittest.TestCase):
@@ -335,27 +337,21 @@
         # OpenSSL 0.9.8n and 1.0.0, as a result the capath directory must
         # contain both versions of each certificate (same content, different
         # filename) for this test to be portable across OpenSSL releases.
-        ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
-        ctx.verify_mode = ssl.CERT_REQUIRED
-        ctx.load_verify_locations(capath=CAPATH)
-        s = ctx.wrap_socket(socket.socket(socket.AF_INET))
-        s.connect(("svn.python.org", 443))
-        try:
-            cert = s.getpeercert()
-            self.assertTrue(cert)
-        finally:
-            s.close()
-        # Same with a bytes `capath` argument
-        ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
-        ctx.verify_mode = ssl.CERT_REQUIRED
-        ctx.load_verify_locations(capath=BYTES_CAPATH)
-        s = ctx.wrap_socket(socket.socket(socket.AF_INET))
-        s.connect(("svn.python.org", 443))
-        try:
-            cert = s.getpeercert()
-            self.assertTrue(cert)
-        finally:
-            s.close()
+        if os.name != 'nt':
+            filenames = (CAPATH, BYTES_CAPATH)
+        else:
+            filenames = (CAPATH,)
+        for filename in filenames:
+            ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+            ctx.verify_mode = ssl.CERT_REQUIRED
+            ctx.load_verify_locations(capath=filename)
+            s = ctx.wrap_socket(socket.socket(socket.AF_INET))
+            s.connect(("svn.python.org", 443))
+            try:
+                cert = s.getpeercert()
+                self.assertTrue(cert)
+            finally:
+                s.close()
 
     @unittest.skipIf(os.name == "nt", "Can't use a socket as a file under Windows")
     def test_makefile_close(self):
@@ -1425,10 +1421,13 @@
     if skip_expected:
         raise unittest.SkipTest("No SSL support")
 
-    for filename in [
-        CERTFILE, SVN_PYTHON_ORG_ROOT_CERT, BYTES_CERTFILE,
-        ONLYCERT, ONLYKEY, BYTES_ONLYCERT, BYTES_ONLYKEY,
-        BADCERT, BADKEY, EMPTYCERT]:
+    filenames = [
+        CERTFILE, SVN_PYTHON_ORG_ROOT_CERT,
+        ONLYCERT, ONLYKEY,
+        BADCERT, BADKEY, EMPTYCERT]
+    if os.name != "nt":
+        filenames.extend((BYTES_CERTFILE, BYTES_ONLYCERT, BYTES_ONLYKEY, BYTES_CAPATH))
+    for filename in filenames:
         if not os.path.exists(filename):
             raise support.TestFailed("Can't read certificate file %r" % filename)
 
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 81506)
+++ Objects/unicodeobject.c	(working copy)
@@ -1463,11 +1463,17 @@
 
 PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
-    if (Py_FileSystemDefaultEncoding)
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
+            return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
+                                        PyUnicode_GET_SIZE(unicode),
+                                        NULL);
+#endif
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
-    else
+    } else
         return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     "surrogateescape");
@@ -1620,7 +1626,7 @@
     if (Py_FileSystemDefaultEncoding) {
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
         if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
-            return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
+            return PyUnicode_DecodeMBCS(s, size, NULL);
         }
 #elif defined(__APPLE__)
         if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@@ -4524,32 +4530,47 @@
 static int decode_mbcs(PyUnicodeObject **v,
                        const char *s, /* MBCS string */
                        int size, /* sizeof MBCS string */
-                       int final)
+                       int final,
+                       const char *errors)
 {
     Py_UNICODE *p;
-    Py_ssize_t n = 0;
-    int usize = 0;
+    Py_ssize_t n;
+    DWORD usize;
+    DWORD dwFlags;
 
     assert(size >= 0);
 
+    /* check and handle 'errors' arg */
+    if (errors==NULL || strcmp(errors, "strict")==0)
+        dwFlags = MB_ERR_INVALID_CHARS;
+    else if (strcmp(errors, "replace")==0)
+        dwFlags = 0;
+    else {
+        PyErr_Format(PyExc_ValueError,
+                     "mbcs encoding does not support errors='%s'",
+                     errors);
+        return -1;
+    }
+
     /* Skip trailing lead-byte unless 'final' is set */
     if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1))
         --size;
 
     /* First get the size of the result */
     if (size > 0) {
-        usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0);
-        if (usize == 0) {
-            PyErr_SetFromWindowsErrWithFilename(0, NULL);
-            return -1;
-        }
-    }
+        /* get the size of the result */
+        usize = MultiByteToWideChar(CP_ACP, dwFlags, s, size, NULL, 0);
+        if (usize==0)
+            goto mbcs_decode_error;
+    } else
+        usize = 0;
 
     if (*v == NULL) {
         /* Create unicode object */
         *v = _PyUnicode_New(usize);
         if (*v == NULL)
             return -1;
+        n = 0;
     }
     else {
         /* Extend unicode object */
@@ -4559,15 +4580,39 @@
     }
 
     /* Do the conversion */
-    if (size > 0) {
+    if (usize > 0) {
         p = PyUnicode_AS_UNICODE(*v) + n;
-        if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) {
-            PyErr_SetFromWindowsErrWithFilename(0, NULL);
-            return -1;
+        if (0 == MultiByteToWideChar(CP_ACP, dwFlags, s, size, p, usize)) {
+            goto mbcs_decode_error;
         }
     }
 
     return size;
+
+mbcs_decode_error:
+    /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then
+       we raise a UnicodeDecodeError - else it is a 'generic'
+       windows error
+     */
+    if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) {
+        /* Ideally, we should get reason from FormatMessage - this
+           is the Windows 2000 English version of the message
+        */
+        PyObject *err;
+        const char *reason = "No mapping for the Unicode character exists "
+                             "in the target multi-byte code page.";
+        /* MultiByteToWideChar() does not say which byte was invalid,
+           so report an empty (0, 0) range */
+        err = PyUnicodeDecodeError_Create("mbcs", s, size, 0, 0, reason);
+        if (err != NULL) {
+            PyErr_SetObject(PyExc_UnicodeDecodeError, err);
+            Py_DECREF(err);
+        }
+        /* else: creating the exception failed and already set an error */
+        return -1;
+    }
+    PyErr_SetFromWindowsErrWithFilename(0, NULL);
+    return -1;
 }
 
 PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
@@ -4584,10 +4629,10 @@
 #ifdef NEED_RETRY
   retry:
     if (size > INT_MAX)
-        done = decode_mbcs(&v, s, INT_MAX, 0);
+        done = decode_mbcs(&v, s, INT_MAX, 0, errors);
     else
 #endif
-        done = decode_mbcs(&v, s, (int)size, !consumed);
+        done = decode_mbcs(&v, s, (int)size, !consumed, errors);
 
     if (done < 0) {
         Py_XDECREF(v);
@@ -4621,20 +4666,41 @@
  */
 static int encode_mbcs(PyObject **repr,
                        const Py_UNICODE *p, /* unicode */
-                       int size) /* size of unicode */
+                       int size, /* size of unicode */
+                       const char* errors)
 {
-    int mbcssize = 0;
-    Py_ssize_t n = 0;
+    int mbcssize;
+    Py_ssize_t n;
+    BOOL usedDefaultChar = FALSE;
+    BOOL *pusedDefaultChar = NULL;
 
     assert(size >= 0);
 
+    /* check and handle 'errors' arg */
+    if (errors==NULL || strcmp(errors, "strict")==0)
+        pusedDefaultChar = &usedDefaultChar;
+    else if (strcmp(errors, "replace")==0) {
+        ; /* pusedDefaultChar remains NULL */
+    } else {
+        PyErr_Format(PyExc_ValueError,
+                     "mbcs encoding does not support errors='%s'",
+                     errors);
+        return -1;
+    }
+
     /* First get the size of the result */
     if (size > 0) {
-        mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL);
+        mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0,
+                                       NULL, pusedDefaultChar);
         if (mbcssize == 0) {
             PyErr_SetFromWindowsErrWithFilename(0, NULL);
             return -1;
         }
+        /* If we used a default char, then we failed! */
+        if (pusedDefaultChar && *pusedDefaultChar)
+            goto mbcs_encode_error;
+    } else {
+        mbcssize = 0;
     }
 
     if (*repr == NULL) {
@@ -4642,6 +4708,7 @@
         *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
         if (*repr == NULL)
             return -1;
+        n = 0;
     }
     else {
         /* Extend string object */
@@ -4653,13 +4720,30 @@
     /* Do the conversion */
     if (size > 0) {
         char *s = PyBytes_AS_STRING(*repr) + n;
-        if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
+        if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize,
+                                     NULL, pusedDefaultChar)) {
             PyErr_SetFromWindowsErrWithFilename(0, NULL);
             return -1;
         }
+        if (pusedDefaultChar && *pusedDefaultChar)
+            goto mbcs_encode_error;
     }
 
     return 0;
+
+mbcs_encode_error:
+    /* This is only for encoding error, not Windows errors.  *repr is
+       left untouched: the caller releases it when we return -1. */
+    {
+        PyObject *exc = NULL;
+        /* WideCharToMultiByte() does not identify the unencodable
+           character, so report an empty (0, 0) range */
+        const char *reason = "invalid character";
+        raise_encode_exception(&exc, "mbcs", p, size,
+                               0, 0, reason);
+        Py_XDECREF(exc);
+    }
+    return -1;
 }
 
 PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
@@ -4672,10 +4756,10 @@
 #ifdef NEED_RETRY
   retry:
     if (size > INT_MAX)
-        ret = encode_mbcs(&repr, p, INT_MAX);
+        ret = encode_mbcs(&repr, p, INT_MAX, errors);
     else
 #endif
-        ret = encode_mbcs(&repr, p, (int)size);
+        ret = encode_mbcs(&repr, p, (int)size, errors);
 
     if (ret < 0) {
         Py_XDECREF(repr);