Patch:
--------------------------------
import sys
import builtins

def hook(message):
    if message is None:
        return
    builtins._ = message
    try:
        print(repr(message))
    except UnicodeEncodeError:
        print(ascii(message))

sys.displayhook = hook
--------------------------------

# This file shows the behavior of Py2.x on Windows (non-UTF8 terminal) and
# Linux (UTF8 terminal), the "unexpected" errors raised by Py3 on Windows
# and how the patch can fix them, and finally shows that the patch doesn't
# affect the behavior on a UTF8 terminal.

# Python 2.6, Windows, non-UTF8 terminal
Python 2.6 (r26:66721, Oct 2 2008, 11:35:03) [MSC v.1500 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> u'\ud800'
u'\ud800'
>>> print u'\ud800'
UnicodeEncodeError: 'charmap' codec can't encode character u'\ud800' in position 0: character maps to <undefined>
>>> u'\u2620'
u'\u2620'
>>> print u'\u2620'
UnicodeEncodeError: 'charmap' codec can't encode character u'\u2620' in position 0: character maps to <undefined>
>>> [u'àáâãäåæ', u'ÀÁÂÃÄÅÆ']
[u'\xe0\xe1\xe2\xe3\xe4\xe5\xe6', u'\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
>>> print [u'àáâãäåæ', u'ÀÁÂÃÄÅÆ']
[u'\xe0\xe1\xe2\xe3\xe4\xe5\xe6', u'\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
>>> [u'\u30aa\u30ab', u'\u30b1\u30b2']
[u'\u30aa\u30ab', u'\u30b1\u30b2']

# Python 2.5, Linux, UTF8 terminal
Python 2.5.2 (r252:60911, Jul 31 2008, 17:28:52)
[GCC 4.2.3 (Ubuntu 4.2.3-2ubuntu7)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> u'\ud800'
u'\ud800'
>>> print u'\ud800'
�
>>> u'\u2620'
u'\u2620'
>>> print u'\u2620'
☠
>>> [u'àáâãäåæ', u'ÀÁÂÃÄÅÆ']
[u'\xe0\xe1\xe2\xe3\xe4\xe5\xe6', u'\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
>>> print [u'àáâãäåæ', u'ÀÁÂÃÄÅÆ']
[u'\xe0\xe1\xe2\xe3\xe4\xe5\xe6', u'\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
>>> [u'\u30aa\u30ab', u'\u30b1\u30b2']
[u'\u30aa\u30ab', u'\u30b1\u30b2']

# Python 3.0, Windows, non-UTF8 terminal
Python 3.0 (r30:67507, Dec 3 2008, 20:14:27) [MSC v.1500 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> '\ud800'
'\ud800'
>>> print('\ud800')
UnicodeEncodeError: 'charmap' codec can't encode character '\ud800' in position 0: character maps to <undefined>
>>> '\u2620' # this should print '\u2620', instead it raises an error
UnicodeEncodeError: 'charmap' codec can't encode character '\u2620' in position 1: character maps to <undefined>
>>> print('\u2620') # this correctly raises an error
UnicodeEncodeError: 'charmap' codec can't encode character '\u2620' in position 0: character maps to <undefined>
>>> ['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> print(['àáâãäåæ', 'ÀÁÂÃÄÅÆ'])
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\xe0\xe1\xe2\xe3\xe4\xe5\xe6', '\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\u30aa\u30ab', '\u30b1\u30b2'] # this shouldn't raise an error
UnicodeEncodeError: 'charmap' codec can't encode characters in position 2-3: character maps to <undefined>

# Python 3.0, Windows, non-UTF8 terminal, patched
Python 3.0 (r30:67507, Dec 3 2008, 20:14:27) [MSC v.1500 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import sys
>>> import builtins
>>>
>>> def hook(message):
...     if message is None:
...         return
...     builtins._ = message
...     try:
...         print(repr(message))
...     except UnicodeEncodeError:
...         print(ascii(message))
...
>>> sys.displayhook = hook
>>> '\ud800'
'\ud800'
>>> print('\ud800')
UnicodeEncodeError: 'charmap' codec can't encode character '\ud800' in position 0: character maps to <undefined>
>>> '\u2620'
'\u2620'
>>> print('\u2620')
UnicodeEncodeError: 'charmap' codec can't encode character '\u2620' in position 0: character maps to <undefined>
>>> ['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> print(['àáâãäåæ', 'ÀÁÂÃÄÅÆ'])
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\xe0\xe1\xe2\xe3\xe4\xe5\xe6', '\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\u30aa\u30ab', '\u30b1\u30b2']
['\u30aa\u30ab', '\u30b1\u30b2']
>>> print(['\u30aa\u30ab', '\u30b1\u30b2'])
UnicodeEncodeError: 'charmap' codec can't encode characters in position 2-3: character maps to <undefined>

# Python 3.0, Linux, UTF8 terminal
Python 3.0 (r30:67503, Jan 31 2009, 03:49:04)
[GCC 4.2.4 (Ubuntu 4.2.4-1ubuntu3)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> '\ud800'
'\ud800'
>>> print('\ud800')
�
>>> '\u2620'
'☠'
>>> print('\u2620')
☠
>>> ['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\xe0\xe1\xe2\xe3\xe4\xe5\xe6', '\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\u30aa\u30ab', '\u30b1\u30b2']
['オカ', 'ケゲ']
>>> print(['\u30aa\u30ab', '\u30b1\u30b2'])
['オカ', 'ケゲ']

# Python 3.0, Linux, UTF8 terminal, patched
Python 3.0 (r30:67503, Jan 31 2009, 03:49:04)
[GCC 4.2.4 (Ubuntu 4.2.4-1ubuntu3)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import sys
>>> import builtins
>>>
>>> def hook(message):
...     if message is None:
...         return
...     builtins._ = message
...     try:
...         print(repr(message))
...     except UnicodeEncodeError:
...         print(ascii(message))
...
>>> sys.displayhook = hook
>>> '\ud800'
'\ud800'
>>> print('\ud800')
�
>>> '\u2620'
'☠'
>>> print('\u2620')
☠
>>> ['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> print(['àáâãäåæ', 'ÀÁÂÃÄÅÆ'])
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\xe0\xe1\xe2\xe3\xe4\xe5\xe6', '\xc0\xc1\xc2\xc3\xc4\xc5\xc6']
['àáâãäåæ', 'ÀÁÂÃÄÅÆ']
>>> ['\u30aa\u30ab', '\u30b1\u30b2']
['オカ', 'ケゲ']
>>> print(['\u30aa\u30ab', '\u30b1\u30b2'])
['オカ', 'ケゲ']