Index: logging/__init__.py
===================================================================
--- logging/__init__.py (revision 75042)
+++ logging/__init__.py (working copy)
@@ -766,28 +766,25 @@
         try:
             msg = self.format(record)
             stream = self.stream
-            fs = "%s\n"
-            if not hasattr(types, "UnicodeType"): #if no unicode support...
-                stream.write(fs % msg)
-            else:
-                try:
-                    if (isinstance(msg, unicode) and
-                        getattr(stream, 'encoding', None)):
-                        fs = fs.decode(stream.encoding)
-                        try:
-                            stream.write(fs % msg)
-                        except UnicodeEncodeError:
-                            #Printing to terminals sometimes fails. For example,
-                            #with an encoding of 'cp1251', the above write will
-                            #work if written to a stream opened or wrapped by
-                            #the codecs module, but fail when writing to a
-                            #terminal even when the codepage is set to cp1251.
-                            #An extra encoding step seems to be needed.
-                            stream.write((fs % msg).encode(stream.encoding))
-                    else:
-                        stream.write(fs % msg)
-                except UnicodeError:
-                    stream.write(fs % msg.encode("UTF-8"))
+            try:
+                stream.write(msg + '\n')
+            except UnicodeError:
+                if isinstance(msg, unicode):
+                    # Printing to terminals sometimes fails. For example,
+                    # with an encoding of 'cp1251', the above write will
+                    # work if written to a stream opened or wrapped by
+                    # the codecs module, but fail when writing to a
+                    # terminal even when the codepage is set to cp1251.
+                    # An extra encoding step seems to be needed.
+                    # The stream's 'encoding' attribute may be missing or may
+                    # be present but None; fall back to ASCII in both cases.
+                    encoding = getattr(stream, 'encoding', None) or 'ascii'
+                    stream.write(msg.encode(encoding, 'backslashreplace') + '\n')
+                else:
+                    # When stream is a codecs writer object and msg is a byte
+                    # string in an encoding other than stream.encoding,
+                    # write() raises UnicodeDecodeError.
+                    stream.write(msg.encode('string_escape') + '\n')
             self.flush()
         except (KeyboardInterrupt, SystemExit):
             raise
Index: test/test_logging.py
===================================================================
--- test/test_logging.py (revision 75042)
+++ test/test_logging.py (working copy)
@@ -887,10 +887,14 @@
             if os.path.isfile(fn):
                 os.remove(fn)
 
+    unicode_message = u'\u0434\u043e \u0441\u0432\u0438\u0434\u0430\u043d\u0438\u044f'
+    escaped_message = '\\u0434\\u043e \\u0441\\u0432\\u0438\\u0434'\
+                      '\\u0430\\u043d\\u0438\\u044f\n'
+
     def test_encoding_cyrillic_unicode(self):
         log = logging.getLogger("test")
         #Get a message in Unicode: Do svidanya in Cyrillic (meaning goodbye)
-        message = u'\u0434\u043e \u0441\u0432\u0438\u0434\u0430\u043d\u0438\u044f'
+        message = self.unicode_message
         #Ensure it's written in a Cyrillic encoding
         writer_class = codecs.getwriter('cp1251')
         writer_class.encoding = 'cp1251'
@@ -908,7 +912,64 @@
         #Compare against what the data should be when encoded in CP-1251
         self.assertEqual(s, '\xe4\xee \xf1\xe2\xe8\xe4\xe0\xed\xe8\xff\n')
 
+    def test_unicode_without_encoding(self):
+        log = logging.getLogger("test")
+        message = self.unicode_message
+        stream = cStringIO.StringIO()
+        handler = logging.StreamHandler(stream)
+        log.addHandler(handler)
+        try:
+            log.warning(message)
+        finally:
+            log.removeHandler(handler)
+            handler.close()
+        s = stream.getvalue()
+        # message should be ASCII with backslash escapes (fallback encoding).
+        self.assertEqual(s, self.escaped_message)
 
+    def test_unicode_encodeerror_fallback(self):
+        log = logging.getLogger("test")
+        #Get a message in Unicode: Do svidanya in Cyrillic (meaning goodbye)
+        message = self.unicode_message
+
+        writer_class = codecs.getwriter('latin-1')
+        writer_class.encoding = 'latin-1'
+        stream = cStringIO.StringIO()
+        writer = writer_class(stream, 'strict')
+        handler = logging.StreamHandler(writer)
+        log.addHandler(handler)
+        try:
+            log.warning(message)
+        finally:
+            log.removeHandler(handler)
+            handler.close()
+        # check we wrote exactly those bytes, ignoring trailing \n etc
+        s = stream.getvalue()
+        # message should be encoded in ascii with backslash escape.
+        self.assertEqual(s, self.escaped_message)
+
+    def test_encoding_mismatch(self):
+        log = logging.getLogger("test")
+        #Get a UTF-8 encoded byte string: Do svidanya in Cyrillic (meaning goodbye)
+        message = self.unicode_message.encode('utf-8')
+
+        writer_class = codecs.getwriter('latin-1')
+        writer_class.encoding = 'latin-1'
+        stream = cStringIO.StringIO()
+        writer = writer_class(stream, 'strict')
+        handler = logging.StreamHandler(writer)
+        log.addHandler(handler)
+        try:
+            log.warning(message)
+        finally:
+            log.removeHandler(handler)
+            handler.close()
+        # check we wrote exactly those bytes, ignoring trailing \n etc
+        s = stream.getvalue()
+        # message should be escaped with the 'string_escape' codec.
+        self.assertEqual(s, message.encode('string_escape') + '\n')
+
+
 # Set the locale to the platform-dependent default. I have no idea
 # why the test does this, but in any case we save the current locale
 # first and restore it at the end.