Here is some more information.
--- snip ---
Normal behaviour
================
$ locale
LANG=en_US.UTF-8
LC_CTYPE=en_US.UTF-8
LC_NUMERIC=POSIX
LC_TIME=en_GB.UTF-8
LC_COLLATE=en_GB.UTF-8
LC_MONETARY=de_DE.UTF-8
LC_MESSAGES=en_US.UTF-8
LC_PAPER=de_DE.UTF-8
LC_NAME=en_US.UTF-8
LC_ADDRESS=de_DE.UTF-8
LC_TELEPHONE=de_DE.UTF-8
LC_MEASUREMENT=de_DE.UTF-8
LC_IDENTIFICATION=de_DE.UTF-8
LC_ALL=
$ python2.6
Python 2.6.3 (r263:75183, Oct 6 2009, 17:19:56)
[GCC 4.3.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> print '缺陷'
缺陷
>>> print u'缺陷'
缺陷
>>> '缺陷'
'\xe7\xbc\xba\xe9\x99\xb7'
>>> u'缺陷'
u'\u7f3a\u9677'
>>> '缺陷'.decode('utf8')
u'\u7f3a\u9677'
>>> u'\u7f3a\u9677'
u'\u7f3a\u9677'
>>>
$ cat unicode_bug.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
def print_string():
"""
>>> print '缺陷'
缺陷
"""
pass
def print_unicode():
"""
>>> print u'缺陷'
缺陷
"""
pass
def string_repr():
"""
>>> '缺陷'
'\xe7\xbc\xba\xe9\x99\xb7'
"""
pass
def unicode_repr():
"""
>>> u'缺陷'
u'\u7f3a\u9677'
"""
pass
def decode():
"""
>>> '缺陷'.decode('utf8')
u'\u7f3a\u9677'
"""
pass
def unicode_escape_repr():
"""
>>> u'\u7f3a\u9677'
u'\u7f3a\u9677'
"""
pass
if __name__ == "__main__":
import doctest
doctest.testmod()
$ python2.5 unicode_bug.py
/usr/lib/python2.5/doctest.py:1460: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/lib/python2.5/doctest.py:1480: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
Traceback (most recent call last):
File "unicode_bug.py", line 48, in <module>
doctest.testmod()
File "/usr/lib/python2.5/doctest.py", line 1815, in testmod
runner.run(test)
File "/usr/lib/python2.5/doctest.py", line 1361, in run
return self.__run(test, compileflags, out)
File "/usr/lib/python2.5/doctest.py", line 1277, in __run
self.report_failure(out, test, example, got)
File "/usr/lib/python2.5/doctest.py", line 1141, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/lib/python2.5/doctest.py", line 1565, in output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
$ python2.6 unicode_bug.py
/usr/local/lib/python2.6/doctest.py:1475: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/local/lib/python2.6/doctest.py:1495: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
Traceback (most recent call last):
File "unicode_bug.py", line 48, in <module>
doctest.testmod()
File "/usr/local/lib/python2.6/doctest.py", line 1830, in testmod
runner.run(test)
File "/usr/local/lib/python2.6/doctest.py", line 1374, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1290, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1154, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 1580, in
output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
$ nosetests -V
nosetests version 0.11.1
$ nosetests --with-doctest -v unicode_bug.py
Doctest: unicode_bug.decode ... ok
Doctest: unicode_bug.print_string ... ok
Doctest: unicode_bug.print_unicode ...
/usr/local/lib/python2.6/doctest.py:1475: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/local/lib/python2.6/doctest.py:1495: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
ERROR
Doctest: unicode_bug.string_repr ... FAIL
Doctest: unicode_bug.unicode_escape_repr ... ok
Doctest: unicode_bug.unicode_repr ... FAIL
======================================================================
ERROR: Doctest: unicode_bug.print_unicode
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2140, in runTest
test, out=new.write, clear_globs=False)
File "/usr/local/lib/python2.6/doctest.py", line 1374, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1290, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1154, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 1580, in
output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
======================================================================
FAIL: Doctest: unicode_bug.string_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2145, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: Failed doctest test for unicode_bug.string_repr
File "/home/babilen/test/unicode_bug.py", line 18, in string_repr
----------------------------------------------------------------------
File "/home/babilen/test/unicode_bug.py", line 20, in
unicode_bug.string_repr
Failed example:
'缺陷'
Expected:
'缺陷'
Got:
'\xe7\xbc\xba\xe9\x99\xb7'
======================================================================
FAIL: Doctest: unicode_bug.unicode_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2145, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: Failed doctest test for unicode_bug.unicode_repr
File ".../unicode_bug.py", line 25, in unicode_repr
----------------------------------------------------------------------
File ".../unicode_bug.py", line 27, in unicode_bug.unicode_repr
Failed example:
u'缺陷'
Expected:
u'\u7f3a\u9677'
Got:
u'\xe7\xbc\xba\xe9\x99\xb7'
----------------------------------------------------------------------
Ran 6 tests in 0.011s
FAILED (errors=1, failures=2)
--- snip ---
unicode_literals
================
$ python2.6
Python 2.6.3 (r263:75183, Oct 6 2009, 17:19:56)
[GCC 4.3.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from __future__ import unicode_literals
>>> print '缺陷'
缺陷
>>> print u'缺陷'
缺陷
>>> '缺陷'
u'\u7f3a\u9677'
>>> u'缺陷'
u'\u7f3a\u9677'
>>> '缺陷'.decode('utf8')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.6/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeEncodeError: 'ascii' codec can't encode characters in position
0-1: ordinal not in range(128)
>>> u'\u7f3a\u9677'
u'\u7f3a\u9677'
$ cat unicode_bug_literals.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from __future__ import unicode_literals
def print_string():
"""
>>> print '缺陷'
缺陷
"""
pass
def print_unicode():
"""
>>> print u'缺陷'
缺陷
"""
pass
def string_repr():
"""
>>> '缺陷'
u'\u7f3a\u9677'
"""
pass
def unicode_repr():
"""
>>> u'缺陷'
u'\u7f3a\u9677'
"""
pass
def unicode_escape_repr():
"""
>>> u'\u7f3a\u9677'
u'\u7f3a\u9677'
"""
pass
if __name__ == "__main__":
import doctest
doctest.testmod()
$ python2.6 unicode_bug_literals.py
Traceback (most recent call last):
File "unicode_bug_literals.py", line 43, in <module>
doctest.testmod()
File "/usr/local/lib/python2.6/doctest.py", line 1830, in testmod
runner.run(test)
File "/usr/local/lib/python2.6/doctest.py", line 1374, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1290, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1154, in report_failure
self._checker.output_difference(example, got, self.optionflags))
UnicodeEncodeError: 'ascii' codec can't encode characters in position
157-158: ordinal not in range(128)
$ nosetests --with-doctest -v unicode_bug_literals.py
Doctest: unicode_bug_literals.print_string ... ok
Doctest: unicode_bug_literals.print_unicode ... ok
Doctest: unicode_bug_literals.string_repr ... FAIL
Doctest: unicode_bug_literals.unicode_escape_repr ... FAIL
Doctest: unicode_bug_literals.unicode_repr ... FAIL
======================================================================
FAIL: Doctest: unicode_bug_literals.string_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2145, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: <unprintable AssertionError object>
======================================================================
FAIL: Doctest: unicode_bug_literals.unicode_escape_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2145, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: <unprintable AssertionError object>
======================================================================
FAIL: Doctest: unicode_bug_literals.unicode_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2145, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: <unprintable AssertionError object>
----------------------------------------------------------------------
Ran 5 tests in 0.011s
FAILED (failures=3)
--- snip ---
With doctest.unicode-2.patch
============================
$ nosetests --with-doctest -v unicode_bug.py
Doctest: unicode_bug.decode ... ok
Doctest: unicode_bug.print_string ... ok
Doctest: unicode_bug.print_unicode ...
/usr/local/lib/python2.6/doctest.py:1480: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/local/lib/python2.6/doctest.py:1500: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
ERROR
Doctest: unicode_bug.string_repr ... ERROR
Doctest: unicode_bug.unicode_escape_repr ... ok
Doctest: unicode_bug.unicode_repr ... ERROR
======================================================================
ERROR: Doctest: unicode_bug.print_unicode
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2150, in runTest
clear_globs=False)
File "/usr/local/lib/python2.6/doctest.py", line 1379, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1291, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1155, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 1585, in
output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
======================================================================
ERROR: Doctest: unicode_bug.string_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2150, in runTest
clear_globs=False)
File "/usr/local/lib/python2.6/doctest.py", line 1379, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1291, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1155, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 2149, in <lambda>
test, out=lambda x: new.write(x.encode(output_encoding)),
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position
170: ordinal not in range(128)
======================================================================
ERROR: Doctest: unicode_bug.unicode_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2150, in runTest
clear_globs=False)
File "/usr/local/lib/python2.6/doctest.py", line 1379, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1291, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1155, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 2149, in <lambda>
test, out=lambda x: new.write(x.encode(output_encoding)),
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position
172: ordinal not in range(128)
----------------------------------------------------------------------
Ran 6 tests in 0.010s
FAILED (errors=3)
$ nosetests --with-doctest -v unicode_bug_literals.py
Doctest: unicode_bug_literals.print_string ... ok
Doctest: unicode_bug_literals.print_unicode ... ok
Doctest: unicode_bug_literals.string_repr ... FAIL
Doctest: unicode_bug_literals.unicode_escape_repr ... FAIL
Doctest: unicode_bug_literals.unicode_repr ... FAIL
======================================================================
FAIL: Doctest: unicode_bug_literals.string_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2155, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: Failed doctest test for unicode_bug_literals.string_repr
File "/home/babilen/test/unicode_bug_literals.py", line 20, in string_repr
----------------------------------------------------------------------
File "/home/babilen/test/unicode_bug_literals.py", line 22, in
unicode_bug_literals.string_repr
Failed example:
'缺陷'
Expected:
u'缺陷'
Got:
u'\u7f3a\u9677'
======================================================================
FAIL: Doctest: unicode_bug_literals.unicode_escape_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2155, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: Failed doctest test for
unicode_bug_literals.unicode_escape_repr
File "/home/babilen/test/unicode_bug_literals.py", line 34, in
unicode_escape_repr
----------------------------------------------------------------------
File "/home/babilen/test/unicode_bug_literals.py", line 36, in
unicode_bug_literals.unicode_escape_repr
Failed example:
u'缺陷'
Expected:
u'缺陷'
Got:
u'\u7f3a\u9677'
======================================================================
FAIL: Doctest: unicode_bug_literals.unicode_repr
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.6/doctest.py", line 2155, in runTest
raise self.failureException(self.format_failure(new.getvalue()))
AssertionError: Failed doctest test for unicode_bug_literals.unicode_repr
File "/home/babilen/test/unicode_bug_literals.py", line 27, in
unicode_repr
----------------------------------------------------------------------
File "/home/babilen/test/unicode_bug_literals.py", line 29, in
unicode_bug_literals.unicode_repr
Failed example:
u'缺陷'
Expected:
u'缺陷'
Got:
u'\u7f3a\u9677'
----------------------------------------------------------------------
Ran 5 tests in 0.009s
FAILED (failures=3)
--- snip ---
If you need further information, do not hesitate to contact me.
With kind regards,
Wolodja Wentland