diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.py' -x ACKS -x '*.txt' -x README -x NEWS SVN-Python/Include/pyerrors.h Dev-Python/Include/pyerrors.h --- SVN-Python/Include/pyerrors.h 2006-07-30 08:55:48.000000000 +0200 +++ Dev-Python/Include/pyerrors.h 2006-08-11 18:26:36.000000000 +0200 @@ -173,6 +173,7 @@ PyAPI_DATA(PyObject *) PyExc_RuntimeWarning; PyAPI_DATA(PyObject *) PyExc_FutureWarning; PyAPI_DATA(PyObject *) PyExc_ImportWarning; +PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; /* Convenience functions */ diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.py' -x ACKS -x '*.txt' -x README -x NEWS SVN-Python/Include/unicodeobject.h Dev-Python/Include/unicodeobject.h --- SVN-Python/Include/unicodeobject.h 2006-06-14 07:21:04.000000000 +0200 +++ Dev-Python/Include/unicodeobject.h 2006-08-11 19:36:59.862376911 +0200 @@ -189,6 +189,7 @@ # define PyUnicode_RSplit 
PyUnicodeUCS2_RSplit # define PyUnicode_Replace PyUnicodeUCS2_Replace # define PyUnicode_Resize PyUnicodeUCS2_Resize +# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS2_Split # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines @@ -266,6 +267,7 @@ # define PyUnicode_RSplit PyUnicodeUCS4_RSplit # define PyUnicode_Replace PyUnicodeUCS4_Replace # define PyUnicode_Resize PyUnicodeUCS4_Resize +# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS4_Split # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines @@ -1139,6 +1141,28 @@ PyObject *right /* Right string */ ); +/* Rich compare two strings and return one of the following: + + - NULL in case an exception was raised + - Py_True or Py_False for successful comparisons + - Py_NotImplemented in case the type combination is unknown + + Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in + case the conversion of the arguments to Unicode fails with a + UnicodeDecodeError. + + Possible values for op: + + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( + PyObject *left, /* Left string */ + PyObject *right, /* Right string */ + int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ + ); + /* Apply a argument tuple or dictionary to a format string and return the resulting Unicode string. 
*/ diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.py' -x ACKS -x '*.txt' -x README -x NEWS SVN-Python/Objects/exceptions.c Dev-Python/Objects/exceptions.c --- SVN-Python/Objects/exceptions.c 2006-07-03 16:59:05.000000000 +0200 +++ Dev-Python/Objects/exceptions.c 2006-08-11 18:28:27.000000000 +0200 @@ -1948,6 +1948,14 @@ "Base class for warnings about probable mistakes in module imports"); +/* + * UnicodeWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, UnicodeWarning, + "Base class for warnings about Unicode related problems, mostly\n" + "related to conversion problems."); + + /* Pre-computed MemoryError instance. Best to create this as early as * possible and not wait until a MemoryError is actually raised! 
*/ @@ -2048,6 +2056,7 @@ PRE_INIT(RuntimeWarning) PRE_INIT(FutureWarning) PRE_INIT(ImportWarning) + PRE_INIT(UnicodeWarning) m = Py_InitModule4("exceptions", functions, exceptions_doc, (PyObject *)NULL, PYTHON_API_VERSION); @@ -2113,6 +2122,7 @@ POST_INIT(RuntimeWarning) POST_INIT(FutureWarning) POST_INIT(ImportWarning) + POST_INIT(UnicodeWarning) PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL); if (!PyExc_MemoryErrorInst) diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.py' -x ACKS -x '*.txt' -x README -x NEWS SVN-Python/Objects/object.c Dev-Python/Objects/object.c --- SVN-Python/Objects/object.c 2006-06-19 00:17:29.000000000 +0200 +++ Dev-Python/Objects/object.c 2006-08-11 18:51:13.000000000 +0200 @@ -731,23 +731,6 @@ return (vv < ww) ? -1 : (vv > ww) ? 1 : 0; } -#ifdef Py_USING_UNICODE - /* Special case for Unicode */ - if (PyUnicode_Check(v) || PyUnicode_Check(w)) { - c = PyUnicode_Compare(v, w); - if (!PyErr_Occurred()) - return c; - /* TypeErrors are ignored: if Unicode coercion fails due - to one of the arguments not having the right type, we - continue as defined by the coercion protocol (see - above). Luckily, decoding errors are reported as - ValueErrors and are not masked by this technique. 
*/ - if (!PyErr_ExceptionMatches(PyExc_TypeError)) - return -2; - PyErr_Clear(); - } -#endif - /* None is smaller than anything */ if (v == Py_None) return -1; diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.py' -x ACKS -x '*.txt' -x README -x NEWS SVN-Python/Objects/unicodeobject.c Dev-Python/Objects/unicodeobject.c --- SVN-Python/Objects/unicodeobject.c 2006-07-21 07:32:28.000000000 +0200 +++ Dev-Python/Objects/unicodeobject.c 2006-08-11 19:37:40.000000000 +0200 @@ -5405,6 +5405,83 @@ return -1; } +PyObject *PyUnicode_RichCompare(PyObject *left, + PyObject *right, + int op) +{ + int result; + + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) + goto onError; + + /* Convert the return value to a Boolean */ + switch (op) { + case Py_EQ: + result = (result == 0); + break; + case Py_NE: + result = (result != 0); + break; + case Py_LE: + result = (result <= 0); + break; + case Py_GE: + result = (result >= 0); + break; + case Py_LT: + result = (result == -1); + break; + case Py_GT: + result = (result == 1); + break; + } + return PyBool_FromLong(result); + + onError: + + /* Standard case + + Type errors mean that PyUnicode_FromObject() could not convert + one of the arguments (usually the right hand side) to Unicode, + ie. we can't handle the comparison request. 
However, it is + possible that the other object knows a comparison method, which + is why we return Py_NotImplemented to give the other object a + chance. + + */ + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + if (op != Py_EQ && op != Py_NE) + return NULL; + + /* Equality comparison. + + This is a special case: we silence any + PyExc_UnicodeDecodeError and instead turn it into a + PyExc_UnicodeWarning. + + */ + if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) + return NULL; + PyErr_Clear(); + if (PyErr_Warn(PyExc_UnicodeWarning, + (op == Py_EQ) ? + "Unicode equal comparison " + "failed to convert both arguments to Unicode - " + "interpreting them as being unequal" : + "Unicode unequal comparison " + "failed to convert both arguments to Unicode - " + "interpreting them as being unequal" + ) < 0) + return NULL; + result = (op == Py_NE); + return PyBool_FromLong(result); +} + int PyUnicode_Contains(PyObject *container, PyObject *element) { @@ -7862,7 +7939,7 @@ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ - (cmpfunc) unicode_compare, /* tp_compare */ + 0, /* tp_compare */ unicode_repr, /* tp_repr */ &unicode_as_number, /* tp_as_number */ &unicode_as_sequence, /* tp_as_sequence */ @@ -7878,7 +7955,7 @@ unicode_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ + PyUnicode_RichCompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ diff -u -rP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x 
Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.c' -x '*.h' -x '*.in' -x output SVN-Python/Lib/test/exception_hierarchy.txt Dev-Python/Lib/test/exception_hierarchy.txt --- SVN-Python/Lib/test/exception_hierarchy.txt 2006-05-27 14:29:24.000000000 +0200 +++ Dev-Python/Lib/test/exception_hierarchy.txt 2006-08-11 22:51:42.305538105 +0200 @@ -45,3 +45,4 @@ +-- UserWarning +-- FutureWarning +-- ImportWarning + +-- UnicodeWarning diff -u -rP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.c' -x '*.h' -x '*.in' -x output SVN-Python/Misc/NEWS Dev-Python/Misc/NEWS --- SVN-Python/Misc/NEWS 2006-08-11 09:26:10.000000000 +0200 +++ Dev-Python/Misc/NEWS 2006-08-11 22:35:05.555403968 +0200 @@ -12,6 +12,19 @@ Core and builtins ----------------- +- Unicode objects will no longer raise an exception when being + compared equal or unequal to a string and causing a + UnicodeDecodeError exception, e.g. as result of a decoding failure. + + Instead, the equal (==) and unequal (!=) comparison operators will + now issue a UnicodeWarning and interpret the two objects as + unequal. The UnicodeWarning can be then filtered as desired using + the warning framework, e.g. silenced completely, turned into an + exception, logged, etc. + + Note that compare operators other than equal and unequal will still + raise UnicodeDecodeError exceptions as they've always done. 
+ - Bug #1536021: __hash__ may now return long int; the final hash value is obtained by invoking hash on the long int. diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.el' SVN-Python/Doc/api/exceptions.tex Dev-Python/Doc/api/exceptions.tex --- SVN-Python/Doc/api/exceptions.tex 2006-07-31 18:00:34.000000000 +0200 +++ Dev-Python/Doc/api/exceptions.tex 2006-08-11 22:55:00.387387598 +0200 @@ -288,10 +288,11 @@ names are \samp{PyExc_} followed by the Python exception name. These have the type \ctype{PyObject*}; they are all class objects. Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning}, - \cdata{PyExc_DeprecationWarning}, \cdata{PyExc_SyntaxWarning}, - \cdata{PyExc_RuntimeWarning}, and \cdata{PyExc_FutureWarning}. - \cdata{PyExc_Warning} is a subclass of \cdata{PyExc_Exception}; the - other warning categories are subclasses of \cdata{PyExc_Warning}. + \cdata{PyExc_UnicodeWarning}, \cdata{PyExc_DeprecationWarning}, + \cdata{PyExc_SyntaxWarning}, \cdata{PyExc_RuntimeWarning}, and + \cdata{PyExc_FutureWarning}. \cdata{PyExc_Warning} is a subclass of + \cdata{PyExc_Exception}; the other warning categories are subclasses + of \cdata{PyExc_Warning}. 
For information about warning control, see the documentation for the \module{warnings} module and the \programopt{-W} option in the diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.el' SVN-Python/Doc/lib/libexcs.tex Dev-Python/Doc/lib/libexcs.tex --- SVN-Python/Doc/lib/libexcs.tex 2006-05-16 20:26:10.000000000 +0200 +++ Dev-Python/Doc/lib/libexcs.tex 2006-08-11 22:59:34.495527699 +0200 @@ -454,6 +454,12 @@ \begin{excdesc}{ImportWarning} Base class for warnings about probable mistakes in module imports. \versionadded{2.5} +\end{excdesc} + +\begin{excdesc}{UnicodeWarning} +Base class for warnings related to Unicode. 
+\versionadded{2.5} + \end{excdesc} The class hierarchy for built-in exceptions is: diff -u -rbP -x '*.o' -x '*.pyc' -x Makefile -x '*~' -x '*.so' -x add2lib -x pgen -x buildno -x core -x 'config.*' -x 'pyconfig.*' -x 'libpython*' -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x '*.bak' -x '*.s' -x DEADJOE -x '*.rej' -x '*.orig' -x Demo -x CVS -x Doc -x '*.orig' -x '*.pyc' -x '*.pyo' -x '.#*' -x '*/plat*' -x Grammar -x '*.html' -x 'setup.cfg*' -x .cvsignore -x BeOS -x build -x platform -x Makefile.in -x Makefile.pkg -x Mac -x autom4te.cache -x configure.new -x confdefs.h -x conftest.c -x .svn -x .svnignore -x '*.m4' -x '*.el' SVN-Python/Doc/lib/libwarnings.tex Dev-Python/Doc/lib/libwarnings.tex --- SVN-Python/Doc/lib/libwarnings.tex 2006-07-06 15:41:34.000000000 +0200 +++ Dev-Python/Doc/lib/libwarnings.tex 2006-08-11 22:58:23.452078511 +0200 @@ -76,6 +76,9 @@ \lineii{ImportWarning}{Base category for warnings triggered during the process of importing a module (ignored by default).} + +\lineii{UnicodeWarning}{Base category for warnings related to Unicode.} + \end{tableii} While these are technically built-in exceptions, they are documented