Make compiled regular expression pattern objects hashable and comparable.

* Modules/_sre.c: add pattern_hash() and pattern_richcompare() and install
  them as tp_hash / tp_richcompare of Pattern_Type.  Only == and != are
  implemented; other operators return NotImplemented.  Equality compares
  groups, flags, isbytes, codesize, the compiled code, groupindex and
  indexgroup.  The code buffers are only memcmp()'d after their sizes have
  been checked to be equal, so the comparison never reads past the shorter
  buffer.
* _sre.compile() now requires groupindex to be a dict and indexgroup to be
  a tuple; Lib/sre_compile.py passes tuple(indexgroup) accordingly.
* Lib/test/test_re.py: add tests for pattern hashing and comparison.

diff -r 89e2201142f9 Lib/sre_compile.py
--- a/Lib/sre_compile.py	Fri Nov 18 10:42:10 2016 -0800
+++ b/Lib/sre_compile.py	Sat Nov 19 00:01:43 2016 +0100
@@ -576,5 +576,5 @@ def compile(p, flags=0):
     return _sre.compile(
         pattern, flags | p.pattern.flags, code,
         p.pattern.groups-1,
-        groupindex, indexgroup
+        groupindex, tuple(indexgroup)
         )
diff -r 89e2201142f9 Lib/test/test_re.py
--- a/Lib/test/test_re.py	Fri Nov 18 10:42:10 2016 -0800
+++ b/Lib/test/test_re.py	Sat Nov 19 00:01:43 2016 +0100
@@ -3,12 +3,13 @@ from test.support import verbose, run_un
 import io
 import locale
 import re
-from re import Scanner
 import sre_compile
+import string
 import sys
-import string
 import traceback
 import unittest
+import warnings
+from re import Scanner
 from weakref import proxy
 
 # Misc tests from Tim Peters' re.doc
@@ -1505,7 +1506,7 @@ class ReTests(unittest.TestCase):
         long_overflow = 2**128
         self.assertRaises(TypeError, re.finditer, "a", {})
         with self.assertRaises(OverflowError):
-            _sre.compile("abc", 0, [long_overflow], 0, [], [])
+            _sre.compile("abc", 0, [long_overflow], 0, {}, ())
         with self.assertRaises(TypeError):
             _sre.compile({}, 0, [], 0, [], [])
 
@@ -1777,6 +1778,55 @@ SUBPATTERN None 0 0
         self.assertIn('ASCII', str(re.A))
         self.assertIn('DOTALL', str(re.S))
 
+    def test_pattern_compare(self):
+        pattern1 = re.compile('abc', re.IGNORECASE)
+
+        # equal
+        re.purge()
+        pattern2 = re.compile('abc', re.IGNORECASE)
+        self.assertEqual(hash(pattern2), hash(pattern1))
+        self.assertEqual(pattern2, pattern1)
+
+        # not equal: different pattern (comparison is case sensitive)
+        re.purge()
+        pattern3 = re.compile('XYZ', re.IGNORECASE)
+        # Don't test hash(pattern3) != hash(pattern1) because there is no
+        # guarantee that hash values are different
+        self.assertNotEqual(pattern3, pattern1)
+
+        # not equal: different flag (flags=0)
+        re.purge()
+        pattern4 = re.compile('abc')
+        self.assertNotEqual(pattern4, pattern1)
+
+        # only == and != comparison operators are supported
+        with self.assertRaises(TypeError):
+            pattern1 < pattern2
+
+    def test_pattern_compare_bytes(self):
+        pattern1 = re.compile('abc')
+
+        # not equal: patterns of different types (str vs bytes),
+        # comparison must not raise a BytesWarning
+        re.purge()
+        pattern2 = re.compile(b'abc')
+        with warnings.catch_warnings():
+            warnings.simplefilter('error', BytesWarning)
+            self.assertNotEqual(pattern2, pattern1)
+
+        # equal: test bytes patterns
+        re.purge()
+        pattern3 = re.compile(b'abc')
+        self.assertEqual(hash(pattern3), hash(pattern2))
+        self.assertEqual(pattern3, pattern2)
+
+    @cpython_only
+    def test_pattern_compare_impl(self):
+        pattern1 = re.compile('abc', re.IGNORECASE)
+        re.purge()
+        pattern2 = re.compile('ABC', re.IGNORECASE)
+        self.assertEqual(pattern2, pattern1)
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
diff -r 89e2201142f9 Modules/_sre.c
--- a/Modules/_sre.c	Fri Nov 18 10:42:10 2016 -0800
+++ b/Modules/_sre.c	Sat Nov 19 00:01:43 2016 +0100
@@ -1438,8 +1438,8 @@ static int _validate(PatternObject *self
     flags: int
     code: object(subclass_of='&PyList_Type')
     groups: Py_ssize_t
-    groupindex: object
-    indexgroup: object
+    groupindex: object(subclass_of='&PyDict_Type')
+    indexgroup: object(subclass_of='&PyTuple_Type')
 
 [clinic start generated code]*/
 
@@ -1447,7 +1447,7 @@ static PyObject *
 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
                   PyObject *indexgroup)
-/*[clinic end generated code: output=ef9c2b3693776404 input=7d059ec8ae1edb85]*/
+/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
 {
     /* "compile" pattern descriptor to pattern object */
 
@@ -1506,14 +1506,12 @@ static PyObject *
 
     self->groups = groups;
 
-    Py_XINCREF(groupindex);
+    Py_INCREF(groupindex);
     self->groupindex = groupindex;
 
-    Py_XINCREF(indexgroup);
+    Py_INCREF(indexgroup);
     self->indexgroup = indexgroup;
 
-    self->weakreflist = NULL;
-
     if (!_validate(self)) {
         Py_DECREF(self);
         return NULL;
@@ -2649,6 +2647,83 @@ pattern_scanner(PatternObject *self, PyO
     return (PyObject*) scanner;
 }
 
+static Py_hash_t
+pattern_hash(PatternObject *self)
+{
+    Py_hash_t hash, hash2;
+
+    hash = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
+
+    hash2 = PyObject_Hash(self->indexgroup);
+    if (hash2 == -1) {
+        return -1;
+    }
+    hash ^= hash2;
+
+    /* Don't hash groupindex dictionary: indexgroup should contain the same
+       information, and computing the hash of a dict requires computing
+       hash(sorted(dict.items())) which is expensive.
+
+       Anyway, pattern_richcompare() compares groupindex and indexgroup. */
+
+    hash ^= self->groups;
+    hash ^= self->flags;
+    hash ^= self->isbytes;
+    hash ^= self->codesize;
+
+    if (hash == -1) {
+        hash = -2;
+    }
+    return hash;
+}
+
+static PyObject*
+pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
+{
+    PatternObject *left, *right;
+    int cmp;
+
+    if (op != Py_EQ && op != Py_NE) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+
+    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+    left = (PatternObject *)lefto;
+    right = (PatternObject *)righto;
+
+    cmp = (left->groups == right->groups
+           && left->flags == right->flags
+           && left->isbytes == right->isbytes
+           && left->codesize == right->codesize);
+    if (cmp) {
+        cmp = (memcmp(left->code, right->code,
+                      sizeof(left->code[0]) * left->codesize) == 0);
+    }
+    if (cmp) {
+        cmp = PyObject_RichCompareBool(left->groupindex, right->groupindex,
+                                       Py_EQ);
+        if (cmp < 0) {
+            return NULL;
+        }
+    }
+    if (cmp) {
+        cmp = PyObject_RichCompareBool(left->indexgroup, right->indexgroup,
+                                       Py_EQ);
+        if (cmp < 0) {
+            return NULL;
+        }
+    }
+    /* Don't compare the pattern because of the re.LOCALE flag. A pattern
+       compiled with re.LOCALE produces a different code depending on the
+       current locale which can change anytime. */
+    if (op == Py_NE) {
+        cmp = !cmp;
+    }
+    return PyBool_FromLong(cmp);
+}
+
 #include "clinic/_sre.c.h"
 
 static PyMethodDef pattern_methods[] = {
@@ -2693,7 +2768,7 @@ static PyTypeObject Pattern_Type = {
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
-    0,                                  /* tp_hash */
+    (hashfunc)pattern_hash,             /* tp_hash */
     0,                                  /* tp_call */
     0,                                  /* tp_str */
     0,                                  /* tp_getattro */
@@ -2703,7 +2778,7 @@ static PyTypeObject Pattern_Type = {
     pattern_doc,                        /* tp_doc */
     0,                                  /* tp_traverse */
     0,                                  /* tp_clear */
-    0,                                  /* tp_richcompare */
+    pattern_richcompare,                /* tp_richcompare */
     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
     0,                                  /* tp_iter */
     0,                                  /* tp_iternext */
diff -r 89e2201142f9 Modules/clinic/_sre.c.h
--- a/Modules/clinic/_sre.c.h	Fri Nov 18 10:42:10 2016 -0800
+++ b/Modules/clinic/_sre.c.h	Sat Nov 19 00:01:43 2016 +0100
@@ -438,7 +438,7 @@ static PyObject *
 {
     PyObject *return_value = NULL;
     static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", NULL};
-    static _PyArg_Parser _parser = {"OiO!nOO:compile", _keywords, 0};
+    static _PyArg_Parser _parser = {"OiO!nO!O!:compile", _keywords, 0};
     PyObject *pattern;
     int flags;
     PyObject *code;
@@ -447,7 +447,7 @@ static PyObject *
     PyObject *indexgroup;
 
     if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
-        &pattern, &flags, &PyList_Type, &code, &groups, &groupindex, &indexgroup)) {
+        &pattern, &flags, &PyList_Type, &code, &groups, &PyDict_Type, &groupindex, &PyTuple_Type, &indexgroup)) {
         goto exit;
     }
     return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup);
@@ -728,4 +728,4 @@ static PyObject *
 {
     return _sre_SRE_Scanner_search_impl(self);
 }
-/*[clinic end generated code: output=a4a246bca1963bc9 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b74b16d90f207358 input=a9049054013a1b77]*/
diff -r 89e2201142f9 Modules/sre.h
--- a/Modules/sre.h	Fri Nov 18 10:42:10 2016 -0800
+++ b/Modules/sre.h	Sat Nov 19 00:01:43 2016 +0100
@@ -27,8 +27,8 @@
 typedef struct {
     PyObject_VAR_HEAD
     Py_ssize_t groups; /* must be first! */
-    PyObject* groupindex;
-    PyObject* indexgroup;
+    PyObject* groupindex; /* dict */
+    PyObject* indexgroup; /* tuple */
     /* compatibility */
     PyObject* pattern; /* pattern source (or None) */
     int flags; /* flags used when compiling pattern source */