diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -253,6 +253,8 @@
     f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
+    f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
+    f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)
 
     f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
     f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -923,6 +923,46 @@
         # Test behaviour when not given a string or pattern as parameter
         self.assertRaises(TypeError, re.compile, 0)
 
+    # repr() of a compiled pattern: the pattern's own repr followed by the
+    # "|"-joined flag names; str patterns report re.UNICODE by default.
+    def test_repr_with_no_flags_should_be_unicode(self):
+        self.assertEqual(repr(re.compile('random pattern')),
+                         "re.compile('random pattern', re.UNICODE)")
+
+    def test_repr_with_ascii_flag_only(self):
+        self.assertEqual(repr(re.compile('random pattern', re.ASCII)),
+                         "re.compile('random pattern', re.ASCII)")
+
+    def test_repr_with_single_flag(self):
+        self.assertEqual(
+            repr(re.compile('random pattern', re.IGNORECASE)),
+            "re.compile('random pattern', re.IGNORECASE|re.UNICODE)")
+
+    def test_repr_with_multiple_flags(self):
+        self.assertEqual(
+            repr(re.compile('random pattern', re.I|re.S|re.X)),
+            "re.compile('random pattern', "
+            "re.IGNORECASE|re.DOTALL|re.UNICODE|re.VERBOSE)")
+
+    def test_repr_with_inline_flags(self):
+        self.assertEqual(repr(re.compile(b'(?i)pattern')),
+                         "re.compile(b'(?i)pattern', re.IGNORECASE)")
+
+    def test_repr_with_no_flags(self):
+        self.assertEqual(repr(re.compile(b'bytes pattern')),
+                         "re.compile(b'bytes pattern')")
+
+    def test_repr_with_double_quotes_inside(self):
+        self.assertEqual(
+            repr(re.compile('random "double quoted" pattern')),
+            '''re.compile('random "double quoted" pattern', re.UNICODE)''')
+
+    def test_repr_with_single_quotes_inside(self):
+        self.assertEqual(
+            repr(re.compile("random 'single quoted' pattern")),
+            '''re.compile("random 'single quoted' pattern", re.UNICODE)''')
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2594,6 +2594,15 @@
     {NULL}  /* Sentinel */
 };
 
+/* Maps one SRE_FLAG_* bit to the "re.X" name shown by pattern_repr(). */
+typedef struct {
+    const char *name;
+    int value;
+} PatternFlag;
+
+static PyObject *
+pattern_repr(PatternObject *pattern);
+
 static PyTypeObject Pattern_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
     "_" SRE_MODULE ".SRE_Pattern",
@@ -2603,7 +2612,7 @@
     0,                                  /* tp_getattr */
     0,                                  /* tp_setattr */
     0,                                  /* tp_reserved */
-    0,                                  /* tp_repr */
+    (reprfunc)pattern_repr,             /* tp_repr */
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
@@ -2627,6 +2636,101 @@
 
 static int _validate(PatternObject *self); /* Forward */
 
+/*
+ * Append the C string `string` to `list` as a new str object.
+ * Returns 1 on success, 0 on failure (with an exception set).
+ *
+ * Copied and adapted `append_string` from the `regex` module:
+ * https://mrab-regex-hg.googlecode.com/hg/regex_3/regex/_regex.c
+ */
+static int
+append_string(PyObject *list, const char *string) {
+    PyObject *item;
+    int status;
+
+    item = Py_BuildValue("U", string);
+    if (!item)
+        return 0;
+
+    /* PyList_Append takes its own reference, so ours is always dropped. */
+    status = PyList_Append(list, item);
+    Py_DECREF(item);
+    if (status < 0)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Join the str items of `list` with the C string `string` as separator.
+ * Returns a new reference, or NULL on failure (with an exception set).
+ */
+static PyObject *
+join_string(PyObject *list, const char *string) {
+    PyObject *separator;
+    PyObject *result;
+
+    separator = Py_BuildValue("U", string);
+    if (!separator)
+        return NULL;
+    result = PyUnicode_Join(separator, list);
+    Py_DECREF(separator);
+    return result;
+}
+
+/*
+ * tp_repr for compiled patterns.  Produces "re.compile(PATTERN)" or
+ * "re.compile(PATTERN, FLAGS)", where PATTERN is repr(obj->pattern) and
+ * FLAGS is the "|"-joined re.* names of every table flag set in obj->flags.
+ */
+static PyObject *
+pattern_repr(PatternObject *obj) {
+    static const PatternFlag flags[] = {
+        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
+        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
+        {"re.LOCALE", SRE_FLAG_LOCALE},
+        {"re.MULTILINE", SRE_FLAG_MULTILINE},
+        {"re.DOTALL", SRE_FLAG_DOTALL},
+        {"re.UNICODE", SRE_FLAG_UNICODE},
+        {"re.VERBOSE", SRE_FLAG_VERBOSE},
+        {"re.DEBUG", SRE_FLAG_DEBUG},
+        {"re.ASCII", SRE_FLAG_ASCII},
+    };
+    PyObject *flag_items;
+    PyObject *flags_result = NULL;
+    PyObject *result = NULL;
+    size_t i;
+
+    flag_items = PyList_New(0);
+    if (!flag_items)
+        return NULL;
+
+    for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+        if (obj->flags & flags[i].value) {
+            if (!append_string(flag_items, flags[i].name))
+                goto done;
+        }
+    }
+
+    if (PyList_GET_SIZE(flag_items) > 0) {
+        flags_result = join_string(flag_items, "|");
+        if (!flags_result)
+            goto done;
+        /* %R inserts PyObject_Repr(obj->pattern), %U a str object. */
+        result = PyUnicode_FromFormat("re.compile(%R, %U)",
+                                      obj->pattern, flags_result);
+    }
+    else {
+        result = PyUnicode_FromFormat("re.compile(%R)", obj->pattern);
+    }
+
+done:
+    /* Single exit so the temporaries are released on every path. */
+    Py_XDECREF(flags_result);
+    Py_DECREF(flag_items);
+    return result;
+}
+
 static PyObject *
 _compile(PyObject* self_, PyObject* args)
 {
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -81,6 +81,8 @@
 #define SRE_FLAG_DOTALL 16
 #define SRE_FLAG_UNICODE 32
 #define SRE_FLAG_VERBOSE 64
+#define SRE_FLAG_DEBUG 128
+#define SRE_FLAG_ASCII 256
 #define SRE_INFO_PREFIX 1
 #define SRE_INFO_LITERAL 2
 #define SRE_INFO_CHARSET 4