Index: Doc/library/re.rst
===================================================================
--- Doc/library/re.rst (revision 74191)
+++ Doc/library/re.rst (working copy)
@@ -338,11 +338,12 @@
 
 ``\d``
    For Unicode (str) patterns:
-      Matches any Unicode digit (which includes ``[0-9]``, and also many
-      other digit characters). If the :const:`ASCII` flag is used only
-      ``[0-9]`` is matched (but the flag affects the entire regular
-      expression, so in such cases using an explicit ``[0-9]`` may be a
-      better choice).
+      Matches any Unicode decimal digit; more specifically, matches
+      any character in Unicode category [Nd] (Number, Decimal Digit).
+      This includes ``[0-9]``, and also many other digit characters.
+      If the :const:`ASCII` flag is used, only ``[0-9]`` is matched
+      (but the flag affects the entire regular expression, so in such
+      cases using an explicit ``[0-9]`` may be a better choice).
    For 8-bit (bytes) patterns:
       Matches any decimal digit; this is equivalent to ``[0-9]``.
 
Index: Lib/test/test_re.py
===================================================================
--- Lib/test/test_re.py (revision 74191)
+++ Lib/test/test_re.py (working copy)
@@ -605,6 +605,27 @@
         self.assertEqual(next(iter).span(), (4, 4))
         self.assertRaises(StopIteration, next, iter)
 
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            '\u0e58', # '\N{THAI DIGIT EIGHT}', category 'Nd'
+            '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match(r'\d', x).group(0), x)
+
+        not_decimal_digits = [
+            '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match(r'\d', x))
+
     def test_empty_array(self):
         # SF buf 1647541
         import array
Index: Modules/_sre.c
===================================================================
--- Modules/_sre.c (revision 74191)
+++ Modules/_sre.c (working copy)
@@ -168,7 +168,7 @@
 
 #if defined(HAVE_UNICODE)
 
-#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
+#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))