diff -r 11cf18ec1900 Lib/sre_compile.py
--- a/Lib/sre_compile.py	Sat Sep 06 22:49:07 2014 +0300
+++ b/Lib/sre_compile.py	Mon Sep 08 22:53:42 2014 +0300
@@ -53,7 +53,7 @@
                     return _sre.getlower(literal, flags)
             else:
                 emit(OPCODES[op])
-                fixup = _identityfunction
+                fixup = None
             skip = _len(code); emit(0)
             _compile_charset(av, flags, code, fixup)
             code[skip] = _len(code) - skip
@@ -172,17 +172,15 @@
 def _compile_charset(charset, flags, code, fixup=None):
     # compile charset subprogram
     emit = code.append
-    if fixup is None:
-        fixup = _identityfunction
     for op, av in _optimize_charset(charset, fixup):
         emit(OPCODES[op])
         if op is NEGATE:
             pass
         elif op is LITERAL:
-            emit(fixup(av))
+            emit(av)
         elif op is RANGE:
-            emit(fixup(av[0]))
-            emit(fixup(av[1]))
+            emit(av[0])
+            emit(av[1])
         elif op is CHARSET:
             code.extend(av)
         elif op is BIGCHARSET:
@@ -207,9 +205,15 @@
         while True:
             try:
                 if op is LITERAL:
-                    charmap[fixup(av)] = 1
+                    i = av
+                    if fixup is not None:
+                        i = fixup(i)
+                    charmap[i] = 1
                 elif op is RANGE:
-                    for i in range(fixup(av[0]), fixup(av[1])+1):
+                    r = range(av[0], av[1]+1)
+                    if fixup is not None:
+                        r = map(fixup, r)
+                    for i in r:
                         charmap[i] = 1
                 elif op is NEGATE:
                     out.append((op, av))
@@ -221,7 +225,27 @@
                     charmap += b'\0' * 0xff00
                     continue
                 # character set contains non-BMP character codes
-                tail.append((op, av))
+                if fixup is None or op is not RANGE:
+                    tail.append((op, av))
+                else:
+                    hi = -2
+                    for i in map(fixup, range(av[0], av[1]+1)):
+                        if i == hi + 1:
+                            hi = i
+                        elif i < 0x10000:
+                            charmap[i] = 1
+                        else:
+                            if hi >= 0:
+                                if lo == hi:
+                                    tail.append((LITERAL, hi))
+                                else:
+                                    tail.append((RANGE, (lo, hi)))
+                            lo = hi = i
+                    if hi >= 0:
+                        if lo == hi:
+                            tail.append((LITERAL, hi))
+                        else:
+                            tail.append((RANGE, (lo, hi)))
             break
 
     # compress character map
@@ -247,7 +271,7 @@
             else:
                 out.append((RANGE, (p, q - 1)))
         out += tail
-        if len(out) < len(charset):
+        if fixup is not None or len(out) < len(charset):
             return out
         return charset
 
diff -r 11cf18ec1900 Lib/test/test_re.py
--- a/Lib/test/test_re.py	Sat Sep 06 22:49:07 2014 +0300
+++ b/Lib/test/test_re.py	Mon Sep 08 22:53:42 2014 +0300
@@ -577,6 +577,25 @@
         self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
         self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
 
+    def test_ignore_case_range(self):
+        # Issues #3511, #17381.
+        self.assertIsNone(re.match(r'[9-A]', '_', re.I))
+        self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
+        self.assertIsNotNone(re.match(r'[\xc0-\xde]', '\xd7', re.I))
+        self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
+        self.assertIsNotNone(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
+        self.assertIsNotNone(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
+        self.assertIsNotNone(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
+        self.assertIsNotNone(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
+        self.assertIsNotNone(
+            re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
+        self.assertIsNotNone(
+            re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
+        self.assertIsNotNone(
+            re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
+        self.assertIsNotNone(
+            re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
+
     def test_category(self):
         self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
 