diff -r 1f466354a85b Lib/sre_compile.py
--- a/Lib/sre_compile.py	Mon Oct 21 09:08:39 2013 +0200
+++ b/Lib/sre_compile.py	Mon Oct 21 13:08:18 2013 +0300
@@ -249,22 +249,18 @@
         return out
     return charset
 
+_CODEBITS = _sre.CODESIZE * 8
 def _mk_bitmap(bits):
-    data = []
-    dataappend = data.append
-    if _sre.CODESIZE == 2:
-        start = (1, 0)
-    else:
-        start = (1, 0)
-    m, v = start
-    for c in bits:
-        if c:
-            v = v + m
-        m = m + m
-        if m > MAXCODE:
-            dataappend(v)
-            m, v = start
-    return data
+    s = bytes(bits).translate(b'0' + b'1' * 255)
+    return [int(s[i: i + _CODEBITS][::-1], 2)
+            for i in range(0, len(s), _CODEBITS)]
+
+def _bytes_to_codes(b):
+    import array
+    a = array.array('I', b)
+    assert a.itemsize == _sre.CODESIZE
+    assert len(a) * a.itemsize == len(b)
+    return a.tolist()
 
 # To represent a big charset, first a bitmap of all characters in the
 # set is constructed. Then, this bitmap is sliced into chunks of 256
@@ -293,10 +289,6 @@
 # bigcharsets.
 
 def _optimize_unicode(charset, fixup):
-    try:
-        import array
-    except ImportError:
-        return charset
     charmap = [0]*65536
     negate = 0
     try:
@@ -326,26 +318,16 @@
     mapping = [0]*256
     block = 0
     data = []
-    for i in range(256):
-        chunk = tuple(charmap[i*256:(i+1)*256])
+    for i in range(0, 65536, 256):
+        chunk = tuple(charmap[i: i + 256])
         new = comps.setdefault(chunk, block)
-        mapping[i] = new
+        mapping[i//256] = new
         if new == block:
-            block = block + 1
-            data = data + _mk_bitmap(chunk)
-    header = [block]
-    if _sre.CODESIZE == 2:
-        code = 'H'
-    else:
-        code = 'I'
-    # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tobytes()
-    # Convert byte array to word array
-    mapping = array.array(code, mapping)
-    assert mapping.itemsize == _sre.CODESIZE
-    assert len(mapping) * mapping.itemsize == 256
-    header = header + mapping.tolist()
-    data[0:0] = header
+            block += 1
+            data.extend(_mk_bitmap(chunk))
+    # Convert block indices to word array
+    mapping = _bytes_to_codes(bytes(mapping))
+    data[0:0] = [block] + mapping
     return [(BIGCHARSET, data)]
 
 def _simple(av):