#!/usr/bin/env python
"""Generate C ``int <name>_map[256]`` tables describing lead bytes of encodings.

For every encoding named on the command line (or a built-in default list when
no names are given) each code point in ``range(MAXCODE + 1)`` is encoded and
the encoded length is recorded per first byte.  The emitted table holds, for
each byte value ``b``:

* ``b`` itself when characters starting with ``b`` encode to exactly 1 byte,
* ``-n`` when they encode to exactly ``n`` bytes (``2 <= n <= 4``),
* ``-1`` when no scanned character starts with ``b``.

Encodings that need more than 4 bytes per character, or whose length is not
determined by the first byte alone, are reported on stderr and skipped.
"""
import sys

MAXCODE = 0xFFFF  # sys.maxunicode
INFLEN = 1000  # "infinite" initial value for the per-byte minimum length

DEFAULT_ENCODINGS = [
    'big5', 'big5hkscs', 'cp932', 'cp949', 'cp950',
    'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
    'gb2312', 'gbk', 'gb18030',
    'iso2022-kr', 'johab',
    'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
    'utf-7',
    'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
]


def _scan_lead_bytes(encoding):
    """Return ``(min_len, max_len)`` lists indexed by first encoded byte.

    ``max_len[b]`` stays ``-1`` and ``min_len[b]`` stays ``INFLEN`` for bytes
    that never start an encoded character.  ``LookupError`` from an unknown
    codec name propagates to the caller.
    """
    max_len = [-1] * 256
    min_len = [INFLEN] * 256
    for code in range(MAXCODE + 1):
        try:
            data = chr(code).encode(encoding)
        except UnicodeEncodeError:
            # Character is not representable in this encoding.
            continue
        if not data:
            # Nothing was emitted, so there is no lead byte to record.
            continue
        lead = data[0]
        size = len(data)
        if size > max_len[lead]:
            max_len[lead] = size
        if size < min_len[lead]:
            min_len[lead] = size
    return min_len, max_len


def build_info_map(encoding):
    """Return the 256-entry lead-byte table for *encoding*, or ``None``.

    ``None`` means the encoding cannot be described by such a table: the
    codec name is unknown (or not a text encoding), some character needs more
    than 4 bytes, or two characters sharing a first byte differ in length.
    """
    try:
        min_len, max_len = _scan_lead_bytes(encoding)
    except LookupError:
        # Unknown or non-text codec: report it like any other unsupported
        # encoding instead of aborting the whole run with a traceback.
        return None
    if max(max_len) > 4:
        return None
    if any(lo != hi for lo, hi in zip(min_len, max_len) if hi > 0):
        return None
    # NOTE(review): 1-byte entries carry the byte value, not the code point
    # that encodes to it; these differ for code pages that are not
    # ASCII-compatible (e.g. cp037) -- confirm this is what the consumer
    # of the table expects.
    return [
        lead if size == 1 else -size if size > 1 else -1
        for lead, size in enumerate(max_len)
    ]


def format_map(encoding, info_map):
    """Return the C source text of the table for *encoding*.

    The array name is the lower-cased encoding name with ``-`` replaced by
    ``_`` and ``_map`` appended; values are written 16 per line.
    """
    lines = ['int %s_map[256] = {' % encoding.replace('-', '_').lower()]
    for start in range(0, 256, 16):
        row = info_map[start: start + 16]
        lines.append(' %s,' % ', '.join(str(value) for value in row))
    lines.append('};')
    return '\n'.join(lines)


def main(argv=None):
    """Print a table for each encoding in *argv* (default: ``sys.argv[1:]``).

    Falls back to ``DEFAULT_ENCODINGS`` when no names are supplied.  Tables
    go to stdout; unsupported encodings are reported on stderr.
    """
    encodings = list(sys.argv[1:] if argv is None else argv)
    if not encodings:
        encodings = DEFAULT_ENCODINGS
    for encoding in encodings:
        info_map = build_info_map(encoding)
        if info_map is None:
            print('%s is not supported' % encoding, file=sys.stderr)
            continue
        print(format_map(encoding, info_map))


if __name__ == '__main__':
    main()