"""Report which legacy encodings can represent a set of non-ASCII characters.

For every candidate in CHARACTERS the script prints the code pages (probed
with the 'strict' error handler) and locale encodings (probed with
'surrogateescape') that can encode the character and decode it back.
Before that, it prints a warning for every probed encoding that none of
the candidates round-trips through.
"""

CHARACTERS = (
    # U+00E6 (Latin small letter AE): Encodable to
    # cp1252, cp1254, cp1257, iso-8859-1
    '\u00E6',
    # U+0141 (Latin capital letter L with stroke): Encodable to
    # cp1250, cp1257
    '\u0141',
    # U+041A (Cyrillic capital letter KA): Encodable to
    # cp932, cp950, cp1251
    '\u041A',
    # U+05D0 (Hebrew Letter Alef): Encodable to
    # cp424, cp1255
    '\u05D0',
    # U+06A9 (Arabic letter KEHEH): Encodable to
    # cp1256
    '\u06A9',
    # U+03A9 (Greek capital letter OMEGA): Encodable to
    # cp932, cp950, cp1253
    '\u03A9',
    # U+0E01 (Thai character KO KAI): Encodable to
    # cp874
    '\u0E01',
)

# Code page numbers; each one is turned into a codec name of the form 'cpNNN'.
CODE_PAGES = (424, 874, 932, 950) + tuple(range(1250, 1257+1))

# Encodings probed with the 'surrogateescape' error handler
# ('utf8' is currently disabled).
LOCALE_ENCODINGS = ['iso-8859-1']  # , 'utf8']


def is_encodable(character, encoding, errors):
    """Return True if *character* survives an encode/decode round trip.

    Args:
        character: The string (normally a single character) to test.
        encoding: Name of the codec to encode with and decode from.
        errors: Error handler name passed to both encode() and decode().

    Returns:
        True when encoding and decoding raise no UnicodeError and the
        decoded text equals *character*; False otherwise.
    """
    try:
        encoded = character.encode(encoding, errors)
        decoded = encoded.decode(encoding, errors)
    except UnicodeError:
        return False
    # A lossy error handler (e.g. 'replace' or 'ignore') never raises, so
    # the absence of an exception alone does not prove the character is
    # representable: the round trip has to give the same text back.
    return decoded == character


def check_encoding(encoding, errors):
    """Print a warning if no entry of CHARACTERS is encodable to *encoding*.

    Args:
        encoding: Name of the codec to probe.
        errors: Error handler name forwarded to is_encodable().
    """
    # any() stops at the first encodable candidate, like the early return
    # of an explicit loop would.
    if not any(is_encodable(character, encoding, errors)
               for character in CHARACTERS):
        print("No character for encoding %s:%s :-(" % (encoding, errors))


# Make sure that every probed encoding is covered by at least one candidate.
for code_page in CODE_PAGES:
    check_encoding('cp%s' % code_page, 'strict')

for encoding in LOCALE_ENCODINGS:
    check_encoding(encoding, 'surrogateescape')

# For each candidate, list the encodings that can represent it.
for character in CHARACTERS:
    encodings = []
    for code_page in CODE_PAGES:
        encoding = 'cp%s' % code_page
        if is_encodable(character, encoding, 'strict'):
            encodings.append(encoding)
    for encoding in LOCALE_ENCODINGS:
        if is_encodable(character, encoding, 'surrogateescape'):
            encodings.append(encoding)
    print("U+%04X: Encodable to %s"
          % (ord(character), ', '.join(encodings)))