Message280979
Serhiy, single-byte codepages map every byte value, even if it's just to a Unicode C1 control code [1].
For example:
import ctypes
# Load kernel32 with use_last_error=True so that ctypes keeps a private copy
# of the Windows last-error code, readable via ctypes.get_last_error().
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
# Flag passed to MultiByteToWideChar by decode() when strict decoding is
# requested (per the Windows API docs, it makes invalid input an error).
MB_ERR_INVALID_CHARS = 0x00000008
def mbtwc_errcheck(result, func, args):
    """ctypes ``errcheck`` hook for ``MultiByteToWideChar``.

    Raises the pending Windows error when the call returned 0 while the
    final argument (the output size, as ``decode`` passes it) was non-zero.
    A zero result combined with a zero final argument is not treated as an
    error here; the caller has to interpret it.

    Args:
        result: Value returned by the foreign function.
        func: The foreign function object (unused).
        args: Tuple of the arguments the function was called with.

    Returns:
        ``args`` unchanged, which tells ctypes to continue its normal
        result processing, so the caller still receives ``result``.

    Raises:
        OSError: Built by ``ctypes.WinError`` from ``ctypes.get_last_error()``.
    """
    if not result and args[-1]:
        raise ctypes.WinError(ctypes.get_last_error())
    return args
# Route every MultiByteToWideChar call through mbtwc_errcheck.
kernel32.MultiByteToWideChar.errcheck = mbtwc_errcheck
def decode(codepage, data, strict=True):
    """Decode ``data`` from a Windows code page via ``MultiByteToWideChar``.

    Args:
        codepage: Numeric Windows code page identifier (e.g. 437, 1252).
        data: Bytes to decode.
        strict: When true, pass ``MB_ERR_INVALID_CHARS`` so that invalid
            input is reported as an error.

    Returns:
        The decoded ``str``; ``''`` for empty input.

    Raises:
        OSError: If the conversion fails (built from the Windows last error).
    """
    # MultiByteToWideChar rejects a zero-length input, so handle it up front.
    if not data:
        return ''
    flags = MB_ERR_INVALID_CHARS if strict else 0
    # First call: output size 0 asks for the required length in characters.
    n = kernel32.MultiByteToWideChar(codepage, flags,
                                     data, len(data),
                                     None, 0)
    # A failed size query returns 0 with a zero output size, which the
    # installed errcheck does not raise on, so check it here explicitly.
    if not n:
        raise ctypes.WinError(ctypes.get_last_error())
    buf = (ctypes.c_wchar * n)()
    kernel32.MultiByteToWideChar(codepage, flags,
                                 data, len(data),
                                 buf, n)
    # Slice rather than use .value, which stops at the first NUL character.
    return buf[:n]
# Code pages to probe: 437, 874 and 1250 through 1258.
codepages = [437, 874] + list(range(1250, 1259))
for cp in codepages:
    # ascii() escapes non-ASCII characters so the mapped code points are
    # visible regardless of the console encoding.
    print('cp%d:' % cp, ascii(decode(cp, b'\x81\x8d')))
Output:
cp437: '\xfc\xec'
cp874: '\x81\x8d'
cp1250: '\x81\u0164'
cp1251: '\u0403\u040c'
cp1252: '\x81\x8d'
cp1253: '\x81\x8d'
cp1254: '\x81\x8d'
cp1255: '\x81\x8d'
cp1256: '\u067e\u0686'
cp1257: '\x81\xa8'
cp1258: '\x81\x8d'
[1]: https://en.wikipedia.org/wiki/C0_and_C1_control_codes |
|
Date |
User |
Action |
Args |
2016-11-16 19:18:00 | eryksun | set | recipients:
+ eryksun, paul.moore, vstinner, larry, tim.golden, benjamin.peterson, ned.deily, ezio.melotti, zach.ware, serhiy.storchaka, steve.dower, Artoria2e5 |
2016-11-16 19:18:00 | eryksun | set | messageid: <1479323880.69.0.361652571743.issue28712@psf.upfronthosting.co.za> |
2016-11-16 19:18:00 | eryksun | link | issue28712 messages |
2016-11-16 19:18:00 | eryksun | create | |
|