""" Compare .NET code pages to Python """ import sys, codecs import System import System.Text _debug = 0 CHECK_CODECS = [ # (.NET Code page, Python codec name) (65000, 'utf-7'), (65001, 'utf-8'), #(65005, 'utf-32-le'), # not supported by .NET 2.0 #(65006, 'utf-32-be'), # not supported by .NET 2.0 (1200, 'utf-16-le'), (1201, 'utf-16-be'), (28591, 'iso-8859-1'), #(28592, 'iso-8859-2'), # not supported by IronPython 2.6 #(28593, 'iso-8859-3'), # not supported by IronPython 2.6 #(28594, 'iso-8859-4'), # not supported by IronPython 2.6 #(28595, 'iso-8859-5'), # not supported by IronPython 2.6 ] UNICODE_RANGE = sys.maxunicode def compare_codec(code_page, encoding): net_codec = System.Text.Encoding.GetEncoding(code_page) try: python_encoder = codecs.getencoder(encoding) except LookupError: print 'Unknown encoding in Python: %r' % encoding return -1 errors = 0 for i in range(UNICODE_RANGE): u = unichr(i) try: python_bytes, count = python_encoder(u) except ValueError: if _debug: print '%i: skipped (Python encode error)' % i continue net_bytes = ''.join([chr(int(c)) for c in net_codec.GetBytes(u)]) if python_bytes == net_bytes: if _debug: print '%i: OK' % i else: print '%i: .NET=%r, Python=%r' % ( i, net_bytes, python_bytes) errors += 1 return errors for code_page, encoding in CHECK_CODECS: print 'Code Page %i vs. encoding %r' % (code_page, encoding) errors = compare_codec(code_page, encoding) print print '%i errors' % errors print