#!/usr/bin/python
"""Reproduce a misreported offset in UnicodeDecodeError raised by text-mode reads.

A temporary file is filled with ASCII bytes, a three-byte non-ASCII sequence
at a known offset, and more ASCII bytes.  Reading it back with the "ascii"
codec must fail; the script compares the position quoted in the error message
with the offset at which the non-ASCII bytes were actually written.
"""
import os
import re
import tempfile

# Extracts the byte position quoted in a UnicodeDecodeError message.
_POSITION_RE = re.compile(r"in position (\d+):")

# Bytes that are not valid ASCII, so decoding is expected to fail on them.
_NON_ASCII = b"\xe6\x96\x87"


def check(offset):
    """Check that a decode error is reported at the expected file offset.

    Writes ``offset`` bytes of ``b"a"``, then a non-ASCII byte sequence, then
    100 bytes of ``b"b"`` to a temporary file, and reads the file back in text
    mode with ``encoding="ascii"``.

    Args:
        offset: Number of ASCII bytes written before the non-ASCII sequence.

    Returns:
        False if a UnicodeDecodeError was raised and the position it reports
        differs from ``offset`` (the mismatch is printed); True otherwise,
        including the case where no decode error is raised at all.
    """
    # delete=False because the file is closed and then reopened by name.
    tmp = tempfile.NamedTemporaryFile("wb", delete=False)
    try:
        with tmp:
            tmp.write(b"a" * offset)
            tmp.write(_NON_ASCII)
            tmp.write(b"b" * 100)
        # This tries to use ascii, and fails.  The error message contains the
        # wrong offset; it appeared to be offset % 4096 when first observed.
        with open(tmp.name, encoding="ascii") as reader:
            reader.readlines()
    except UnicodeDecodeError as err:
        message = str(err)
        match = _POSITION_RE.search(message)
        # Prefer the position from the message text (that is what is under
        # test); fall back to the exception attribute if the wording differs.
        found = int(match.group(1)) if match else err.start
        if found != offset:
            print(message)
            print("Hey, found at %d, should have been at %d" % (found, offset))
            return False
    finally:
        # Runs after both ``with`` blocks have closed their handles, and also
        # when writing the file failed, so the temp file never leaks.
        os.unlink(tmp.name)
    return True


def scan_offsets(limit=10000):
    """Run check() on consecutive offsets, stopping at the first mismatch.

    Offsets 0 through ``limit + 1`` inclusive are tried, matching the original
    loop which tested ``where > limit`` only after checking ``where``.

    Returns:
        True (after printing "all good") if every offset passed, else False.
    """
    for where in range(limit + 2):
        if not check(where):
            return False
    print("all good")
    return True


def check_random_offsets(trials=1000, max_offset=50000):
    """Run check() on randomly chosen offsets in ``[1, max_offset]``.

    Not called by default; this probe was disabled in the original script.
    """
    import random

    for _ in range(trials):
        check(random.randint(1, max_offset))


def main():
    """Script entry point: perform the sequential offset scan."""
    scan_offsets()


if __name__ == "__main__":
    main()