"""Compare two ways of splitting a UTF-8 text file that contains U+0085.

Python 2 script.  It writes a short sample text containing a U+0085
character to ``tmp.txt`` and then prints the lines obtained in two ways:

* ``codecs`` ``readlines()`` -- printed under the heading "WRONG";
* reading the whole text and splitting on "\\n" only -- printed under the
  heading "CORRECT".

Every line is printed in ``unicode-escape`` form so that non-ASCII and
control characters are visible in the output.
"""
import codecs
import os
import sys

# Python 2 only: re-expose and call sys.setdefaultencoding so implicit
# str <-> unicode conversions use UTF-8.  Kept as in the original script so
# its process-wide behaviour is unchanged.
reload(sys)
sys.setdefaultencoding("utf_8")

# In a Python 2 byte-string literal "\u0085" is NOT an escape; it stays as
# six literal characters and only becomes U+0085 when decoded with
# 'unicode-escape' below.  The "\\n" sequences become real newlines then too.
errCase = 'a\\nb\u0085 b2\\nc'
print("Case SRC: %s\n\n" % errCase)

tmp_path = 'tmp.txt'
coding = 'unicode-escape'

# Write the decoded sample as UTF-8 bytes.  The encoding is explicit rather
# than relying on the default-encoding hack above; the bytes are the same.
with open(tmp_path, 'wb') as fs_out:
    fs_out.write(errCase.decode(coding).encode('utf_8'))

# Variant 1: let the codecs reader split the text into lines.
with codecs.open(tmp_path, 'r', 'utf_8') as fs_in:
    a_str_lines = fs_in.readlines()

print("WRONG:")
for lineNum, line in enumerate(a_str_lines, 1):
    print('Line: %s = "%s"' % (lineNum, line.encode(coding)))

# Variant 2: read everything and split on "\n" only.
with codecs.open(tmp_path, 'r', 'utf_8') as fs_in:
    a_str_whole = fs_in.read()

# split() drops the separators, so put "\n" back on every piece except the
# last one (the last piece is whatever followed the final newline).
pieces = a_str_whole.split("\n")
a_str_lines = [piece + "\n" for piece in pieces[:-1]] + pieces[-1:]

print("\n\n\nCORRECT:")
for lineNum, line in enumerate(a_str_lines, 1):
    print('Line: %s = "%s"' % (lineNum, line.encode(coding)))
print("???")