#!/usr/bin/env python

r"""Treat some unusual line terminator as if it was the line terminator.

This was written to handle a case (https://bugs.python.org/issue37709)
where a user wanted to use the csv module to read a file with \x06 as
the field separator and \x07 as the line terminator.  Since the csv
module requires you to open input files as text, that's never going to
work.  This is an attempt to work around that limitation.
"""


class LFMapper:
    """Treat a user-defined byte sequence as line terminator in input.

    The open file pointer must be opened in binary mode.  The line
    terminator (default NUL) and encoding (default utf-8) are both
    optional.

    Instances are iterators: each ``next()`` call returns one record,
    decoded to ``str`` and with the terminator stripped.  An empty record
    (two adjacent terminators) is returned as ``""`` rather than ending
    the iteration.  Trailing data without a final terminator is returned
    as the last record.

    Raises:
        ValueError: if ``line_terminator`` is empty.
    """

    # Number of bytes requested from the underlying file per read.
    _CHUNK_SIZE = 1024

    def __init__(self, fp, line_terminator: bytes = b"\x00",
                 encoding: str = "utf-8"):
        if not line_terminator:
            # An empty separator can never be located; fail early with a
            # clear message instead of deep inside __next__.
            raise ValueError("line_terminator must not be empty")
        self.fp = fp
        self.terminator = line_terminator
        self.encoding = encoding
        self.fp_exhausted = False
        self.buffer = b""

    def __next__(self) -> str:
        "Maybe read some bytes, split and return a hunk."
        while True:
            record, sep, rest = self.buffer.partition(self.terminator)
            if sep:
                # A complete record is available.  Return it even when it
                # is empty, so blank records do not truncate the stream.
                self.buffer = rest
                return record.decode(self.encoding)
            if self.fp_exhausted:
                break
            chunk = self.fp.read(self._CHUNK_SIZE)
            if chunk:
                # Accumulate raw bytes; decoding happens per record, so a
                # multi-byte character or terminator split across two
                # reads is reassembled before it is interpreted.
                self.buffer += chunk
            else:
                self.fp_exhausted = True

        # Input is exhausted and no terminator remains: whatever is left
        # is a final, unterminated record (or nothing at all).
        record, self.buffer = self.buffer, b""
        if record:
            return record.decode(self.encoding)
        raise StopIteration

    def __iter__(self):
        return self


if __name__ == "__main__":
    import csv

    # Use context managers so the demo files are closed deterministically.
    with open("bell.csv", "rb") as raw:
        fp = LFMapper(raw, line_terminator=b"\x07")
        reader = csv.reader(fp, delimiter="\x06")
        for row in reader:
            print(row)

    with open("CSV_SAMPLE.CSV", "rb") as raw:
        fp = LFMapper(raw, line_terminator=b"\x07\r\n")
        reader = csv.reader(fp, delimiter="\x06")
        for row in reader:
            print(row)