--- csv.py.orig 2017-01-19 12:27:44.000000000 -0600 +++ csv.py 2017-02-04 01:18:41.541172658 -0600 @@ -106,14 +106,12 @@ ascii = [chr(c) for c in range(127)] # 7-bit ASCII # build frequency tables - chunkLength = min(10, len(data)) - iteration = 0 + chunkLength = 10 charFrequency = {} modes = {} delims = {} start, end = 0, min(chunkLength, len(data)) while start < len(data): - iteration += 1 for line in data[start:end]: for char in ascii: metaFrequency = charFrequency.get(char, {}) @@ -140,7 +138,7 @@ # build a list of possible delimiters modeList = modes.items() - total = float(chunkLength * iteration) + total = float(end) # (rows of consistent data) / (number of rows) = 100% consistency = 1.0 # minimum consistency threshold @@ -161,7 +159,7 @@ # analyze another chunkLength lines start = end - end += chunkLength + end = min(end + chunkLength, len(data)) if not delims: return ('', 0)