Index: Lib/csv.py
===================================================================
--- Lib/csv.py (revision 69989)
+++ Lib/csv.py (working copy)
@@ -202,11 +202,15 @@
         """
 
         matches = []
+        if isinstance(data, bytes):
+            b = bytes
+        else:
+            b = lambda x,y: x
         for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
                       '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?",
                       '(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
                       '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
-            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
+            regexp = re.compile(b(restr, "ascii"), re.DOTALL | re.MULTILINE)
             matches = regexp.findall(data)
             if matches:
                 break
@@ -270,8 +274,10 @@
         additional chunks as necessary.
         """
 
-        data = list(filter(None, data.split('\n')))
-
+        if isinstance(data, bytes):
+            data = [s for s in data.split(b"\n") if s]
+        else:
+            data = [s for s in data.split("\n") if s]
         ascii = [chr(c) for c in range(127)] # 7-bit ASCII
 
         # build frequency tables
Index: Lib/test/test_csv.py
===================================================================
--- Lib/test/test_csv.py (revision 69989)
+++ Lib/test/test_csv.py (working copy)
@@ -813,6 +813,16 @@
         self.assertEqual(dialect.delimiter, "|")
         self.assertEqual(dialect.quotechar, "'")
 
+    def test_issue5322(self):
+        sniffer = csv.Sniffer()
+        sample = bytes(self.sample3, "ascii")
+        dialect = sniffer.sniff(sample)
+        self.assert_(bytes(dialect.delimiter) in sample)
+        dialect = sniffer.sniff(sample, delimiters="?,")
+        self.assertEqual(dialect.delimiter, "?")
+        dialect = sniffer.sniff(sample, delimiters="/,")
+        self.assertEqual(dialect.delimiter, "/")
+
 if not hasattr(sys, "gettotalrefcount"):
     if support.verbose: print("*** skipping leakage tests ***")
 else: