Index: Lib/csv.py =================================================================== --- Lib/csv.py (revision 87399) +++ Lib/csv.py (working copy) @@ -176,11 +176,11 @@ quotechar, doublequote, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) - if not delimiter: + if delimiter is None: delimiter, skipinitialspace = self._guess_delimiter(sample, delimiters) - if not delimiter: + if delimiter is None: raise Error, "Could not determine delimiter" class dialect(Dialect): @@ -210,10 +210,14 @@ this way. """ + # Replace CRLF with just LF. When they occur together they should + # always be treated as a line separator. + data = data.replace("\r\n", "\n") + matches = [] for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?", '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?", - '(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?" + '(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?" '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space) regexp = re.compile(restr, re.DOTALL | re.MULTILINE) matches = regexp.findall(data) @@ -253,10 +257,10 @@ (delims[a] > delims[b]) and a or b, delims.keys()) skipinitialspace = delims[delim] == spaces if delim == '\n': # most likely a file with a single column - delim = '' + delim = ',' else: # there is *no* delimiter, it's a single column of quoted data - delim = '' + delim = ',' skipinitialspace = 0 # if we see an extra quote between delimiters, we've got a @@ -358,7 +362,7 @@ end += chunkLength if not delims: - return ('', 0) + return (',', 0) # if there's more than one, fall back to a 'preferred' list if len(delims) > 1: Index: Lib/test/test_csv.py =================================================================== --- Lib/test/test_csv.py (revision 87399) +++ Lib/test/test_csv.py (working copy) @@ -950,6 +950,27 @@ self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.quotechar, "'") + # Issue 10515. 
+ for i, (sample, expected_delimiter) in enumerate(( + # XXX: I'm not sure if the first two make sense, but they are + # the existing behavior. + ('abcde', 'e'), + ('a', 'a'), + ('a,"b,c",d\ne,f,g', ','), + ('"a,b",c,d\ne,f,g', ','), + ('a,b,"c,d"\ne,f,g', ','), + ('a,b,"c,d"\r\ne,f,g', ','), + ('"a,b,c,d"\ne', ','), + ('"a,b,c,d"\r\ne', ','), + )): + def f(self, sample=sample, expected_delimiter=expected_delimiter): + dialect = csv.Sniffer().sniff(sample) + self.assertEqual(dialect.delimiter, expected_delimiter, + msg="sample was: {!r}, got: {!r}, expected: {!r}".format( + sample, dialect.delimiter, + expected_delimiter)) + locals()['test_delimiters_extended_{}'.format(i)] = f + def test_doublequote(self): sniffer = csv.Sniffer() dialect = sniffer.sniff(self.header)