Index: Lib/csv.py =================================================================== --- Lib/csv.py (revision 62313) +++ Lib/csv.py (working copy) @@ -233,10 +233,12 @@ (delims[a] > delims[b]) and a or b, delims.keys()) skipinitialspace = delims[delim] == spaces if delim == '\n': # most likely a file with a single column - delim = '' + # comma is as good as any other delimiter in this case + delim = ',' else: # there is *no* delimiter, it's a single column of quoted data - delim = '' + # comma is as good as any other delimiter in this case + delim = ',' skipinitialspace = 0 return (quotechar, delim, skipinitialspace) @@ -317,8 +319,12 @@ if len(delims) == 1: delim = delims.keys()[0] - skipinitialspace = (data[0].count(delim) == - data[0].count("%c " % delim)) + if delim.isdigit() or delim.isalpha(): + delim = "," + skipinitialspace = False + else: + skipinitialspace = (data[0].count(delim) == + data[0].count("%c " % delim)) return (delim, skipinitialspace) # analyze another chunkLength lines @@ -331,19 +337,25 @@ # if there's more than one, fall back to a 'preferred' list if len(delims) > 1: for d in self.preferred: - if d in delims.keys(): + if d in delims: skipinitialspace = (data[0].count(d) == data[0].count("%c " % d)) return (d, skipinitialspace) # nothing else indicates a preference, pick the character that # dominates(?) - items = [(v,k) for (k,v) in delims.items()] - items.sort() - delim = items[-1][1] + items = [(v,k) for (k,v) in delims.items() + if not (k.isalpha() or k.isdigit())] + if not items: + # go with the favorite + delim = ',' + skipinitialspace = 0 + else: + items.sort() + delim = items[-1][1] + skipinitialspace = (data[0].count(delim) == + data[0].count("%c " % delim)) - skipinitialspace = (data[0].count(delim) == - data[0].count("%c " % delim)) return (delim, skipinitialspace) Index: Lib/test/test_csv.py =================================================================== --- Lib/test/test_csv.py (revision 62313) +++ Lib/test/test_csv.py (working copy) @@ -861,6 +861,27 @@ sample6 = "a|b|c\r\nd|e|f\r\n" sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" + sample8 = """\ +43.4e12 +147483648 +47483648 +""" + sample9 = """\ +abcdef +bcdefg +cdefgh +""" + + sample10 = '''\ +"Sequence" +"AALENTHLL" +"RQILNADAM" +"GALENAKAEI" +"NSPANNIVM" +"AQIRNLTVL" +"AGPENSSKI" +''' + def test_has_header(self): sniffer = csv.Sniffer() self.assertEqual(sniffer.has_header(self.sample1), False) @@ -899,8 +920,23 @@ self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.quotechar, "'") + def test_one_column(self): + sniffer = csv.Sniffer() + # Not much to go on in sample8 or sample9. We hope for useful + # defaults. + dialect = sniffer.sniff(self.sample8) + self.assertEqual(dialect.delimiter, ',') + self.assertEqual(dialect.quotechar, '"') + dialect = sniffer.sniff(self.sample9) + self.assertEqual(dialect.delimiter, ',') + self.assertEqual(dialect.quotechar, '"') + dialect = sniffer.sniff(self.sample10) + self.assertEqual(dialect.delimiter, ',') + self.assertEqual(dialect.quotechar, '"') + if not hasattr(sys, "gettotalrefcount"): - if test_support.verbose: print "*** skipping leakage tests ***" + if test_support.verbose: + print "*** skipping leakage tests ***" else: class NUL: def write(s, *args):