Index: ../stringbench/stringbench.py =================================================================== --- ../stringbench/stringbench.py (revision 76951) +++ ../stringbench/stringbench.py (working copy) @@ -32,7 +32,6 @@ _RANGE_1000 = range(1000) -_RANGE_1000 = range(1000) _RANGE_100 = range(100) _RANGE_10 = range(10) @@ -116,10 +115,6 @@ #### same tests as 'in' but use 'find' -# XXX: TODO: Add rfind - - - @bench('("A"*1000).find("A")', "early match, single character", 1000) def find_quick_match_single_character(STR): s1 = STR("A" * 1000) @@ -161,7 +156,7 @@ for x in _RANGE_1000: s1_find(s2) -@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s)', +@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")', "late match, 100 characters", 100) def find_test_slow_match_100_characters(STR): m = STR("ABC"*33) @@ -171,10 +166,63 @@ for x in _RANGE_100: s1_find(s2) + +#### Same tests for 'rfind' + +@bench('("A"*1000).rfind("A")', "early match, single character", 1000) +def rfind_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_find = s1.rfind + for x in _RANGE_1000: + s1_find(s2) + +@bench('("A"*1000).rfind("B")', "no match, single character", 1000) +def rfind_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_find = s1.rfind + for x in _RANGE_1000: + s1_find(s2) + + +@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000) +def rfind_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_find = s1.rfind + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000) +def rfind_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_find = s1.rfind + for x in _RANGE_1000: + s1_find(s2) + +@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_find = s1.rfind + for x in _RANGE_1000: + s1_find(s2) + 
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = STR("E") + m + (STR("D")+m)*500 + s2 = STR("E") + m + s1_find = s1.rfind + for x in _RANGE_100: + s1_find(s2) + + #### Now with index. # Skip the ones which fail because that would include exception overhead. -# Add rindex tests. - @bench('("A"*1000).index("A")', "early match, single character", 1000) def index_test_quick_match_single_character(STR): @@ -184,7 +232,6 @@ for x in _RANGE_1000: s1_index(s2) - @bench('("AB"*1000).index("AB")', "early match, two characters", 1000) def index_test_quick_match_two_characters(STR): s1 = STR("AB" * 1000) @@ -201,7 +248,7 @@ for x in _RANGE_1000: s1_index(s2) -@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s)', +@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")', "late match, 100 characters", 100) def index_test_slow_match_100_characters(STR): m = STR("ABC"*33) @@ -211,6 +258,43 @@ for x in _RANGE_100: s1_index(s2) + +#### Same for rindex + +@bench('("A"*1000).rindex("A")', "early match, single character", 1000) +def rindex_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_index = s1.rindex + for x in _RANGE_1000: + s1_index(s2) + +@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000) +def rindex_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_index = s1.rindex + for x in _RANGE_1000: + s1_index(s2) + +@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000) +def rindex_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_index = s1.rindex + for x in _RANGE_1000: + s1_index(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)', + "late match, 100 characters", 100) +def rindex_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = STR("E") + m + (STR("D")+m)*500 + s2 = STR("E") + m + s1_index = s1.rindex + for x in _RANGE_100: + 
s1_index(s2) + #### Benchmark the operator-based methods @bench('"A"*10', "repeat 1 character 10 times", 1000) @@ -382,6 +466,24 @@ for x in _RANGE_1000: s_rsplit(N, 1) +@bench('("Here are some words. "*2).partition(" ")', + "split 1 whitespace", 1000) +def whitespace_partition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_partition = s.partition + for x in _RANGE_1000: + s_partition(sep) + +@bench('("Here are some words. "*2).rpartition(" ")', + "split 1 whitespace", 1000) +def whitespace_rpartition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_rpartition = s.rpartition + for x in _RANGE_1000: + s_rpartition(sep) + human_text = """\ Python is a dynamic object-oriented programming language that can be used for many kinds of software development. It offers strong support @@ -529,6 +631,83 @@ for x in _RANGE_10: s_rsplit("ACTAT") +## split text on first "--" characters (or last) +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".split("--", 1)', + "split on 1 multicharacter separator (small)", 1000) +def split_1_multichar_sep_small(STR): + sep = STR("--") + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_split = s.split + for x in _RANGE_1000: + s_split(sep, 1) + +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--", 1)', + "split on 1 multicharacter separator (small)", 1000) +def rsplit_1_multichar_sep_small(STR): + sep = STR("--") + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit(sep, 1) + +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".partition("--")', + "split on 1 multicharacter separator (small)", 1000) +def partition_multichar_sep_small(STR): + sep = STR("--") + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_partition = s.partition + for x in _RANGE_1000: + s_partition(sep) + +@bench( + 
'"this--is--a--test--of--the--emergency--broadcast--system".rpartition("--")', + "split on 1 multicharacter separator (small)", 1000) +def rpartition_multichar_sep_small(STR): + sep = STR("--") + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_rpartition = s.rpartition + for x in _RANGE_1000: + s_rpartition(sep) + +## split dna text on 1 "ACTAT" characters +@bench('dna.split("ACTAT", 1)', + "split on 1 multicharacter separator (dna)", 10) +def split_1_multichar_sep_dna(STR): + sep = STR("ACTAT") + s = _get_dna(STR) + s_split = s.split + for x in _RANGE_10: + s_split(sep, 1) + +@bench('dna.rsplit("ACTAT", 1)', + "split on 1 multicharacter separator (dna)", 10) +def rsplit_1_multichar_sep_dna(STR): + sep = STR("ACTAT") + s = _get_dna(STR) + s_rsplit = s.rsplit + for x in _RANGE_10: + s_rsplit(sep, 1) + +@bench('dna.partition("ACTAT")', + "split on 1 multicharacter separator (dna)", 10) +def partition_multichar_sep_dna(STR): + sep = STR("ACTAT") + s = _get_dna(STR) + s_partition = s.partition + for x in _RANGE_10: + s_partition(sep) + +@bench('dna.rpartition("ACTAT")', + "split on 1 multicharacter separator (dna)", 10) +def rpartition_multichar_sep_dna(STR): + sep = STR("ACTAT") + s = _get_dna(STR) + s_rpartition = s.rpartition + for x in _RANGE_10: + s_rpartition(sep) ## split with limits @@ -539,31 +718,35 @@ @bench('GFF3_example.split("\\t")', "tab split", 1000) def tab_split_no_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_split = s.split for x in _RANGE_1000: - s_split("\t") + s_split(sep) @bench('GFF3_example.split("\\t", 8)', "tab split", 1000) def tab_split_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_split = s.split for x in _RANGE_1000: - s_split("\t", 8) + s_split(sep, 8) @bench('GFF3_example.rsplit("\\t")', "tab split", 1000) def tab_rsplit_no_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_rsplit = s.rsplit for x in _RANGE_1000: - s_rsplit("\t") + s_rsplit(sep) @bench('GFF3_example.rsplit("\\t", 8)', "tab 
split", 1000) def tab_rsplit_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_rsplit = s.rsplit for x in _RANGE_1000: - s_rsplit("\t", 8) + s_rsplit(sep, 8) #### Count characters @@ -996,6 +1179,8 @@ for title, group in itertools.groupby(bench_functions, operator.itemgetter(0)): + # Flush buffer before each group + sys.stdout.flush() print "="*10, title for (_, k, v) in group: if hasattr(v, "is_bench"): @@ -1024,7 +1209,7 @@ print "That was zippy!" else: try: - ratio = str_time/uni_time + ratio = str_total/uni_total except ZeroDivisionError: ratio = 0.0 print "%.2f\t%.2f\t%.1f\t%s" % (