diff -r 9923b81a1d34 perf.py
--- a/perf.py	Wed Feb 03 14:19:18 2016 -0600
+++ b/perf.py	Thu Feb 25 14:10:03 2016 +0200
@@ -2160,6 +2160,11 @@ def BM_regex_compile(base_python, change
     bm_path = "performance/bm_regex_compile.py"
     return RegexBenchmark(base_python, changed_python, options, bm_path)
 
+@VersionRange()
+def BM_regex_dna(base_python, changed_python, options):
+    bm_path = "performance/bm_regex_dna.py"  # regex-dna benchmark script
+    return RegexBenchmark(base_python, changed_python, options, bm_path)
+
 
 def MeasureThreading(python, options, bm_name):
     """Test the performance of Python's threading support.
@@ -2370,7 +2375,8 @@ BENCH_GROUPS = {"default": ["2to3", "cha
                             "regex_v8", "json_dump_v2", "json_load"],
                 "startup": ["normal_startup", "startup_nosite",
                             "bzr_startup", "hg_startup"],
-                "regex": ["regex_v8", "regex_effbot", "regex_compile"],
+                "regex": ["regex_v8", "regex_effbot", "regex_compile",
+                          "regex_dna"],
                 "threading": ["threaded_count", "iterative_count"],
                 "serialize": ["slowpickle", "slowunpickle",  # Not for Python 3
                               "fastpickle", "fastunpickle",
diff -r 9923b81a1d34 performance/bm_regex_dna.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/performance/bm_regex_dna.py	Thu Feb 25 14:10:03 2016 +0200
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+
+# The Computer Language Benchmarks Game
+# http://benchmarksgame.alioth.debian.org/
+#
+# regex-dna Python 3 #5 program:
+# contributed by Dominique Wahli
+# 2to3
+# modified by Justin Peel
+#
+# fasta Python 3 #3 program:
+# modified by Ian Osgood
+# modified again by Heinrich Acker
+# modified by Justin Peel
+# Modified by Christopher Sean Forgeron
+
+# Python imports
+import bisect
+import optparse
+import os.path
+import re
+import time
+
+# Local imports
+import util
+from compat import xrange
+
+
+alu = (  # DNA text that repeat_fasta() repeats for the ">ONE" record
+   'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG'
+   'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA'
+   'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT'
+   'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA'
+   'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG'
+   'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC'
+   'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA')
+
+iub = list(zip('acgtBDHKMNRSVWY', [0.27, 0.12, 0.12, 0.27] + [0.02] * 11))
+
+homosapiens = [  # (character, probability) pairs, as in iub
+    ('a', 0.3029549426680),
+    ('c', 0.1979883004921),
+    ('g', 0.1975473066391),
+    ('t', 0.3015094502008),
+]
+
+
+def make_cumulative(table):
+    """Return (running probability totals, character codes) for `table`."""
+    running_total = 0.
+    thresholds, codes = [], []
+    for symbol, weight in table:
+        running_total += weight
+        thresholds.append(running_total)
+        codes.append(ord(symbol))
+    return thresholds, codes
+
+
+def repeat_fasta(src, n, nprint):
+    """Emit `n` characters of `src`, repeated cyclically, as FASTA lines.
+
+    Each line is passed to `nprint` as a bytearray ending in a newline.
+    Lines hold 60 characters; a shorter last line is emitted when `n` is
+    not a multiple of 60.  `src` must be a non-empty string.
+    """
+    width = 60
+    src_len = len(src)
+    full_lines, tail_len = divmod(n, width)
+
+    # Repeat src often enough that a full-width window starting at any
+    # offset inside the first copy is a plain slice, even when src is
+    # shorter than a line (a fixed two-copy buffer would give short lines).
+    copies = (src_len - 1 + width) // src_len + 1
+    # CSF - It's faster to work with a bytearray than a string
+    buf = bytearray(src * copies, encoding='utf8')
+
+    start = 0
+    for _ in range(full_lines):
+        nprint(buf[start:start + width] + b'\n')
+        start = (start + width) % src_len
+
+    if tail_len:
+        # The short last line continues where the last full line stopped.
+        nprint(buf[start:start + tail_len] + b'\n')
+
+
+def random_fasta(table, n, seed, nprint):
+    """Emit `n` pseudo-random characters drawn from `table` as FASTA lines.
+
+    `table` holds (character, probability) pairs and `seed` is the current
+    generator state.  Lines of 60 characters plus a newline are passed to
+    `nprint`; one bytearray object is reused for every full line.  Returns
+    the generator state reached after the last character.
+    """
+    width = 60
+    columns = range(width)
+    pick = bisect.bisect
+    thresholds, codes = make_cumulative(table)
+
+    # pRNG modulus; each step is state = (state * 3877 + 29573) mod 139968.
+    im = 139968.0
+
+    # Width of 60 + 1 for the \n char
+    line = bytearray(width + 1)
+
+    # If n is not a multiple of the width there is a shorter trailing line.
+    # It needs a slightly different approach and is produced after the main
+    # loop, so the loop bound is lowered by one.
+    tail_len = n % width
+    full_lines = (n / float(width)) - (1.0 if tail_len else 0.0)
+
+    # CSF - Loops with a high iteration count run faster as a while/float loop.
+    done = 0.0
+    while done < full_lines:
+        # CSF - Low iteration count loops may run faster as a for loop.
+        for col in columns:
+            # CSF - Python is faster for all float math than it is for int,
+            # on my machine at least.
+            seed = (seed * 3877.0 + 29573.0) % 139968.0
+            # CSF - While real values, not variables are faster for most
+            # things, on my machine, it's faster to have 'im' already in a var
+            line[col] = codes[pick(thresholds, seed / im)]
+
+        line[width] = 10   # End of Line
+        nprint(line)
+        done += 1.0
+
+    if tail_len:
+        for col in range(tail_len):
+            seed = (seed * 3877.0 + 29573.0) % 139968.0
+            line[col] = codes[pick(thresholds, seed / im)]
+        nprint(line[:tail_len] + b"\n")
+
+    return seed
+
+
+def init_benchmarks(n):
+    """Return FASTA input (records of 2n, 3n and 5n characters) as bytes."""
+    fasta = bytearray()
+    emit = fasta.extend
+    emit(b'>ONE Homo sapiens alu\n')
+    repeat_fasta(alu, 2 * n, nprint=emit)
+
+    # The state returned for record TWO is passed on as the seed of record
+    # THREE, so that our output can pass the diff test.
+    emit(b'>TWO IUB ambiguity codes\n')
+    state = random_fasta(iub, 3 * n, seed=42.0, nprint=emit)
+
+    emit(b'>THREE Homo sapiens frequency\n')
+    random_fasta(homosapiens, 5 * n, state, nprint=emit)
+    return bytes(fasta)
+
+
+variants = (  # patterns whose matches run_benchmarks() counts
+    b'agggtaaa|tttaccct',
+    b'[cgt]gggtaaa|tttaccc[acg]',
+    b'a[act]ggtaaa|tttacc[agt]t',
+    b'ag[act]gtaaa|tttac[agt]ct',
+    b'agg[act]taaa|ttta[agt]cct',
+    b'aggg[acg]aaa|ttt[cgt]ccct',
+    b'agggt[cgt]aa|tt[acg]accct',
+    b'agggta[cgt]a|t[acg]taccct',
+    b'agggtaa[cgt]|[acg]ttaccct',
+)
+
+subst = (  # (pattern, replacement) pairs applied in order by run_benchmarks()
+    (b'B', b'(c|g|t)'), (b'D', b'(a|g|t)'), (b'H', b'(a|c|t)'),
+    (b'K', b'(g|t)'), (b'M', b'(a|c)'), (b'N', b'(a|c|g|t)'),
+    (b'R', b'(a|g)'), (b'S', b'(c|g)'), (b'V', b'(a|c|g)'),
+    (b'W', b'(a|t)'), (b'Y', b'(c|t)'),
+)
+
+def run_benchmarks(seq):
+    """Run the regex-dna workload on FASTA data `seq` (bytes).
+
+    Returns (match count per pattern in `variants`, input length, length
+    without headers and newlines, length after all `subst` replacements).
+    """
+    raw_len = len(seq)
+    seq = re.sub(b'>.*\n|\n', b'', seq)
+    stripped_len = len(seq)
+    counts = [len(re.findall(pattern, seq)) for pattern in variants]
+    # Each replacement works on the result of the previous one.
+    for code, expansion in subst:
+        seq = re.sub(code, expansion, seq)
+    return counts, raw_len, stripped_len, len(seq)
+
+
+def test_regex_dna(iterations, timer):
+    """Return the durations of `iterations` timed run_benchmarks() calls."""
+    seq = init_benchmarks(100000)
+    assert len(seq) == 1016745
+    # Warm up, then check that run against the known-good results.
+    warmup = run_benchmarks(seq)
+    assert warmup == ([6, 26, 86, 58, 113, 31, 31, 32, 43], 1016745, 1000000, 1336326)
+
+    durations = []
+    for _ in xrange(iterations):
+        started = timer()
+        run_benchmarks(seq)
+        finished = timer()
+        durations.append(finished - started)
+    return durations
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the standard options, then time the test.
+    description = ("Test the performance of regexps using benchmarks "
+                   "from The Computer Language Benchmarks Game.")
+    parser = optparse.OptionParser(usage="%prog [options]",
+                                   description=description)
+    util.add_standard_options_to(parser)
+    options, args = parser.parse_args()
+    util.run_benchmark(options, options.num_runs, test_regex_dna)