diff -r 6a997c2b8eb6 Lib/_strptime.py --- a/Lib/_strptime.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/_strptime.py Sat Jul 05 23:02:45 2008 +0200 @@ -14,7 +14,7 @@ import locale import locale import calendar from re import compile as re_compile -from re import IGNORECASE +from re import IGNORECASE, ASCII from re import escape as re_escape from datetime import date as datetime_date try: @@ -262,7 +262,7 @@ class TimeRE(dict): def compile(self, format): """Return a compiled re object for the format string.""" - return re_compile(self.pattern(format), IGNORECASE) + return re_compile(self.pattern(format), IGNORECASE | ASCII) _cache_lock = _thread_allocate_lock() # DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock diff -r 6a997c2b8eb6 Lib/base64.py --- a/Lib/base64.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/base64.py Sat Jul 05 23:02:45 2008 +0200 @@ -39,7 +39,7 @@ def _translate(s, altchars): return s.translate(translation) - + # Base64 encoding/decoding uses binascii def b64encode(s, altchars=None): @@ -126,7 +126,7 @@ def urlsafe_b64decode(s): return b64decode(s, b'-_') - + # Base32 encoding/decoding must be done in Python _b32alphabet = { 0: b'A', 9: b'J', 18: b'S', 27: b'3', @@ -225,7 +225,7 @@ def b32decode(s, casefold=False, map01=N # characters because this will tell us how many null bytes to remove from # the end of the decoded string. padchars = 0 - mo = re.search('(?P<pad>[=]*)$', s) + mo = re.search(b'(?P<pad>[=]*)$', s) if mo: padchars = len(mo.group('pad')) if padchars > 0: @@ -262,7 +262,7 @@ def b32decode(s, casefold=False, map01=N return b''.join(parts) - + # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns # lowercase. The RFC also recommends against accepting input case # insensitively.
@@ -291,12 +291,12 @@ def b16decode(s, casefold=False): raise TypeError("expected bytes, not %s" % s.__class__.__name__) if casefold: s = s.upper() - if re.search('[^0-9A-F]', s): + if re.search(b'[^0-9A-F]', s): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) - + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. @@ -353,7 +353,7 @@ def decodestring(s): return binascii.a2b_base64(s) - + # Usable as a script... def main(): """Small main program""" diff -r 6a997c2b8eb6 Lib/decimal.py --- a/Lib/decimal.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/decimal.py Sat Jul 05 23:02:45 2008 +0200 @@ -5417,7 +5417,7 @@ ExtendedContext = Context( # 2. For finite numbers (not infinities and NaNs) the body of the # number between the optional sign and the optional exponent must have # at least one decimal digit, possibly after the decimal point. The -# lookahead expression '(?=\d|\.\d)' checks this. +# lookahead expression '(?=[0-9]|\.[0-9])' checks this. # # As the flag UNICODE is not enabled here, we're explicitly avoiding any # other meaning for \d than the numbers [0-9]. 
diff -r 6a997c2b8eb6 Lib/distutils/cygwinccompiler.py --- a/Lib/distutils/cygwinccompiler.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/cygwinccompiler.py Sat Jul 05 23:02:45 2008 +0200 @@ -409,7 +409,7 @@ def get_versions(): out = os.popen(gcc_exe + ' -dumpversion','r') out_string = out.read() out.close() - result = re.search('(\d+\.\d+(\.\d+)*)',out_string) + result = re.search('(\d+\.\d+(\.\d+)*)', out_string, re.ASCII) if result: gcc_version = StrictVersion(result.group(1)) else: @@ -421,7 +421,7 @@ def get_versions(): out = os.popen(ld_exe + ' -v','r') out_string = out.read() out.close() - result = re.search('(\d+\.\d+(\.\d+)*)',out_string) + result = re.search('(\d+\.\d+(\.\d+)*)', out_string, re.ASCII) if result: ld_version = StrictVersion(result.group(1)) else: @@ -433,7 +433,7 @@ def get_versions(): out = os.popen(dllwrap_exe + ' --version','r') out_string = out.read() out.close() - result = re.search(' (\d+\.\d+(\.\d+)*)',out_string) + result = re.search(' (\d+\.\d+(\.\d+)*)', out_string, re.ASCII) if result: dllwrap_version = StrictVersion(result.group(1)) else: diff -r 6a997c2b8eb6 Lib/distutils/emxccompiler.py --- a/Lib/distutils/emxccompiler.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/emxccompiler.py Sat Jul 05 23:02:45 2008 +0200 @@ -300,7 +300,7 @@ def get_versions(): out = os.popen(gcc_exe + ' -dumpversion','r') out_string = out.read() out.close() - result = re.search('(\d+\.\d+\.\d+)',out_string) + result = re.search('(\d+\.\d+\.\d+)', out_string, re.ASCII) if result: gcc_version = StrictVersion(result.group(1)) else: diff -r 6a997c2b8eb6 Lib/distutils/sysconfig.py --- a/Lib/distutils/sysconfig.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/sysconfig.py Sat Jul 05 23:02:45 2008 +0200 @@ -512,7 +512,7 @@ def get_config_vars(*args): # patched up as well. 
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): flags = _config_vars[key] - flags = re.sub('-arch\s+\w+\s', ' ', flags) + flags = re.sub('-arch\s+\w+\s', ' ', flags, re.ASCII) flags = re.sub('-isysroot [^ \t]*', ' ', flags) _config_vars[key] = flags diff -r 6a997c2b8eb6 Lib/distutils/util.py --- a/Lib/distutils/util.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/util.py Sat Jul 05 23:02:45 2008 +0200 @@ -81,7 +81,7 @@ def get_platform (): return "%s-%s.%s" % (osname, version, release) elif osname[:6] == "cygwin": osname = "cygwin" - rel_re = re.compile (r'[\d.]+') + rel_re = re.compile (r'[\d.]+', re.ASCII) m = rel_re.match(release) if m: release = m.group() diff -r 6a997c2b8eb6 Lib/distutils/version.py --- a/Lib/distutils/version.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/version.py Sat Jul 05 23:02:45 2008 +0200 @@ -134,7 +134,7 @@ class StrictVersion (Version): """ version_re = re.compile(r'^(\d+) \. (\d+) (\. (\d+))? ([ab](\d+))?$', - re.VERBOSE) + re.VERBOSE | re.ASCII) def parse (self, vstring): diff -r 6a997c2b8eb6 Lib/distutils/versionpredicate.py --- a/Lib/distutils/versionpredicate.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/distutils/versionpredicate.py Sat Jul 05 23:02:45 2008 +0200 @@ -5,7 +5,8 @@ import operator import operator -re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)") +re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)", + re.ASCII) # (package) (rest) re_paren = re.compile(r"^\s*\((.*)\)\s*$") # (list) inside of parentheses @@ -153,7 +154,8 @@ def split_provision(value): global _provision_rx if _provision_rx is None: _provision_rx = re.compile( - "([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$") + "([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$", + re.ASCII) value = value.strip() m = _provision_rx.match(value) if not m: diff -r 6a997c2b8eb6 Lib/email/quoprimime.py --- a/Lib/email/quoprimime.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/email/quoprimime.py Sat Jul 
05 23:02:45 2008 +0200 @@ -70,7 +70,7 @@ for c in (b' !"#$%&\'()*+,-./0123456789: _QUOPRI_BODY_MAP[c] = chr(c) - + # Helpers def header_check(octet): """Return True if the octet should be escaped with header quopri.""" @@ -125,7 +125,7 @@ def quote(c): return '=%02X' % ord(c) - + def header_encode(header_bytes, charset='iso-8859-1'): """Encode a single header line with quoted-printable (like) encoding. @@ -149,7 +149,7 @@ def header_encode(header_bytes, charset= return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) - + def body_encode(body, maxlinelen=76, eol=NL): """Encode with quoted-printable, wrapping at maxlinelen characters. @@ -225,7 +225,7 @@ def body_encode(body, maxlinelen=76, eol return encoded_body - + # BAW: I'm not sure if the intent was for the signature of this function to be # the same as base64MIME.decode() or not... def decode(encoded, eol=NL): @@ -280,7 +280,7 @@ decodestring = decode decodestring = decode - + def _unquote_match(match): """Turn a match in the form =AB to the ASCII character with value 0xab""" s = match.group(0) @@ -296,4 +296,4 @@ def header_decode(s): the high level email.Header class for that functionality. """ s = s.replace('_', ' ') - return re.sub(r'=\w{2}', _unquote_match, s) + return re.sub(r'=\w{2}', _unquote_match, s, re.ASCII) diff -r 6a997c2b8eb6 Lib/email/utils.py --- a/Lib/email/utils.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/email/utils.py Sat Jul 05 23:02:45 2008 +0200 @@ -52,7 +52,7 @@ escapesre = re.compile(r'[][\\()"]') escapesre = re.compile(r'[][\\()"]') - + # Helpers def formataddr(pair): @@ -73,7 +73,7 @@ def formataddr(pair): return address - + def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" all = COMMASPACE.join(fieldvalues) @@ -81,7 +81,7 @@ def getaddresses(fieldvalues): return a.addresslist - + ecre = re.compile(r''' =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ?
is the charset @@ -93,7 +93,7 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) - + def formatdate(timeval=None, localtime=False, usegmt=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -146,7 +146,7 @@ def formatdate(timeval=None, localtime=F zone) - + def make_msgid(idstring=None): """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: @@ -168,7 +168,7 @@ def make_msgid(idstring=None): return msgid - + # These functions are in the standalone mimelib version only because they've # subsequently been fixed in the latest Python versions. We use this to worm # around broken older Pythons. @@ -202,7 +202,7 @@ def unquote(str): return str - + # RFC2231-related functions - parameter encoding and decoding def decode_rfc2231(s): """Decode string according to RFC 2231""" @@ -227,7 +227,8 @@ def encode_rfc2231(s, charset=None, lang return "%s'%s'%s" % (charset, language, s) -rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$') +rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$', + re.ASCII) def decode_params(params): """Decode parameters list according to RFC 2231. diff -r 6a997c2b8eb6 Lib/encodings/idna.py --- a/Lib/encodings/idna.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/encodings/idna.py Sat Jul 05 23:02:45 2008 +0200 @@ -176,12 +176,10 @@ class Codec(codecs.Codec): return "", 0 # IDNA allows decoding to operate on Unicode strings, too. - if isinstance(input, bytes): - labels = dots.split(input) - else: - # Force to bytes + if not isinstance(input, bytes): + # XXX obviously wrong, see #3232 input = bytes(input) - labels = input.split(b".") + labels = input.split(b".") if labels and len(labels[-1]) == 0: trailing_dot = '.'
diff -r 6a997c2b8eb6 Lib/ftplib.py --- a/Lib/ftplib.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/ftplib.py Sat Jul 05 23:02:45 2008 +0200 @@ -590,7 +590,8 @@ def parse150(resp): global _150_re if _150_re is None: import re - _150_re = re.compile("150 .* \((\d+) bytes\)", re.IGNORECASE) + _150_re = re.compile( + "150 .* \((\d+) bytes\)", re.IGNORECASE | re.ASCII) m = _150_re.match(resp) if not m: return None @@ -613,7 +614,7 @@ def parse227(resp): global _227_re if _227_re is None: import re - _227_re = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)') + _227_re = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)', re.ASCII) m = _227_re.search(resp) if not m: raise error_proto(resp) diff -r 6a997c2b8eb6 Lib/html/parser.py --- a/Lib/html/parser.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/html/parser.py Sat Jul 05 23:02:45 2008 +0200 @@ -385,4 +385,4 @@ class HTMLParser(_markupbase.ParserBase) return '&'+s+';' return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", - replaceEntities, s) + replaceEntities, s, re.ASCII) diff -r 6a997c2b8eb6 Lib/http/cookiejar.py --- a/Lib/http/cookiejar.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/http/cookiejar.py Sat Jul 05 23:02:45 2008 +0200 @@ -121,7 +121,7 @@ def time2netscape(t=None): UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) def offset_from_tz_string(tz): offset = None if tz in UTC_ZONES: @@ -191,9 +191,9 @@ def _str2time(day, mon, yr, hr, min, sec STRICT_DATE_RE = re.compile( r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") + "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) WEEKDAY_RE = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) LOOSE_HTTP_DATE_RE = re.compile( r"""^ (\d\d?) 
# day @@ -210,7 +210,7 @@ LOOSE_HTTP_DATE_RE = re.compile( ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone \s* (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X) + \s*$""", re.X | re.ASCII) def http2time(text): """Returns time in seconds since epoch of time represented by a string. @@ -282,7 +282,7 @@ ISO_DATE_RE = re.compile( \s* ([-+]?\d\d?:?(:?\d\d)? |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X) + \s*$""", re.X | re. ASCII) def iso2time(text): """ As for http2time, but parses the ISO 8601 formats: @@ -489,7 +489,7 @@ def parse_ns_headers(ns_headers): return result -IPV4_RE = re.compile(r"\.\d+$") +IPV4_RE = re.compile(r"\.\d+$", re.ASCII) def is_HDN(text): """Return True if text is a host domain name.""" # XXX @@ -574,7 +574,7 @@ def user_domain_match(A, B): return True return False -cut_port_re = re.compile(r":\d+$") +cut_port_re = re.compile(r":\d+$", re.ASCII) def request_host(request): """Return request-host, as defined by RFC 2965. @@ -1207,7 +1207,7 @@ class CookieJar: domain_re = re.compile(r"[^.]*") dots_re = re.compile(r"^\.+") - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" + magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) def __init__(self, policy=None): if policy is None: @@ -1856,7 +1856,7 @@ class LWPCookieJar(FileCookieJar): def _really_load(self, f, filename, ignore_discard, ignore_expires): magic = f.readline() - if not re.search(self.magic_re, magic): + if not self.magic_re.search(magic): msg = ("%r does not look like a Set-Cookie3 (LWP) format " "file" % filename) raise LoadError(msg) @@ -1965,7 +1965,7 @@ class MozillaCookieJar(FileCookieJar): header by default (Mozilla can cope with that). """ - magic_re = "#( Netscape)? HTTP Cookie File" + magic_re = re.compile("#( Netscape)? 
HTTP Cookie File") header = """\ # Netscape HTTP Cookie File # http://www.netscape.com/newsref/std/cookie_spec.html @@ -1977,7 +1977,7 @@ class MozillaCookieJar(FileCookieJar): now = time.time() magic = f.readline() - if not re.search(self.magic_re, magic): + if not self.magic_re.search(magic): f.close() raise LoadError( "%r does not look like a Netscape format cookies file" % diff -r 6a997c2b8eb6 Lib/http/cookies.py --- a/Lib/http/cookies.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/http/cookies.py Sat Jul 05 23:02:45 2008 +0200 @@ -445,7 +445,7 @@ _CookiePattern = re.compile( ""+ _LegalCharsPatt +"*" # Any word or empty string r")" # End of group 'val' r"\s*;?" # Probably ending in a semi-colon - ) + , re.ASCII) # May be removed if safe. # At long last, here is the cookie class. diff -r 6a997c2b8eb6 Lib/imaplib.py --- a/Lib/imaplib.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/imaplib.py Sat Jul 05 23:02:45 2008 +0200 @@ -88,11 +88,12 @@ InternalDate = re.compile(r'.*INTERNALDA r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])' r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])' r'"') -Literal = re.compile(r'.*{(?P<size>\d+)}$') +Literal = re.compile(r'.*{(?P<size>\d+)}$', re.ASCII) MapCRLF = re.compile(r'\r\n|\r|\n') Response_code = re.compile(r'\[(?P<type>[A-Z-]+)( (?P<data>[^\]]*))?\]') Untagged_response = re.compile(r'\* (?P<type>[A-Z-]+)( (?P<data>.*))?') -Untagged_status = re.compile(r'\* (?P<data>\d+) (?P<type>[A-Z-]+)( (?P<data2>.*))?') +Untagged_status = re.compile( + r'\* (?P<data>\d+) (?P<type>[A-Z-]+)( (?P<data2>.*))?', re.ASCII) @@ -146,7 +147,7 @@ class IMAP4: class abort(error): pass # Service errors - close and retry class readonly(abort): pass # Mailbox status changed to READ-ONLY - mustquote = re.compile(r"[^\w!#$%&'*+,.:;<=>?^`|~-]") + mustquote = re.compile(r"[^\w!#$%&'*+,.:;<=>?^`|~-]", re.ASCII) def __init__(self, host = '', port = IMAP4_PORT): self.debug = Debug @@ -168,7 +169,7 @@ class IMAP4: self.tagpre = Int2AP(random.randint(4096, 65535)) self.tagre = re.compile(r'(?P<tag>' + self.tagpre - + r'\d+)
(?P<type>[A-Z]+) (?P<data>.*)', re.ASCII) # Get server welcome message, # request and store CAPABILITY response. diff -r 6a997c2b8eb6 Lib/json/decoder.py --- a/Lib/json/decoder.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/json/decoder.py Sat Jul 05 23:02:45 2008 +0200 @@ -67,7 +67,7 @@ def JSONNumber(match, context): fn = getattr(context, 'parse_int', None) or int res = fn(integer) return res, None -pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) +pattern(r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?')(JSONNumber) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) diff -r 6a997c2b8eb6 Lib/logging/handlers.py --- a/Lib/logging/handlers.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/logging/handlers.py Sat Jul 05 23:02:45 2008 +0200 @@ -199,7 +199,7 @@ class TimedRotatingFileHandler(BaseRotat else: raise ValueError("Invalid rollover interval specified: %s" % self.when) - self.extMatch = re.compile(self.extMatch) + self.extMatch = re.compile(self.extMatch, re.ASCII) self.interval = self.interval * interval # multiply by units requested self.rolloverAt = currentTime + self.interval diff -r 6a997c2b8eb6 Lib/platform.py --- a/Lib/platform.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/platform.py Sat Jul 05 23:02:45 2008 +0200 @@ -118,7 +118,7 @@ _libc_search = re.compile(r'(__libc_init '|' '(GLIBC_([0-9.]+))' '|' - '(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)') + '(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)', re.ASCII) def libc_ver(executable=sys.executable,lib='',version='', @@ -223,15 +223,15 @@ def _dist_try_harder(distname,version,id return distname,version,id -_release_filename = re.compile(r'(\w+)[-_](release|version)') +_release_filename = re.compile(r'(\w+)[-_](release|version)', re.ASCII) _lsb_release_version = re.compile(r'(.+)' ' release ' '([\d.]+)' '[^(]*(?:\((.+)\))?') _release_version = re.compile(r'([^0-9]+)' '(?: release )?'
'([\d.]+)' - '[^(]*(?:\((.+)\))?') + '[^(]*(?:\((.+)\))?', re.ASCII) # See also http://www.novell.com/coolsolutions/feature/11251.html # and http://linuxmafia.com/faq/Admin/release-files.html @@ -464,7 +464,7 @@ def _norm_version(version, build=''): _ver_output = re.compile(r'(?:([\w ]+) ([\w.]+) ' '.*' - 'Version ([\d.]+))') + 'Version ([\d.]+))', re.ASCII) def _syscmd_ver(system='', release='', version='', @@ -1253,16 +1253,16 @@ _sys_version_parser = re.compile( _sys_version_parser = re.compile( r'([\w.+]+)\s*' '\(#?([^,]+),\s*([\w ]+),\s*([\w :]+)\)\s*' - '\[([^\]]+)\]?') + '\[([^\]]+)\]?', re.ASCII) _jython_sys_version_parser = re.compile( - r'([\d\.]+)') + r'([\d\.]+)', re.ASCII) _ironpython_sys_version_parser = re.compile( r'IronPython\s*' '([\d\.]+)' '(?: \(([\d\.]+)\))?' - ' on (.NET [\d\.]+)') + ' on (.NET [\d\.]+)', re.ASCII) _sys_version_cache = {} diff -r 6a997c2b8eb6 Lib/plistlib.py --- a/Lib/plistlib.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/plistlib.py Sat Jul 05 23:02:45 2008 +0200 @@ -147,7 +147,7 @@ class DumbXMLWriter: # Contents should conform to a subset of ISO 8601 # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. 
Smaller units may be omitted with # a loss of precision) -_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z") +_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII) def _dateFromString(s): order = ('year', 'month', 'day', 'hour', 'minute', 'second') diff -r 6a997c2b8eb6 Lib/posixpath.py --- a/Lib/posixpath.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/posixpath.py Sat Jul 05 23:02:45 2008 +0200 @@ -241,7 +241,7 @@ def expandvars(path): return path if not _varprog: import re - _varprog = re.compile(r'\$(\w+|\{[^}]*\})') + _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) i = 0 while True: m = _varprog.search(path, i) diff -r 6a997c2b8eb6 Lib/py_compile.py --- a/Lib/py_compile.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/py_compile.py Sat Jul 05 23:02:45 2008 +0200 @@ -86,7 +86,7 @@ def read_encoding(file, default): line = f.readline() if not line: break - m = re.match(r".*\bcoding:\s*(\S+)\b", line) + m = re.match(br".*\bcoding:\s*(\S+)\b", line) if m: return m.group(1).decode("ascii") return default diff -r 6a997c2b8eb6 Lib/re.py --- a/Lib/re.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/re.py Sat Jul 05 23:02:45 2008 +0200 @@ -64,11 +64,18 @@ resulting RE will match the second chara \Z Matches only at the end of the string. \b Matches the empty string, but only at the start or end of a word. \B Matches the empty string, but not at the start or end of a word. - \d Matches any decimal digit; equivalent to the set [0-9]. - \D Matches any non-digit character; equivalent to the set [^0-9]. + \d Matches any decimal digit; equivalent to the set [0-9] in + bytes patterns or string patterns with the ASCII flag. + In string patterns without the ASCII flag, it will match the whole + range of Unicode digits. + \D Matches any non-digit character; equivalent to [^\d]. \s Matches any whitespace character; equivalent to [ \t\n\r\f\v].
\S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. - \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]. + \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_] + in bytes patterns or string patterns with the ASCII flag. + In string patterns without the ASCII flag, it will match the + range of Unicode alphanumeric characters (letters plus digits + plus underscore). With LOCALE, it will match the set [0-9_] plus characters defined as letters for the current locale. \W Matches the complement of \w. @@ -87,6 +94,12 @@ This module exports the following functi escape Backslash all non-alphanumerics in a string. Some of the functions in this module takes flags as optional parameters: + A ASCII For string patterns, make \w, \W, \b, \B, \d, \D + match the corresponding ASCII character categories + (rather than the whole Unicode categories, which is the + default). + For bytes patterns, this flag is the only available + behaviour and needn't be specified. I IGNORECASE Perform case-insensitive matching. L LOCALE Make \w, \W, \b, \B, dependent on the current locale. M MULTILINE "^" matches the beginning of lines (after a newline) @@ -95,7 +108,8 @@ Some of the functions in this module tak as the end of the string. S DOTALL "." matches any character at all, including the newline. X VERBOSE Ignore whitespace and comments for nicer looking RE's. - U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. + U UNICODE For compatibility only. Ignored for string patterns (it + is the default), and forbidden for bytes patterns. This module also defines an exception 'error'. 
@@ -107,16 +121,17 @@ import sre_parse # public symbols __all__ = [ "match", "search", "sub", "subn", "split", "findall", - "compile", "purge", "template", "escape", "I", "L", "M", "S", "X", - "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", + "compile", "purge", "template", "escape", "A", "I", "L", "M", "S", "X", + "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", "error" ] __version__ = "2.2.1" # flags +A = ASCII = sre_compile.SRE_FLAG_ASCII # assume ascii "locale" I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale -U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale +U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode "locale" M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments diff -r 6a997c2b8eb6 Lib/sre_constants.py --- a/Lib/sre_constants.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/sre_constants.py Sat Jul 05 23:02:45 2008 +0200 @@ -207,9 +207,10 @@ SRE_FLAG_LOCALE = 4 # honour system loca SRE_FLAG_LOCALE = 4 # honour system locale SRE_FLAG_MULTILINE = 8 # treat target as multiline string SRE_FLAG_DOTALL = 16 # treat target as a single string -SRE_FLAG_UNICODE = 32 # use unicode locale +SRE_FLAG_UNICODE = 32 # use unicode "locale" SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments SRE_FLAG_DEBUG = 128 # debugging +SRE_FLAG_ASCII = 256 # use ascii "locale" # flags for INFO primitive SRE_INFO_PREFIX = 1 # has prefix diff -r 6a997c2b8eb6 Lib/sre_parse.py --- a/Lib/sre_parse.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/sre_parse.py Sat Jul 05 23:02:45 2008 +0200 @@ -200,7 +200,7 @@ class Tokenizer: except IndexError: raise error("bogus escape (end of line)") if isinstance(self.string, bytes): - char = chr(c) + c = chr(c) char = char + c 
self.index = self.index + len(char) self.next = char @@ -672,6 +672,18 @@ def _parse(source, state): return subpattern +def fix_flags(src, flags): + # Check and fix flags according to the type of pattern (str or bytes) + if isinstance(src, str): + if not flags & SRE_FLAG_ASCII: + flags |= SRE_FLAG_UNICODE + elif flags & SRE_FLAG_UNICODE: + raise ValueError("ASCII and UNICODE flags are incompatible") + else: + if flags & SRE_FLAG_UNICODE: + raise ValueError("can't use UNICODE flag with a bytes pattern") + return flags + def parse(str, flags=0, pattern=None): # parse 're' pattern into list of (opcode, argument) tuples @@ -683,6 +695,7 @@ def parse(str, flags=0, pattern=None): pattern.str = str p = _parse_sub(source, pattern, 0) + p.pattern.flags = fix_flags(str, p.pattern.flags) tail = source.get() if tail == ")": diff -r 6a997c2b8eb6 Lib/tarfile.py --- a/Lib/tarfile.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/tarfile.py Sat Jul 05 23:02:45 2008 +0200 @@ -1368,7 +1368,7 @@ class TarInfo(object): # "%d %s=%s\n" % (length, keyword, value). length is the size # of the complete record including the length field itself and # the newline. keyword and value are both UTF-8 encoded strings. 
- regex = re.compile(r"(\d+) ([^=]+)=", re.U) + regex = re.compile(br"(\d+) ([^=]+)=") pos = 0 while True: match = regex.match(buf, pos) diff -r 6a997c2b8eb6 Lib/test/re_tests.py --- a/Lib/test/re_tests.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/test/re_tests.py Sat Jul 05 23:02:45 2008 +0200 @@ -661,14 +661,10 @@ 123""", SUCCEED, 'found', 'abc'), ('^([ab]*?)(?a)(?Pb)").groupindex, @@ -614,8 +597,8 @@ class ReTests(unittest.TestCase): import array for typecode in 'bBuhHiIlLfd': a = array.array(typecode) - self.assertEqual(re.compile("bla").match(a), None) - self.assertEqual(re.compile("").match(a).groups(), ()) + self.assertEqual(re.compile(b"bla").match(a), None) + self.assertEqual(re.compile(b"").match(a).groups(), ()) def test_inline_flags(self): # Bug #1700 @@ -657,6 +640,43 @@ class ReTests(unittest.TestCase): self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + def test_bytes_str_mixing(self): + # Mixing str and bytes is disallowed + pat = re.compile('.') + bpat = re.compile(b'.') + self.assertRaises(TypeError, pat.match, b'b') + self.assertRaises(TypeError, bpat.match, 'b') + self.assertRaises(TypeError, pat.sub, b'b', 'c') + self.assertRaises(TypeError, pat.sub, 'b', b'c') + self.assertRaises(TypeError, pat.sub, b'b', b'c') + self.assertRaises(TypeError, bpat.sub, b'b', 'c') + self.assertRaises(TypeError, bpat.sub, 'b', b'c') + self.assertRaises(TypeError, bpat.sub, 'b', 'c') + + def test_unicode_flag(self): + # String patterns + for flags in (0, re.UNICODE): + pat = re.compile('\xc0', flags | re.IGNORECASE) + self.assertNotEqual(pat.match('\xe0'), None) + pat = re.compile('\w', flags) + self.assertNotEqual(pat.match('\xe0'), None) + pat = re.compile('\xc0', re.ASCII | re.IGNORECASE) + self.assertEqual(pat.match('\xe0'), None) + pat = re.compile('\w', re.ASCII) + self.assertEqual(pat.match('\xe0'), None) + # Bytes patterns + for flags in 
(0, re.ASCII): + pat = re.compile(b'\xc0', re.IGNORECASE) + self.assertEqual(pat.match(b'\xe0'), None) + pat = re.compile(b'\w') + self.assertEqual(pat.match(b'\xe0'), None) + # Incompatibilities + self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE) + self.assertRaises(ValueError, re.compile, b'(?u)\w') + self.assertRaises(ValueError, re.compile, '\w', + re.UNICODE | re.ASCII) + self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII) def run_re_tests(): @@ -732,23 +752,25 @@ def run_re_tests(): else: print('=== Failed incorrectly', t) - # Try the match on a unicode string, and check that it - # still succeeds. + # Try the match with both pattern and string converted to + # bytes, and check that it still succeeds. try: - result = obj.search(str(s, "latin-1")) - if result is None: - print('=== Fails on unicode match', t) - except NameError: - continue # 1.5.2 - except TypeError: - continue # unicode test case - - # Try the match on a unicode pattern, and check that it - # still succeeds. - obj=re.compile(str(pattern, "latin-1")) - result = obj.search(s) - if result is None: - print('=== Fails on unicode pattern match', t) + bpat = bytes(pattern, "ascii") + bs = bytes(s, "ascii") + except UnicodeEncodeError: + # skip non-ascii tests + pass + else: + try: + bpat = re.compile(bpat) + except Exception: + print('=== Fails on bytes pattern compile', t) + if verbose: + traceback.print_exc(file=sys.stdout) + else: + bytes_result = bpat.search(bs) + if bytes_result is None: + print('=== Fails on bytes pattern match', t) # Try the match with the search area limited to the extent # of the match and see if it still succeeds. \B will @@ -771,10 +793,11 @@ def run_re_tests(): # Try the match with LOCALE enabled, and check that it # still succeeds. 
- obj = re.compile(pattern, re.LOCALE) - result = obj.search(s) - if result is None: - print('=== Fails on locale-sensitive match', t) + if '(?u)' not in pattern: + obj = re.compile(pattern, re.LOCALE) + result = obj.search(s) + if result is None: + print('=== Fails on locale-sensitive match', t) # Try the match with UNICODE locale enabled, and check # that it still succeeds. diff -r 6a997c2b8eb6 Lib/tokenize.py --- a/Lib/tokenize.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/tokenize.py Sat Jul 05 23:02:45 2008 +0200 @@ -47,21 +47,23 @@ def any(*choices): return group(*choices def any(*choices): return group(*choices) + '*' def maybe(*choices): return group(*choices) + '?' +# Note: we use unicode matching for names ("\w") but ascii matching for +# number literals. Whitespace = r'[ \f\t]*' Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'[a-zA-Z_]\w*' -Hexnumber = r'0[xX][\da-fA-F]+' +Hexnumber = r'0[xX][0-9a-fA-F]+' Binnumber = r'0[bB][01]+' Octnumber = r'0[oO][0-7]+' -Decnumber = r'(?:0+|[1-9]\d*)' +Decnumber = r'(?:0+|[1-9][0-9]*)' Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?\d+' -Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent) -Expfloat = r'\d+' + Exponent +Exponent = r'[eE][-+]?[0-9]+' +Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) +Expfloat = r'[0-9]+' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Tail end of ' string. 
diff -r 6a997c2b8eb6 Lib/urllib/request.py --- a/Lib/urllib/request.py Sat Jul 05 17:55:00 2008 +0200 +++ b/Lib/urllib/request.py Sat Jul 05 23:02:45 2008 +0200 @@ -141,7 +141,7 @@ def urlcleanup(): _opener = None # copied from cookielib.py -_cut_port_re = re.compile(r":\d+$") +_cut_port_re = re.compile(r":\d+$", re.ASCII) def request_host(request): """Return request-host, as defined by RFC 2965. diff -r 6a997c2b8eb6 Modules/_sre.c --- a/Modules/_sre.c Sat Jul 05 17:55:00 2008 +0200 +++ b/Modules/_sre.c Sat Jul 05 23:02:45 2008 +0200 @@ -1691,7 +1691,7 @@ getstring(PyObject* string, Py_ssize_t* /* get pointer to string buffer */ view.len = -1; buffer = Py_TYPE(string)->tp_as_buffer; - if (!buffer || !buffer->bf_getbuffer || + if (!buffer || !buffer->bf_getbuffer || (*buffer->bf_getbuffer)(string, &view, PyBUF_SIMPLE) < 0) { PyErr_SetString(PyExc_TypeError, "expected string or buffer"); return NULL; @@ -1717,7 +1717,7 @@ getstring(PyObject* string, Py_ssize_t* if (PyBytes_Check(string) || bytes == size) charsize = 1; #if defined(HAVE_UNICODE) - else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) + else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) charsize = sizeof(Py_UNICODE); #endif else { @@ -1729,7 +1729,7 @@ getstring(PyObject* string, Py_ssize_t* *p_charsize = charsize; if (ptr == NULL) { - PyErr_SetString(PyExc_ValueError, + PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); } return ptr; @@ -1753,6 +1753,17 @@ state_init(SRE_STATE* state, PatternObje ptr = getstring(string, &length, &charsize); if (!ptr) return NULL; + + if (charsize == 1 && pattern->charsize > 1) { + PyErr_SetString(PyExc_TypeError, + "can't use a string pattern on a bytes-like object"); + return NULL; + } + if (charsize > 1 && pattern->charsize == 1) { + PyErr_SetString(PyExc_TypeError, + "can't use a bytes pattern on a string-like object"); + return NULL; + } /* adjust boundaries */ if (start < 0) @@ -2680,6 +2691,16 @@ _compile(PyObject* self_, PyObject* args return 
NULL; } + if (pattern == Py_None) + self->charsize = -1; + else { + Py_ssize_t p_length; + if (!getstring(pattern, &p_length, &self->charsize)) { + PyObject_DEL(self); + return NULL; + } + } + Py_INCREF(pattern); self->pattern = pattern; diff -r 6a997c2b8eb6 Modules/sre.h --- a/Modules/sre.h Sat Jul 05 17:55:00 2008 +0200 +++ b/Modules/sre.h Sat Jul 05 23:02:45 2008 +0200 @@ -30,6 +30,7 @@ typedef struct { PyObject* pattern; /* pattern source (or None) */ int flags; /* flags used when compiling pattern source */ PyObject *weakreflist; /* List of weak references */ + int charsize; /* pattern charsize (or -1) */ /* pattern code */ Py_ssize_t codesize; SRE_CODE code[1];