--- parse.py.patch7	2008-08-01 00:46:11.000000000 +1000
+++ parse.py.patch8	2008-08-08 00:48:37.000000000 +1000
@@ -1,8 +1,8 @@
 Index: Doc/library/urllib.parse.rst
 ===================================================================
---- Doc/library/urllib.parse.rst	(revision 65324)
+--- Doc/library/urllib.parse.rst	(revision 65574)
 +++ Doc/library/urllib.parse.rst	(working copy)
-@@ -182,37 +182,75 @@
+@@ -182,37 +182,81 @@
     string.  If there is no fragment identifier in *url*, return *url* unmodified
     and an empty string.
  
@@ -20,8 +20,8 @@
 +   The optional *encoding* and *errors* parameters specify how to deal with
 +   non-ASCII characters, as accepted by the :meth:`str.encode` method.
 +   *encoding* defaults to ``'utf-8'``.
-+   *errors* defaults to ``'replace'``, meaning unsupported characters are
-+   replaced by a placeholder character.
++   *errors* defaults to ``'strict'``, meaning unsupported characters raise a
++   :class:`UnicodeEncodeError`.
 +   *encoding* and *errors* are ignored if *string* is a :class:`bytes`.
  
 -.. function:: quote_plus(string[, safe])
@@ -40,8 +40,8 @@
 -.. function:: unquote(string)
 +.. function:: quote_from_bytes(bytes[, safe])
  
-+   An alias for :func:`quote`, intended for use with a :class:`bytes` object
-+   rather than a :class:`str`.
++   Like :func:`quote`, but accepts a :class:`bytes` object rather than a
++   :class:`str`, and does not perform string-to-bytes encoding.
 +
 +   Example: ``quote_from_bytes(b'/El Ni\xc3\xb1o/')`` yields
 +   ``'/El%20Ni%C3%B1o/'``.
@@ -54,26 +54,32 @@
 +   :meth:`bytes.decode` method.
  
 -   Example: ``unquote('/%7Econnolly/')`` yields ``'/~connolly/'``.
-+   *encoding* defaults to ``'utf-8'``.
-+   *errors* defaults to ``'replace'``, meaning invalid sequences are
-+   replaced by a placeholder character.
++   *string* must be a :class:`str`.
  
-+   Example: ``unquote('/El%20Ni%C3%B1o/')`` yields ``'/El Niño/'``.
++   *encoding* defaults to ``'utf-8'``.
++   *errors* defaults to ``'strict'``, meaning invalid sequences raise
++   a :class:`UnicodeDecodeError`.
  
 -.. function:: unquote_plus(string)
++   Example: ``unquote('/El%20Ni%C3%B1o/')`` yields ``'/El Niño/'``.
  
++
 +.. function:: unquote_plus(string[, encoding[, errors]])
 +
     Like :func:`unquote`, but also replace plus signs by spaces, as required for
     unquoting HTML form values.
  
-+   Example: ``unquote_plus('/El+Ni%C3%B1o/')`` yields ``'/El Niño/'``.
++   *string* must be a :class:`str`.
  
++   Example: ``unquote_plus('/El+Ni%C3%B1o/')`` yields ``'/El Niño/'``.
++
 +.. function:: unquote_to_bytes(string)
 +
 +   Replace ``%xx`` escapes by their single-octet equivalent, and return a
 +   :class:`bytes` object.
 +
++   *string* must be a :class:`str`.
++
 +   Unescaped non-ASCII characters in the input string are encoded into UTF-8
 +   bytes.
 +
@@ -86,20 +92,23 @@
     Convert a mapping object or a sequence of two-element tuples  to a "url-encoded"
 Index: Lib/urllib/parse.py
 ===================================================================
---- Lib/urllib/parse.py	(revision 65324)
+--- Lib/urllib/parse.py	(revision 65574)
 +++ Lib/urllib/parse.py	(working copy)
-@@ -260,50 +260,98 @@
+@@ -7,7 +7,9 @@
+ import sys
+ 
+ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
+-           "urlsplit", "urlunsplit"]
++           "urlsplit", "urlunsplit",
++           "quote", "quote_plus", "quote_from_bytes",
++           "unquote", "unquote_plus", "unquote_to_bytes"]
+ 
+ # A classification of schemes ('' means apply by default)
+ uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
+@@ -260,50 +262,94 @@
      else:
          return url, ''
  
-+# _hextochr maps 2-hex-digit strings onto single bytes
-+# eg. _hextochr['2f'] = b'\x2f'
-+# Maps lowercase and uppercase variants (but not mixed case).
-+_hextochr = dict(('%02x' % i, bytes([i])) for i in range(256))
-+_hextochr.update(('%02X' % i, bytes([i])) for i in range(256))
- 
--_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
--_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
 +def unquote_to_bytes(s):
 +    """unquote_to_bytes('abc%20def') -> b'abc def'."""
 +    # Note: strings are encoded as UTF-8. This is only an issue if it contains
@@ -109,23 +118,27 @@
 +    for i in range(1, len(res)):
 +        item = res[i]
 +        try:
-+            res[i] = _hextochr[item[:2]] + item[2:].encode('utf-8')
++            res[i] = bytes.fromhex(item[:2]) + item[2:].encode('utf-8')
 +        except KeyError:
 +            res[i] = b'%' + item.encode('utf-8')
 +    return b"".join(res)
  
--def unquote(s):
--    """unquote('abc%20def') -> 'abc def'."""
-+def unquote(s, encoding = "utf-8", errors = "replace"):
+-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
++def unquote(s, encoding='utf-8', errors='strict'):
 +    """Replace %xx escapes by their single-character equivalent. The optional
 +    encoding and errors parameters specify how to decode percent-encoded
 +    sequences into Unicode characters, as accepted by the bytes.decode()
 +    method.
 +    By default, percent-encoded sequences are decoded with UTF-8, and invalid
-+    sequences are replaced by a placeholder character.
-+
++    sequences raise a UnicodeDecodeError.
+ 
+-def unquote(s):
+-    """unquote('abc%20def') -> 'abc def'."""
 +    unquote('abc%20def') -> 'abc def'.
 +    """
++    if encoding is None: encoding = 'utf-8'
++    if errors is None: errors = 'strict'
 +    # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
 +    # (list of single-byte bytes objects)
 +    pct_sequence = []
@@ -134,12 +147,13 @@
          item = res[i]
          try:
 -            res[i] = _hextochr[item[:2]] + item[2:]
-+            pct_sequence.append(_hextochr[item[:2]])
++            pct_sequence.append(bytes.fromhex(item[:2]))
 +            rest = item[2:]
          except KeyError:
 -            res[i] = '%' + item
 -        except UnicodeDecodeError:
 -            res[i] = chr(int(item[:2], 16)) + item[2:]
+-    return "".join(res)
 +            rest = '%' + item
 +        if len(rest) == 0:
 +            # This segment was just a single percent-encoded character.
@@ -155,11 +169,11 @@
 +        # Flush the final pct_sequence
 +        # res[-1] will always be empty if pct_sequence != []
 +        res[-1] = b''.join(pct_sequence).decode(encoding, errors)
-     return "".join(res)
++    return ''.join(res)
  
 -def unquote_plus(s):
 -    """unquote('%7e/abc+def') -> '~/abc def'"""
-+def unquote_plus(s, encoding = "utf-8", errors = "replace"):
++def unquote_plus(s, encoding='utf-8', errors='strict'):
 +    """Like unquote(), but also replace plus signs by spaces, as required for
 +    unquoting HTML form values.
 +
@@ -205,11 +219,11 @@
 +            return res
  
 -def quote(s, safe = '/'):
-+def quote(s, safe = '/', encoding = "utf-8", errors = "replace"):
++def quote(s, safe='/', encoding='utf-8', errors='strict'):
      """quote('abc def') -> 'abc%20def'
  
      Each part of a URL, e.g. the path info, the query, etc., has a
-@@ -323,8 +371,18 @@
+@@ -323,8 +369,20 @@
      is reserved, but in typical usage the quote function is being
      called on a path where the existing slash characters are used as
      reserved characters.
@@ -217,8 +231,10 @@
 +    The optional encoding and errors parameters specify how to deal with
 +    non-ASCII characters, as accepted by the str.encode method.
 +    By default, characters are encoded with UTF-8, and unsupported characters
-+    are replaced by a placeholder character.
++    raise a UnicodeEncodeError.
      """
++    if encoding is None: encoding = 'utf-8'
++    if errors is None: errors = 'strict'
 +    if isinstance(safe, str):
 +        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
 +        safe = safe.encode('ascii', 'ignore')
@@ -228,46 +244,58 @@
      try:
          quoter = _safe_quoters[cachekey]
      except KeyError:
-@@ -333,13 +391,19 @@
+@@ -333,13 +391,32 @@
      res = map(quoter, s)
      return ''.join(res)
  
 -def quote_plus(s, safe = ''):
 -    """Quote the query fragment of a URL; replacing ' ' with '+'"""
-+def quote_plus(s, safe = '', encoding = "utf-8", errors = "replace"):
+-    if ' ' in s:
+-        s = quote(s, safe + ' ')
++def quote_plus(s, safe='', encoding='utf-8', errors='strict'):
 +    """Like quote(), but also replace ' ' with '+', as required for quoting
 +    HTML form values. Plus signs in the original string are escaped unless
 +    they are included in safe. It also does not have safe default to '/'.
 +    """
-     if ' ' in s:
-         s = quote(s, safe + ' ')
++    space = ' ' if isinstance(safe, str) else b' '  # match safe's type
++    if ' ' in s if isinstance(s, str) else b' ' in s:  # s: str or bytes
++        s = quote(s, safe + space, encoding, errors)
          return s.replace(' ', '+')
 -    return quote(s, safe)
 +    return quote(s, safe, encoding, errors)
  
-+# quote accepts either bytes or strings, so quote_from_bytes is just an alias
-+quote_from_bytes = quote
++def quote_from_bytes(s, safe='/'):
++    """Like quote(), but s must be a bytes or bytearray object and no
++    str-to-bytes encoding is performed; any other type raises TypeError."""
++    if isinstance(safe, str):
++        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
++        safe = safe.encode('ascii', 'ignore')
++    cachekey = (safe, always_safe)
++    if not isinstance(s, (bytes, bytearray)):
++        raise TypeError("quote_from_bytes() expected a bytes")
++    try:
++        quoter = _safe_quoters[cachekey]
++    except KeyError:
++        quoter = _safe_quoters[cachekey] = Quoter(safe)
++    return ''.join(map(quoter, s))
 +
  def urlencode(query,doseq=0):
      """Encode a sequence of two-element tuples or dictionary into a URL query string.
  
 Index: Lib/email/utils.py
 ===================================================================
---- Lib/email/utils.py	(revision 65324)
+--- Lib/email/utils.py	(revision 65574)
 +++ Lib/email/utils.py	(working copy)
-@@ -219,7 +219,10 @@
+@@ -219,7 +219,7 @@
      charset is given but not language, the string is encoded using the empty
      string for language.
      """
 -    s = urllib.parse.quote(s, safe='')
-+    try:
-+        s = urllib.parse.quote(s, safe='', encoding=charset)
-+    except:
-+        s = urllib.parse.quote(s, safe='')
++    s = urllib.parse.quote(s, safe='', encoding=charset)
      if charset is None and language is None:
          return s
      if language is None:
-@@ -271,7 +274,10 @@
+@@ -271,7 +271,10 @@
              # language specifiers at the beginning of the string.
              for num, s, encoded in continuations:
                  if encoded:
@@ -281,7 +309,7 @@
              value = quote(EMPTYSTRING.join(value))
 Index: Lib/test/test_http_cookiejar.py
 ===================================================================
---- Lib/test/test_http_cookiejar.py	(revision 65324)
+--- Lib/test/test_http_cookiejar.py	(revision 65574)
 +++ Lib/test/test_http_cookiejar.py	(working copy)
 @@ -539,6 +539,8 @@
              # unquoted unsafe
@@ -304,7 +332,7 @@
          cookie = interact_2965(
 Index: Lib/test/test_cgi.py
 ===================================================================
---- Lib/test/test_cgi.py	(revision 65324)
+--- Lib/test/test_cgi.py	(revision 65574)
 +++ Lib/test/test_cgi.py	(working copy)
 @@ -68,6 +68,8 @@
      ("&a=b", [('a', 'b')]),
@@ -317,7 +345,7 @@
  parse_strict_test_cases = [
 Index: Lib/test/test_wsgiref.py
 ===================================================================
---- Lib/test/test_wsgiref.py	(revision 65324)
+--- Lib/test/test_wsgiref.py	(revision 65574)
 +++ Lib/test/test_wsgiref.py	(working copy)
 @@ -291,6 +291,7 @@
      def testAppURIs(self):
@@ -337,16 +365,36 @@
          self.checkReqURI("http://127.0.0.1/spammity/spam?say=ni",
 Index: Lib/test/test_urllib.py
 ===================================================================
---- Lib/test/test_urllib.py	(revision 65324)
+--- Lib/test/test_urllib.py	(revision 65574)
 +++ Lib/test/test_urllib.py	(working copy)
-@@ -355,6 +355,23 @@
+@@ -336,10 +336,10 @@
+                                  "_.-"])
+         result = urllib.parse.quote(do_not_quote)
+         self.assertEqual(do_not_quote, result,
+-                         "using quote(): %s != %s" % (do_not_quote, result))
++                         "using quote(): %r != %r" % (do_not_quote, result))
+         result = urllib.parse.quote_plus(do_not_quote)
+         self.assertEqual(do_not_quote, result,
+-                        "using quote_plus(): %s != %s" % (do_not_quote, result))
++                        "using quote_plus(): %r != %r" % (do_not_quote, result))
+ 
+     def test_default_safe(self):
+         # Test '/' is default value for 'safe' parameter
+@@ -350,11 +350,28 @@
+         quote_by_default = "<>"
+         result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
          self.assertEqual(quote_by_default, result,
-                          "using quote_plus(): %s != %s" %
+-                         "using quote(): %s != %s" % (quote_by_default, result))
++                         "using quote(): %r != %r" % (quote_by_default, result))
+         result = urllib.parse.quote_plus(quote_by_default, safe=quote_by_default)
+         self.assertEqual(quote_by_default, result,
+-                         "using quote_plus(): %s != %s" %
++                         "using quote_plus(): %r != %r" %
                           (quote_by_default, result))
 +        # Safe expressed as bytes rather than str
 +        result = urllib.parse.quote(quote_by_default, safe=b"<>")
 +        self.assertEqual(quote_by_default, result,
-+                         "using quote(): %s != %s" % (quote_by_default, result))
++                         "using quote(): %r != %r" % (quote_by_default, result))
 +        # "Safe" non-ASCII characters should have no effect
 +        # (Since URIs are not allowed to have non-ASCII characters)
 +        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
@@ -363,9 +411,50 @@
  
      def test_default_quoting(self):
          # Make sure all characters that should be quoted are by default sans
-@@ -407,6 +424,56 @@
+@@ -378,35 +395,100 @@
+         expected = "ab%5B%5Dcd"
+         result = urllib.parse.quote(partial_quote)
+         self.assertEqual(expected, result,
+-                         "using quote(): %s != %s" % (expected, result))
++                         "using quote(): %r != %r" % (expected, result))
+         self.assertEqual(expected, result,
+-                         "using quote_plus(): %s != %s" % (expected, result))
++                         "using quote_plus(): %r != %r" % (expected, result))
+ 
+     def test_quoting_space(self):
+         # Make sure quote() and quote_plus() handle spaces as specified in
+         # their unique way
+         result = urllib.parse.quote(' ')
+         self.assertEqual(result, hexescape(' '),
+-                         "using quote(): %s != %s" % (result, hexescape(' ')))
++                         "using quote(): %r != %r" % (result, hexescape(' ')))
+         result = urllib.parse.quote_plus(' ')
+         self.assertEqual(result, '+',
+-                         "using quote_plus(): %s != +" % result)
++                         "using quote_plus(): %r != +" % result)
+         given = "a b cd e f"
+         expect = given.replace(' ', hexescape(' '))
+         result = urllib.parse.quote(given)
+         self.assertEqual(expect, result,
+-                         "using quote(): %s != %s" % (expect, result))
++                         "using quote(): %r != %r" % (expect, result))
+         expect = given.replace(' ', '+')
+         result = urllib.parse.quote_plus(given)
+         self.assertEqual(expect, result,
+-                         "using quote_plus(): %s != %s" % (expect, result))
++                         "using quote_plus(): %r != %r" % (expect, result))
+ 
+     def test_quoting_plus(self):
+         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
+                          'alpha%2Bbeta+gamma')
          self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                           'alpha+beta+gamma')
++        # Test with bytes
++        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
++                         'alpha%2Bbeta+gamma')
++        # Test with safe bytes
++        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
++                         'alpha+beta+gamma')
  
 +    def test_quote_bytes(self):
 +        # Bytes should quote directly to percent-encoded values
@@ -391,6 +480,10 @@
 +        result = urllib.parse.quote(given)
 +        self.assertEqual(expect, result,
 +                         "using quote(): %r != %r" % (expect, result))
++        # Characters in Latin-1 range, encoded with None (default)
++        result = urllib.parse.quote(given, encoding=None, errors=None)
++        self.assertEqual(expect, result,
++                         "using quote(): %r != %r" % (expect, result))
 +        # Characters in Latin-1 range, encoded with Latin-1
 +        given = "\xa2\xd8ab\xff"
 +        expect = "%A2%D8ab%FF"
@@ -405,8 +498,13 @@
 +                         "using quote(): %r != %r" % (expect, result))
 +        # Characters in BMP, encoded with Latin-1
 +        given = "\u6f22\u5b57"
++        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
++                                    encoding="latin-1")
++        # Characters in BMP, encoded with Latin-1, with replace error handling
++        given = "\u6f22\u5b57"
 +        expect = "%3F%3F"                   # "??"
-+        result = urllib.parse.quote(given, encoding="latin-1")
++        result = urllib.parse.quote(given, encoding="latin-1",
++                                    errors="replace")
 +        self.assertEqual(expect, result,
 +                         "using quote(): %r != %r" % (expect, result))
 +        # Characters in BMP, Latin-1, with xmlcharref error handling
@@ -420,9 +518,65 @@
  class UnquotingTests(unittest.TestCase):
      """Tests for unquote() and unquote_plus()
  
-@@ -463,10 +530,71 @@
+@@ -422,23 +504,28 @@
+             expect = chr(num)
+             result = urllib.parse.unquote(given)
+             self.assertEqual(expect, result,
+-                             "using unquote(): %s != %s" % (expect, result))
++                             "using unquote(): %r != %r" % (expect, result))
+             result = urllib.parse.unquote_plus(given)
+             self.assertEqual(expect, result,
+-                             "using unquote_plus(): %s != %s" %
++                             "using unquote_plus(): %r != %r" %
+                              (expect, result))
+             escape_list.append(given)
+         escape_string = ''.join(escape_list)
+         del escape_list
+         result = urllib.parse.unquote(escape_string)
+         self.assertEqual(result.count('%'), 1,
+-                         "using quote(): not all characters escaped; %s" %
+-                         result)
+-        result = urllib.parse.unquote(escape_string)
+-        self.assertEqual(result.count('%'), 1,
+                          "using unquote(): not all characters escaped: "
+                          "%s" % result)
+ 
++    def test_unquoting_mixed_case(self):
++        # Test unquoting on mixed-case hex digits in the percent-escapes
++        given = '%Ab%eA'
++        expect = b'\xab\xea'
++        result = urllib.parse.unquote_to_bytes(given)
++        self.assertEqual(expect, result,
++                         "using unquote_to_bytes(): %r != %r"
++                         % (expect, result))
++
+     def test_unquoting_parts(self):
+         # Make sure unquoting works when have non-quoted characters
+         # interspersed
+@@ -446,10 +533,10 @@
+         expect = "abcd"
+         result = urllib.parse.unquote(given)
+         self.assertEqual(expect, result,
+-                         "using quote(): %s != %s" % (expect, result))
++                         "using quote(): %r != %r" % (expect, result))
+         result = urllib.parse.unquote_plus(given)
+         self.assertEqual(expect, result,
+-                         "using unquote_plus(): %s != %s" % (expect, result))
++                         "using unquote_plus(): %r != %r" % (expect, result))
+ 
+     def test_unquoting_plus(self):
+         # Test difference between unquote() and unquote_plus()
+@@ -457,16 +544,85 @@
+         expect = given
+         result = urllib.parse.unquote(given)
          self.assertEqual(expect, result,
-                          "using unquote_plus(): %s != %s" % (expect, result))
+-                         "using unquote(): %s != %s" % (expect, result))
++                         "using unquote(): %r != %r" % (expect, result))
+         expect = given.replace('+', ' ')
+         result = urllib.parse.unquote_plus(given)
+         self.assertEqual(expect, result,
+-                         "using unquote_plus(): %s != %s" % (expect, result))
++                         "using unquote_plus(): %r != %r" % (expect, result))
  
 +    def test_unquote_to_bytes(self):
 +        given = 'br%C3%BCckner_sapporo_20050930.doc'
@@ -449,6 +603,10 @@
 +        result = urllib.parse.unquote(given)
 +        self.assertEqual(expect, result,
 +                         "using unquote(): %r != %r" % (expect, result))
++        # Characters in the Latin-1 range, encoded with None (default)
++        result = urllib.parse.unquote(given, encoding=None, errors=None)
++        self.assertEqual(expect, result,
++                         "using unquote(): %r != %r" % (expect, result))
  
 +        # Characters in the Latin-1 range, encoded with Latin-1
 +        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
@@ -466,8 +624,12 @@
 +
 +        # Decode with UTF-8, invalid sequence
 +        given = "%F3%B1"
++        self.assertRaises(UnicodeDecodeError, urllib.parse.unquote, given)
++
++        # Decode with UTF-8, invalid sequence, replace errors
++        given = "%F3%B1"
 +        expect = "\ufffd"                   # Replacement character
-+        result = urllib.parse.unquote(given)
++        result = urllib.parse.unquote(given, errors="replace")
 +        self.assertEqual(expect, result,
 +                         "using unquote(): %r != %r" % (expect, result))
 +