Index: Lib/urllib.py
===================================================================
--- Lib/urllib.py (revision 81076)
+++ Lib/urllib.py (working copy)
@@ -27,6 +27,7 @@
 import os
 import time
 import sys
+import re
 from urlparse import urljoin as basejoin
 
 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
@@ -1162,8 +1163,11 @@
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
     res = s.split('%')
-    for i in xrange(1, len(res)):
-        item = res[i]
+    if len(res) == 1:
+        return s
+    i = 0
+    for item in res[1:]:
+        i += 1
         try:
             res[i] = _hextochr[item[:2]] + item[2:]
         except KeyError:
@@ -1180,9 +1184,17 @@
 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                'abcdefghijklmnopqrstuvwxyz'
                '0123456789' '_.-')
+_unsafe_pattern = r'[^%s%%s]' % re.escape(always_safe)
+_safe_map = dict((c, c) for c in always_safe)
+for i in xrange(0x80):
+    c = chr(i)
+    if c not in _safe_map:
+        _safe_map[c] = '%%%02X' % i
+_safe_map.update((chr(i), '%%%02X' % i) for i in xrange(0x80, 0x100))
 _safemaps = {}
 
-def quote(s, safe = '/'):
+
+def quote(s, safe='/'):
     """quote('abc def') -> 'abc%20def'
 
     Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1203,27 +1215,27 @@
     called on a path where the existing slash characters are used as
     reserved characters.
     """
+    if not s:
+        return s
     cachekey = (safe, always_safe)
     try:
-        safe_map = _safemaps[cachekey]
+        (is_unsafe, safe_get) = _safemaps[cachekey]
     except KeyError:
-        safe += always_safe
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
+        pattern = _unsafe_pattern % re.escape(safe)
+        safe_map = _safe_map.copy()
+        safe_map.update((c, c) for c in safe)
+        (is_unsafe, safe_get) = _safemaps[cachekey] = \
+            re.compile(pattern).search, safe_map.__getitem__
+    return ''.join(map(safe_get, s)) if is_unsafe(s) else s
 
-def quote_plus(s, safe = ''):
+def quote_plus(s, safe=''):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
         s = quote(s, safe + ' ')
         return s.replace(' ', '+')
     return quote(s, safe)
 
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
 
     If any values in the query arg are sequences and doseq is true, each