Index: Lib/urllib.py
===================================================================
--- Lib/urllib.py	(revision 81228)
+++ Lib/urllib.py	(working copy)
@@ -92,7 +92,7 @@
 def urlcleanup():
     if _urlopener:
         _urlopener.cleanup()
-    _safemaps.clear()
+    _safe_quoters.clear()
     ftpcache.clear()
 
 # check for SSL
@@ -1157,20 +1157,24 @@
         return attr, None
 
 _hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
+_hextochr = dict((a + b, chr(int(a + b, 16)))
+                 for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
     res = s.split('%')
-    for i in xrange(1, len(res)):
-        item = res[i]
+    # fastpath
+    if len(res) == 1:
+        return s
+    s = res[0]
+    for item in res[1:]:
         try:
-            res[i] = _hextochr[item[:2]] + item[2:]
+            s += _hextochr[item[:2]] + item[2:]
         except KeyError:
-            res[i] = '%' + item
+            s += '%' + item
         except UnicodeDecodeError:
-            res[i] = unichr(int(item[:2], 16)) + item[2:]
-    return "".join(res)
+            s += unichr(int(item[:2], 16)) + item[2:]
+    return s
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""
@@ -1180,9 +1184,15 @@
 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                'abcdefghijklmnopqrstuvwxyz'
                '0123456789' '_.-')
-_safemaps = {}
+_safe_map = dict((c, c) for c in always_safe)
+for i in xrange(0x80):
+    c = chr(i)
+    if c not in _safe_map:
+        _safe_map[c] = '%%%02X' % i
+_safe_map.update((chr(i), '%%%02X' % i) for i in xrange(0x80, 0x100))
+_safe_quoters = {}
 
-def quote(s, safe = '/'):
+def quote(s, safe='/'):
     """quote('abc def') -> 'abc%20def'
 
     Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1203,27 +1213,42 @@
     called on a path where the existing slash characters are used as
     reserved characters.
     """
+    # fastpath
+    if not s:
+        return s
     cachekey = (safe, always_safe)
     try:
-        safe_map = _safemaps[cachekey]
+        (quoter, safe) = _safe_quoters[cachekey]
     except KeyError:
-        safe += always_safe
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
+        safe_map = _safe_map.copy()
+        safe_map.update((c, c) for c in safe)
+        quoter = safe_map.__getitem__
+        safe = always_safe + safe
+        _safe_quoters[cachekey] = (quoter, safe)
+    if not s.rstrip(safe):
+        return s
+    return ''.join(map(quoter, s))
+
+def _foo(s, safe='/'):
+    if not s or not s.rstrip(always_safe + safe):
+        return s
+    cachekey = (safe, always_safe)
+    try:
+        quoter = _safe_quoters[cachekey]
+    except KeyError:
+        safe_map = _safe_map.copy()
+        safe_map.update((c, c) for c in safe)
+        _safe_quoters[cachekey] = quoter = safe_map.__getitem__
+    return ''.join(map(quoter, s))
 
-def quote_plus(s, safe = ''):
+def quote_plus(s, safe=''):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
         s = quote(s, safe + ' ')
         return s.replace(' ', '+')
     return quote(s, safe)
 
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
 
     If any values in the query arg are sequences and doseq is true, each
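
For reviewers, here is a minimal standalone sketch, in Python 2 to match the patched module, of the caching scheme the patch introduces for quote(): one precomputed escape table, a memoised per-safe quoter built from a bound dict.__getitem__, and an rstrip() fastpath for strings that need no escaping. The names demo_quote, _escape_map and _quoter_cache are illustrative only and do not appear in the patch.

# Standalone sketch of the cached-quoter idea (illustrative names, Python 2).

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Map every byte to itself when it is always safe, otherwise to its %XX escape.
_escape_map = dict((chr(i), chr(i) if chr(i) in always_safe else '%%%02X' % i)
                   for i in xrange(256))
_quoter_cache = {}

def demo_quote(s, safe='/'):
    if not s:
        return s
    try:
        quoter, all_safe = _quoter_cache[safe]
    except KeyError:
        table = _escape_map.copy()
        table.update((c, c) for c in safe)   # caller's extra safe characters
        quoter = table.__getitem__           # bound method: one dict lookup per char
        all_safe = always_safe + safe
        _quoter_cache[safe] = (quoter, all_safe)
    if not s.rstrip(all_safe):               # fastpath: every character is already safe
        return s
    return ''.join(map(quoter, s))

if __name__ == '__main__':
    print demo_quote('abc def')        # abc%20def
    print demo_quote('/already-safe')  # returned unchanged by the fastpath

Binding table.__getitem__ once means the map()-driven loop costs a single dictionary lookup per character, and the cache keyed on the safe argument avoids rebuilding the 256-entry table on repeated calls; this mirrors what the patch stores in _safe_quoters, although the patched quote() keys its cache on (safe, always_safe).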