Speed up urllib.quote() and urllib.unquote() with fast paths.

* unquote(): return the argument unchanged when it contains no '%'.
* quote(): build the byte -> escape table once at import time, copy it
  per distinct `safe` value, and return the argument unchanged when
  every character in it is already safe.
* always_safe stays a plain str so code that concatenates with it
  keeps working.
* quote(None) raises TypeError instead of returning None.
* The "all safe" test uses rstrip(), which works for both str and
  unicode arguments.
* Whitespace-only cleanups in the quote_plus() and urlencode()
  signatures.

Index: Lib/urllib.py
===================================================================
--- Lib/urllib.py	(revision 81076)
+++ Lib/urllib.py	(working copy)
@@ -1162,8 +1162,11 @@
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
     res = s.split('%')
-    for i in xrange(1, len(res)):
-        item = res[i]
+    if len(res) == 1:
+        return s
+    i = 0
+    for item in res[1:]:
+        i += 1
         try:
             res[i] = _hextochr[item[:2]] + item[2:]
         except KeyError:
@@ -1177,12 +1180,17 @@
     s = s.replace('+', ' ')
     return unquote(s)
 
 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                'abcdefghijklmnopqrstuvwxyz'
                '0123456789' '_.-')
+# Map every byte to its %XX escape, then let the always-safe
+# characters map to themselves.
+_safe_map = dict((chr(i), '%%%02X' % i) for i in xrange(0x100))
+_safe_map.update((c, c) for c in always_safe)
 _safemaps = {}
 
-def quote(s, safe = '/'):
+
+def quote(s, safe='/'):
     """quote('abc def') -> 'abc%20def'
 
     Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1203,27 +1211,34 @@
     called on a path where the existing slash characters are used as
     reserved characters.
     """
+    # Fast path: nothing to quote in an empty string.
+    if not s:
+        if s is None:
+            raise TypeError('None object cannot be quoted')
+        return s
     cachekey = (safe, always_safe)
     try:
-        safe_map = _safemaps[cachekey]
+        (safe_chars, safe_get) = _safemaps[cachekey]
     except KeyError:
-        safe += always_safe
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
+        safe_map = _safe_map.copy()
+        safe_map.update((c, c) for c in safe)
+        safe_get = safe_map.__getitem__
+        safe_chars = always_safe + safe
+        _safemaps[cachekey] = (safe_chars, safe_get)
+    # Fast path: rstrip() leaves nothing behind when every character is
+    # safe.  It accepts both str and unicode arguments.
+    if not s.rstrip(safe_chars):
+        return s
+    return ''.join(map(safe_get, s))
 
-def quote_plus(s, safe = ''):
+def quote_plus(s, safe=''):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
         s = quote(s, safe + ' ')
         return s.replace(' ', '+')
     return quote(s, safe)
 
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
 
     If any values in the query arg are sequences and doseq is true, each