diff -r d1e0931bc72e Lib/urllib.py
--- a/Lib/urllib.py	Fri Apr 30 15:47:34 2010 +0200
+++ b/Lib/urllib.py	Fri Apr 30 10:39:51 2010 -0400
@@ -27,6 +27,7 @@
 import os
 import time
 import sys
+import re
 from urlparse import urljoin as basejoin
 
 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
@@ -1184,6 +1185,20 @@
                '0123456789' '_.-')
 _safemaps = {}
 
+# Fast-path tables for quote() when it is called with the default safe='/'.
+# _faster_safe maps each of the 256 single-byte characters to itself (if
+# safe) or to its '%XX' escape; _must_quote matches any unsafe character.
+_faster_safe_test = always_safe + '/'
+_faster_safe = dict(zip(_faster_safe_test, _faster_safe_test))
+
+for c in [chr(i) for i in range(256)]:
+    if c not in _faster_safe:
+        _faster_safe[c] = '%%%02X' % ord(c)
+
+# re.escape() keeps the trailing '.-/' from being parsed as a character
+# range, which would drop '-' from the class and defeat the fast path.
+_must_quote = re.compile(r'[^%s]' % re.escape(_faster_safe_test))
+
 def quote(s, safe = '/'):
     """quote('abc def') -> 'abc%20def'
 
@@ -1205,6 +1220,23 @@
     called on a path where the existing slash characters are used as
     reserved characters.
     """
+    # fastpath: empty (yes, Virginia, this actually happens, a *lot*).
+    if not s:
+        # None is not a string: fail loudly instead of handing None back
+        # to the caller as if it were a quoted value.
+        if s is None:
+            raise TypeError('None object cannot be quoted')
+        return s
+    # fastpath: standard safechars, use regex to eliminate "clean" strings
+    # (again, this happens a *lot*).
+    if safe == '/':
+        if not _must_quote.search(s):
+            return s
+        # __getitem__ rather than get: a character missing from the
+        # 256-entry table raises KeyError naming it, instead of passing
+        # None to ''.join() and failing with an unrelated TypeError.
+        return ''.join(map(_faster_safe.__getitem__, s))
+    # non-standard safechars
     cachekey = (safe, always_safe)
     try:
         safe_map = _safemaps[cachekey]