Index: Lib/test/test_urlparse.py =================================================================== --- Lib/test/test_urlparse.py (revision 65739) +++ Lib/test/test_urlparse.py (working copy) @@ -147,10 +147,6 @@ # "abnormal" cases from RFC 1808: self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') - self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') - self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') - self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') - self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') @@ -165,6 +161,12 @@ #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') #self.checkJoin(RFC1808_BASE, 'http:', 'http:') + # The following scenarios have been updated in RFC3986 + #self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') + #self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') + #self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') + #self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') + def test_RFC2396(self): # cases from RFC 2396 @@ -190,10 +192,6 @@ self.checkJoin(RFC2396_BASE, '../../', 'http://a/') self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) - self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') - self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') - self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') - self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') @@ -202,8 +200,6 @@ self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') - self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 
'http://a/b/c/g;x=1/y') - self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') @@ -212,11 +208,61 @@ #The following scenarios have been updated in RFC3986 #self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y') #self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x') + #self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') + #self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') + #self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') + #self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') + #self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') + #self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') def test_RFC3986(self): + # normal examples + self.checkJoin(RFC3986_BASE, 'g:h', 'g:h') + self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g') + self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g') + self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/') + self.checkJoin(RFC3986_BASE, '/g', 'http://a/g') + self.checkJoin(RFC3986_BASE, '//g', 'http://g') self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') - self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x') + self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y') + self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s') + self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s') + self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s') + self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x') + self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x') + self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s') + self.checkJoin(RFC3986_BASE, '', RFC3986_BASE) + self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/') + self.checkJoin(RFC3986_BASE, './','http://a/b/c/') + self.checkJoin(RFC3986_BASE, '..','http://a/b/') + self.checkJoin(RFC3986_BASE, '../', 'http://a/b/') + 
self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g') + self.checkJoin(RFC3986_BASE, '../..', 'http://a/') + self.checkJoin(RFC3986_BASE, '../../', 'http://a/') + self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g') + #abnormal examples + self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g') + self.checkJoin(RFC3986_BASE, '../../../../g', 'http://a/g') + self.checkJoin(RFC3986_BASE, '/./g','http://a/g') + self.checkJoin(RFC3986_BASE, '/../g','http://a/g') + self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.') + self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g') + self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..') + self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g') + self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g') + self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/') + self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h') + self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h') + self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y') + self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y') + self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') + self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') + self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') + self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') + self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g') + self.checkJoin(RFC3986_BASE, 'g;x?y=s','http://a/b/c/g;x?y=s') + def test_urldefrag(self): for url, defrag, frag in [ ('http://python.org#frag', 'http://python.org', 'frag'), Index: Lib/urlparse.py =================================================================== --- Lib/urlparse.py (revision 65739) +++ Lib/urlparse.py (working copy) @@ -2,6 +2,9 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June 1995. 
+Also conforming to: +RFC2396 - Uniform Resource Identifier (URI) Generic Syntax +RFC3986 - Uniform Resource Identifier (URI): Generic Syntax """ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", @@ -215,6 +218,12 @@ params, query, fragment)) netloc = bnetloc if path[:1] == '/': + segments = path.split('/') + while '.' in segments: + segments.remove('.') + while '..' in segments: + segments.remove('..') + path = '/'.join(segments) return urlunparse((scheme, netloc, path, params, query, fragment)) if not path: @@ -243,6 +252,10 @@ and segments[i-1] not in ('', '..')): del segments[i-1:i+1] break + elif (segments[i] == '..'): + while '..' in segments: + segments.remove('..') + break i = i+1 else: break @@ -289,10 +302,10 @@ ../g = ../.. = ../../g = - ../../../g = + ../../../g = ./../g = ./g/. = - /./g = + /./g = g/./h = g/../h = http:g =