diff -r 94d0e842b9ea -r 8f0a1a4c0467 Lib/test/test_urlparse.py --- a/Lib/test/test_urlparse.py Fri Aug 01 12:28:49 2014 +0200 +++ b/Lib/test/test_urlparse.py Tue Aug 05 17:38:20 2014 -0700 @@ -211,8 +211,8 @@ # "abnormal" cases from RFC 1808: self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') - self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') - self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') + self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/g') + self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/g') self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') @@ -259,8 +259,8 @@ self.checkJoin(RFC2396_BASE, '../../', 'http://a/') self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) - self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') - self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') + self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/g') + self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/g') self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') @@ -336,6 +336,9 @@ # Test for issue9721 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x') + def test_one(self): + self.checkJoin(SIMPLE_BASE, '..','http://a/b/') + def test_urljoins(self): self.checkJoin(SIMPLE_BASE, 'g:h','g:h') self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') @@ -349,13 +352,14 @@ self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y') self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x') self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/') + self.checkJoin(SIMPLE_BASE, './.','http://a/b/c/') self.checkJoin(SIMPLE_BASE, './','http://a/b/c/') self.checkJoin(SIMPLE_BASE, '..','http://a/b/') self.checkJoin(SIMPLE_BASE, '../','http://a/b/') self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g') self.checkJoin(SIMPLE_BASE, '../..','http://a/') self.checkJoin(SIMPLE_BASE, '../../g','http://a/g') - self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') + self.checkJoin(SIMPLE_BASE, '../../../g','http://a/g') self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g') self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/') self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g') diff -r 94d0e842b9ea -r 8f0a1a4c0467 Lib/urllib/parse.py --- a/Lib/urllib/parse.py Fri Aug 01 12:28:49 2014 +0200 +++ b/Lib/urllib/parse.py Tue Aug 05 17:38:20 2014 -0700 @@ -409,11 +409,13 @@ return url if not url: return base + base, url, _coerce_result = _coerce_args(base, url) bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ urlparse(base, '', allow_fragments) scheme, netloc, path, params, query, fragment = \ urlparse(url, bscheme, allow_fragments) + if scheme != bscheme or scheme not in uses_relative: return _coerce_result(url) if scheme in uses_netloc: @@ -431,29 +433,40 @@ query = bquery return _coerce_result(urlunparse((scheme, netloc, path, params, query, fragment))) - segments = bpath.split('/')[:-1] + path.split('/') - # XXX The stuff below is bogus in various ways... - if segments[-1] == '.': - segments[-1] = '' - while '.' in segments: - segments.remove('.') - while 1: - i = 1 - n = len(segments) - 1 - while i < n: - if (segments[i] == '..' - and segments[i-1] not in ('', '..')): - del segments[i-1:i+1] - break - i = i+1 - else: - break - if segments == ['', '..']: - segments[-1] = '' - elif len(segments) >= 2 and segments[-1] == '..': - segments[-2:] = [''] - return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), - params, query, fragment))) + + base_parts = bpath.split('/') + if base_parts[-1] != '': + # the last item is not a directory, so will not be taken into account + # in resolving the relative path + del base_parts[-1] + + segments = base_parts + path.split('/') + + resolved_path = collections.deque() + + seghandlers = { + '..': lambda: resolved_path.pop(), + '.': lambda: None, + } + + for segment in segments: + try: + seghandlers[segment]() + except IndexError: + # no segments added yet, RFC3986 specifies that these should simply + # be ignored + pass + except KeyError as e: + resolved_path.append(segment) + + if segments[-1] in ('.', '..'): + # do some post-processing here. if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append('') + + return _coerce_result(urlunparse((scheme, netloc, '/'.join( + resolved_path), params, query, fragment))) + def urldefrag(url): """Removes any existing fragment from URL.