diff -r 7582a78f573b Lib/test/test_urllib.py
--- a/Lib/test/test_urllib.py	Wed Apr 06 15:18:12 2011 -0400
+++ b/Lib/test/test_urllib.py	Wed Apr 06 16:45:11 2011 -0700
@@ -171,6 +171,16 @@
         finally:
             self.unfakehttp()
 
+    def test_url_fragment(self):
+        # Issue #11703: geturl() omits fragments in the original URL.
+        url = 'http://docs.python.org/library/urllib.html#OK'
+        self.fakehttp(b'Hello!')
+        try:
+            fp = urllib.request.urlopen(url)
+            self.assertEqual(fp.geturl(), url)
+        finally:
+            self.unfakehttp()
+
     def test_read_bogus(self):
         # urlopen() should raise IOError for many error codes.
         self.fakehttp(b'''HTTP/1.1 401 Authentication Required
diff -r 7582a78f573b Lib/test/test_urllib2.py
--- a/Lib/test/test_urllib2.py	Wed Apr 06 15:18:12 2011 -0400
+++ b/Lib/test/test_urllib2.py	Wed Apr 06 16:45:11 2011 -0700
@@ -1024,6 +1024,15 @@
         o.open("http://www.example.com/")
         self.assertFalse(hh.req.has_header("Cookie"))
 
+    def test_redirect_fragment(self):
+        redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
+        hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
+        hdeh = urllib.request.HTTPDefaultErrorHandler()
+        hrh = urllib.request.HTTPRedirectHandler()
+        o = build_test_opener(hh, hdeh, hrh)
+        fp = o.open('http://www.example.com')
+        self.assertEqual(fp.geturl(), redirected_url.strip())
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
@@ -1339,12 +1348,16 @@
         req = Request("<URL:http://www.python.org>")
         self.assertEqual("www.python.org", req.get_host())
 
-    def test_urlwith_fragment(self):
+    def test_url_fragment(self):
         req = Request("http://www.python.org/?qs=query#fragment=true")
         self.assertEqual("/?qs=query", req.get_selector())
         req = Request("http://www.python.org/#fun=true")
         self.assertEqual("/", req.get_selector())
 
+        # Issue 11703: geturl() omits fragment in the original URL.
+        url = 'http://docs.python.org/library/urllib2.html#OK'
+        req = Request(url)
+        self.assertEqual(req.get_full_url(), url)
 
 def test_main(verbose=None):
     from test import test_urllib2
diff -r 7582a78f573b Lib/urllib/request.py
--- a/Lib/urllib/request.py	Wed Apr 06 15:18:12 2011 -0400
+++ b/Lib/urllib/request.py	Wed Apr 06 16:45:11 2011 -0700
@@ -163,7 +163,7 @@
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.full_url = unwrap(url)
-        self.full_url, fragment = splittag(self.full_url)
+        self.full_url, self.fragment = splittag(self.full_url)
         self.data = data
         self.headers = {}
         self._tunnel_host = None
@@ -202,7 +202,10 @@
         return self.data
 
     def get_full_url(self):
-        return self.full_url
+        if self.fragment:
+            return '%s#%s' % (self.full_url, self.fragment)
+        else:
+            return self.full_url
 
     def get_type(self):
         return self.type
@@ -1106,7 +1109,7 @@
         except socket.error as err:
             raise URLError(err)
 
-        r.url = req.full_url
+        r.url = req.get_full_url()
         # This line replaces the .msg attribute of the HTTPResponse
         # with .headers, because urllib clients expect the response to
         # have the reason in .msg.  It would be good to mark this