diff -r 7d5f35326a77 -r 37549b5d1d29 Doc/library/urllib.request.rst
--- a/Doc/library/urllib.request.rst	Wed Nov 07 00:57:26 2012 +0100
+++ b/Doc/library/urllib.request.rst	Wed Nov 07 04:12:00 2012 +0100
@@ -1196,6 +1196,63 @@
    >>> f.read().decode('utf-8')
 
 
+.. _urllib-request-recipes:
+
+Recipes
+-------
+
+Data URLs aren't supported by the :mod:`urllib.request` module, but adding
+support for them is easy, as this recipe shows. Note that many browsers allow
+base64-encoded data URLs to omit padding characters. This implementation is
+strict and raises an exception when the padding is missing. ::
+
+    import binascii
+    import urllib.request
+    import urllib.parse
+    import email.message
+    import io
+
+    def parse_data_url(url):
+        scheme, data = url.split(":",1)
+        assert scheme == "data", "unsupported scheme: "+scheme
+        mediatype, data = data.split(",",1)
+        # base64 urls might have a padding which might be quoted:
+        data = urllib.parse.unquote_to_bytes(data)
+        if mediatype.endswith(";base64"):
+            return binascii.a2b_base64(data), mediatype[:-7] or None
+        else:
+            return data, mediatype or None
+
+    class DataResponse(io.BytesIO):
+        def __init__(self,url):
+            data, mediatype = parse_data_url(url)
+            io.BytesIO.__init__(self,data)
+            self.url = url
+            self.mediatype = mediatype
+            self.length = len(data)
+            self.headers = email.message.Message()
+            if mediatype is not None:
+                self.headers.add_header("Content-Type",mediatype)
+
+        def geturl(self):
+            return self.url
+
+        def info(self):
+            return self.headers
+
+    class DataHandler(urllib.request.BaseHandler):
+        def data_open(self, req):
+            return DataResponse(req.full_url)
+
+You can then install and use this protocol handler like so::
+
+    >>> from urllib.request import build_opener, install_opener, urlopen
+    >>> install_opener(build_opener(DataHandler))
+    >>> f = urlopen('data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQhCg==')
+    >>> print(f.read())
+    b'Hello World!\n'
+
+
 Legacy interface
 ----------------
 