diff -r 9ca59b3cc18b Doc/library/urllib.robotparser.rst
--- a/Doc/library/urllib.robotparser.rst	Thu Oct 15 08:05:31 2015 -0700
+++ b/Doc/library/urllib.robotparser.rst	Thu Oct 15 12:49:57 2015 -0700
@@ -72,6 +72,15 @@
 
       .. versionadded:: 3.6
 
+   .. method:: site_maps()
+
+      Returns the contents of the ``Sitemap`` parameter from
+      ``robots.txt`` in the form of a :func:`list`. If there is no such
+      parameter or the ``robots.txt`` entry for this parameter has
+      invalid syntax, return ``None``.
+
+      .. versionadded:: 3.6
+
 
 The following example demonstrates basic use of the :class:`RobotFileParser`
 class::
diff -r 9ca59b3cc18b Lib/urllib/robotparser.py
--- a/Lib/urllib/robotparser.py	Thu Oct 15 08:05:31 2015 -0700
+++ b/Lib/urllib/robotparser.py	Thu Oct 15 12:49:57 2015 -0700
@@ -24,6 +24,7 @@
 
     def __init__(self, url=''):
         self.entries = []
+        self.sitemaps = []
         self.default_entry = None
         self.disallow_all = False
         self.allow_all = False
@@ -142,6 +143,12 @@
                             entry.req_rate.requests = int(numbers[0])
                             entry.req_rate.seconds = int(numbers[1])
                         state = 2
+                elif line[0] == "sitemap":
+                    # According to http://www.sitemaps.org/protocol.html
+                    # "This directive is independent of the user-agent line,
+                    # so it doesn't matter where you place it in your file."
+                    # Therefore we do not change the state of the parser.
+                    self.sitemaps.append(line[1])
         if state == 2:
             self._add_entry(entry)
 
@@ -186,6 +193,11 @@
                 return entry.req_rate
         return None
 
+    def site_maps(self):
+        if not self.sitemaps:
+            return None
+        return self.sitemaps
+
     def __str__(self):
         return ''.join([str(entry) + "\n" for entry in self.entries])