Message 281315
Suggest adding an optional user_agent parameter, as shown below:
def __init__(self, url='', user_agent=None):
    """Initialize the parser, optionally with a custom User-Agent.

    url -- location of the robots.txt file to parse (same as the
        parent urllib.robotparser.RobotFileParser constructor)
    user_agent -- User-Agent header value to send when fetching
        robots.txt; None keeps urllib's default agent
    """
    # Zero-argument super() is unavailable outside a class body, so the
    # parent initializer is invoked explicitly.
    urllib.robotparser.RobotFileParser.__init__(self, url)  # init parent
    self.user_agent = user_agent  # saved for use by read()
def read(self):
    """Read the robots.txt URL and feed it to the parser.

    Overrides the parent read() so that the HTTP request carries the
    User-Agent stored by __init__ (self.user_agent); when that value
    is None, urllib's default agent is used unchanged.
    """
    try:
        # Build the Request explicitly (rather than passing the URL
        # straight to urlopen) so a User-Agent header can be attached.
        req = urllib.request.Request(self.url, data=None)
        if self.user_agent is not None:  # caller supplied an override
            req.add_header("User-Agent", self.user_agent)
        f = urllib.request.urlopen(req)  # open connection
    except urllib.error.HTTPError as err:
        if err.code in (401, 403):
            # Access denied: treat every path as disallowed.
            self.disallow_all = True
        elif 400 <= err.code < 500:
            # Other client errors (e.g. 404): no robots.txt, allow all.
            self.allow_all = True
        # NOTE(review): 5xx responses are silently swallowed here with
        # neither flag set — this mirrors the stdlib RobotFileParser
        # behaviour this method overrides.
    else:
        # Success path: decode the body and hand the lines to the parser.
        raw = f.read()
        self.parse(raw.decode("utf-8").splitlines())
|
Date | User | Action | Args
2016-11-21 01:26:36 | nagle | set | recipients: + nagle |
2016-11-21 01:26:36 | nagle | set | messageid: <1479691596.58.0.0726918397452.issue28756@psf.upfronthosting.co.za> |
2016-11-21 01:26:36 | nagle | link | issue28756 messages |
2016-11-21 01:26:35 | nagle | create | |
|