diff -r 0842c5411ed6 Lib/test/test_robotparser.py --- a/Lib/test/test_robotparser.py Mon Mar 18 09:59:15 2013 +0100 +++ b/Lib/test/test_robotparser.py Mon Mar 18 14:14:25 2013 -0700 @@ -234,6 +234,18 @@ RobotTest(15, doc, good, bad) +# 16. +doc = """ +User-agent: * +Disallow: /catalogs/ +Allow: /catalogs/test? +Allow: /catalogs/sub-catalogs +""" + +good = ['/catalogs/test?','/catalogs/sub-catalogs'] +bad = ['/catalogs/'] + +RobotTest(16, doc, good, bad) class NetworkTestCase(unittest.TestCase): diff -r 0842c5411ed6 Lib/urllib/robotparser.py --- a/Lib/urllib/robotparser.py Mon Mar 18 09:59:15 2013 +0100 +++ b/Lib/urllib/robotparser.py Mon Mar 18 14:14:25 2013 -0700 @@ -157,6 +157,7 @@ if path == '' and not allowance: # an empty value means allow all allowance = True + path = urllib.parse.urlunparse(urllib.parse.urlparse(path)) self.path = urllib.parse.quote(path) self.allowance = allowance @@ -198,7 +199,7 @@ """Preconditions: - our agent applies to this entry - filename is URL decoded""" - for line in self.rulelines: - if line.applies_to(filename): - return line.allowance + lines = list(filter(lambda x: x.applies_to(filename), self.rulelines)) + if lines: + return max(lines, key=lambda x: len(x.path)).allowance return True