Hi,
I've noticed 3 differences between the re and regex engines.
I don't know if they are intended or not, but thought it best to mention
them. (I used the issue2636-20090810#3.zip version.)
Python 2.6.2 (r262:71600, Apr 20 2009, 09:25:38)
[GCC 4.3.2 20081105 (Red Hat 4.3.2-7)] on linux2
IDLE 2.6.2
>>> import re, regex
>>> ############################################################ 1 of 3
>>> re1= re.compile(r"""
(?!<\w)(?P<name>[-\w]+)=
(?P<quote>(?P<single>')|(?P<double>"))?
(?P<value>(?(single)[^']+?|(?(double)[^"]+?|\S+)))
(?(quote)(?P=quote))
""", re.VERBOSE)
>>> re2= regex.compile(r"""
(?!<\w)(?P<name>[-\w]+)=
(?P<quote>(?P<single>')|(?P<double>"))?
(?P<value>(?(single)[^']+?|(?(double)[^"]+?|\S+)))
(?(quote)(?P=quote))
""", re.VERBOSE)
>>> text = "<table border='1'>"
>>> re1.findall(text)
[('border', "'", "'", '', '1')]
>>> re2.findall(text)
[]
>>> text = "<table border=1>"
>>> re1.findall(text)
[('border', '', '', '', '1>')]
>>> re2.findall(text)
[]
>>> ############################################################ 2 of 3
>>> re1 = re.compile(r"""^[ \t]*
(?P<parenthesis>\()?
[- ]?
(?P<area>\d{3})
(?(parenthesis)\))
[- ]?
(?P<local_a>\d{3})
[- ]?
(?P<local_b>\d{4})
[ \t]*$
""", re.VERBOSE)
>>> re2 = regex.compile(r"""^[ \t]*
(?P<parenthesis>\()?
[- ]?
(?P<area>\d{3})
(?(parenthesis)\))
[- ]?
(?P<local_a>\d{3})
[- ]?
(?P<local_b>\d{4})
[ \t]*$
""", re.VERBOSE)
>>> data = ("179-829-2116", "(187) 160 0880", "(286)-771-3878",
"(291) 835-9634", "353-896-0505", "(555) 555 5555", "(555) 555-5555",
"(555)-555-5555", "555 555 5555", "555 555-5555", "555-555-5555",
"601 805 3142", "(675) 372 3135", "810 329 7071", "(820) 951 3885",
"942 818-5280", "(983)8792282")
>>> for d in data:
ans1 = re1.findall(d)
ans2 = re2.findall(d)
print "re=%s rx=%s %d" % (ans1, ans2, ans1 == ans2)
re=[('', '179', '829', '2116')] rx=[('', '179', '829', '2116')] 1
re=[('(', '187', '160', '0880')] rx=[] 0
re=[('(', '286', '771', '3878')] rx=[('(', '286', '771', '3878')] 1
re=[('(', '291', '835', '9634')] rx=[] 0
re=[('', '353', '896', '0505')] rx=[('', '353', '896', '0505')] 1
re=[('(', '555', '555', '5555')] rx=[] 0
re=[('(', '555', '555', '5555')] rx=[] 0
re=[('(', '555', '555', '5555')] rx=[('(', '555', '555', '5555')] 1
re=[('', '555', '555', '5555')] rx=[] 0
re=[('', '555', '555', '5555')] rx=[] 0
re=[('', '555', '555', '5555')] rx=[('', '555', '555', '5555')] 1
re=[('', '601', '805', '3142')] rx=[] 0
re=[('(', '675', '372', '3135')] rx=[] 0
re=[('', '810', '329', '7071')] rx=[] 0
re=[('(', '820', '951', '3885')] rx=[] 0
re=[('', '942', '818', '5280')] rx=[] 0
re=[('(', '983', '879', '2282')] rx=[('(', '983', '879', '2282')] 1
>>> ############################################################ 3 of 3
>>> re1 = re.compile(r"""
<img\s+[^>]*?src=(?:(?P<quote>["'])(?P<qimage>[^\1>]+?)
(?P=quote)|(?P<uimage>[^"' >]+))[^>]*?>""", re.VERBOSE)
>>> re2 = regex.compile(r"""
<img\s+[^>]*?src=(?:(?P<quote>["'])(?P<qimage>[^\1>]+?)
(?P=quote)|(?P<uimage>[^"' >]+))[^>]*?>""", re.VERBOSE)
>>> data = """<body> <img src='a.png'> <img alt='picture' src="b.png">
<img alt="picture" src="Big C.png" other="xyx">
<img src=icon.png alt=icon>
<img src="I'm here!.jpg" alt="aren't I?">"""
>>> data = data.split("\n")
>>> data = [x.strip() for x in data]
>>> for d in data:
ans1 = re1.findall(d)
ans2 = re2.findall(d)
print "re=%s rx=%s %d" % (ans1, ans2, ans1 == ans2)
re=[("'", 'a.png', '')] rx=[("'", 'a.png', '')] 1
re=[('"', 'b.png', '')] rx=[('"', 'b.png', '')] 1
re=[('"', 'Big C.png', '')] rx=[('"', 'Big C.png', '')] 1
re=[('', '', 'icon.png')] rx=[('', '', 'icon.png alt=icon')] 0
re=[('"', "I'm here!.jpg", '')] rx=[('"', "I'm here!.jpg", '')] 1
I'm sorry I haven't had the time to try to minimize the examples, but I
hope that at least they will prove helpful.
Number 3 looks like a problem with non-greedy matching; I don't know
about the others.