diff -r 814599728ac0 Lib/HTMLParser.py
--- a/Lib/HTMLParser.py Sat Mar 26 02:09:14 2011 +0100
+++ b/Lib/HTMLParser.py Sun Apr 03 20:23:08 2011 +0300
@@ -26,7 +26,10 @@
 tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
+    # An unquoted value runs up to whitespace, a quote, '<', '>' or a
+    # backtick, so non-ASCII text is accepted.  '=' stays legal, as it was
+    # in the old character class, so that href=page?a=b keeps parsing.
+    r'(\'[^\']*\'|"[^"]*"|[^\s"\'<>`]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
diff -r 814599728ac0 Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py Sat Mar 26 02:09:14 2011 +0100
+++ b/Lib/test/test_htmlparser.py Sun Apr 03 20:23:08 2011 +0300
@@ -208,6 +208,23 @@
("starttag", "a", [("href", "mailto:xyz@example.com")]),
])
+    def test_attr_nonascii(self):
+        # see issue 7311: non-ASCII attribute values, unquoted and quoted
+        self._run_check(u"<img src=/foo/bar.png alt=\u4e2d\u6587>", [
+            ("starttag", "img", [("src", "/foo/bar.png"),
+                                 ("alt", u"\u4e2d\u6587")]),
+            ])
+        self._run_check(u"<a title='\u30c6\u30b9\u30c8' "
+                        u"href='\u30c6\u30b9\u30c8.html'>", [
+            ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
+                               ("href", u"\u30c6\u30b9\u30c8.html")]),
+            ])
+        self._run_check(u'<a title="\u30c6\u30b9\u30c8" '
+                        u'href="\u30c6\u30b9\u30c8.html">', [
+            ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
+                               ("href", u"\u30c6\u30b9\u30c8.html")]),
+            ])
+
def test_attr_entity_replacement(self):
self._run_check("""""", [
("starttag", "a", [("b", "&><\"'")]),