diff -r 8a65e6aff672 Lib/html/parser.py
--- a/Lib/html/parser.py	Tue Apr 05 18:12:15 2011 +0200
+++ b/Lib/html/parser.py	Tue Apr 05 21:50:45 2011 +0300
@@ -28,7 +28,7 @@
 # make it correctly strict without breaking backward compatibility.
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
+    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
diff -r 8a65e6aff672 Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py	Tue Apr 05 18:12:15 2011 +0200
+++ b/Lib/test/test_htmlparser.py	Tue Apr 05 21:50:45 2011 +0300
@@ -217,6 +217,23 @@
             ("starttag", "a", [("href", "mailto:xyz@example.com")]),
             ])
 
+    def test_attr_nonascii(self):
+        # see issue 7311
+        self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [
+            ("starttag", "img", [("src", "/foo/bar.png"),
+                                 ("alt", "\u4e2d\u6587")]),
+            ])
+        self._run_check("<a title='\u30c6\u30b9\u30c8' "
+                        "href='\u30c6\u30b9\u30c8.html'>", [
+            ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+                               ("href", "\u30c6\u30b9\u30c8.html")]),
+            ])
+        self._run_check('<a title="\u30c6\u30b9\u30c8" '
+                        'href="\u30c6\u30b9\u30c8.html">', [
+            ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+                               ("href", "\u30c6\u30b9\u30c8.html")]),
+            ])
+
     def test_attr_entity_replacement(self):
         self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
             ("starttag", "a", [("b", "&><\"'")]),