diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -270,8 +270,8 @@
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
- self.error("junk characters in start tag: %r"
- % (rawdata[k:endpos][:20],))
+ self.handle_data(rawdata[i:endpos])
+ return endpos
if end.endswith('/>'):
# XHTML-style empty tag:
self.handle_startendtag(tag, attrs)
@@ -308,8 +308,10 @@
# end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1
- self.updatepos(i, j)
- self.error("malformed start tag")
+ if j > i:
+ return j
+ else:
+ return i + 1
raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete
@@ -325,7 +327,13 @@
if self.cdata_elem is not None:
self.handle_data(rawdata[i:j])
return j
- self.error("bad end tag: %r" % (rawdata[i:j],))
+ k = rawdata.find('<', i + 1, j)
+ if k > i:
+ j = k
+ if j <= i:
+ j = i + 1
+ self.handle_data(rawdata[i:j])
+ return j
elem = match.group(1).lower() # script or style
if self.cdata_elem is not None:
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -219,12 +219,13 @@
self._run_check(["", ""], output)
def test_starttag_junk_chars(self):
- self._parse_error(">")
- self._parse_error("$>")
+ self._run_check('>$>',
+ [('data', '>$>")
- self._parse_error("")
self._parse_error("