Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(5)

Side by Side Diff: Lib/test/test_htmlparser.py

Issue 21047: html.parser.HTMLParser: convert_charrefs should become True by default
Patch Set: Created 5 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/html/parser.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 """Tests for HTMLParser.py.""" 1 """Tests for HTMLParser.py."""
2 2
3 import html.parser 3 import html.parser
4 import pprint 4 import pprint
5 import unittest 5 import unittest
6 from test import support 6 from test import support
7 7
8 8
9 class EventCollector(html.parser.HTMLParser): 9 class EventCollector(html.parser.HTMLParser):
10 10
(...skipping 366 matching lines...) Expand 10 before | Expand all | Expand 10 after
377 def test_condcoms(self): 377 def test_condcoms(self):
378 html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->' 378 html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
379 '<!--[if IE 8]>condcoms<![endif]-->' 379 '<!--[if IE 8]>condcoms<![endif]-->'
380 '<!--[if lte IE 7]>pretty?<![endif]-->') 380 '<!--[if lte IE 7]>pretty?<![endif]-->')
381 expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"), 381 expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"),
382 ('comment', '[if IE 8]>condcoms<![endif]'), 382 ('comment', '[if IE 8]>condcoms<![endif]'),
383 ('comment', '[if lte IE 7]>pretty?<![endif]')] 383 ('comment', '[if lte IE 7]>pretty?<![endif]')]
384 self._run_check(html, expected) 384 self._run_check(html, expected)
385 385
386 def test_convert_charrefs(self): 386 def test_convert_charrefs(self):
387 collector = lambda: EventCollectorCharrefs(convert_charrefs=True) 387 # default value for convert_charrefs is now True
388 collector = lambda: EventCollectorCharrefs()
388 self.assertTrue(collector().convert_charrefs) 389 self.assertTrue(collector().convert_charrefs)
389 charrefs = ['&quot;', '&#34;', '&#x22;', '&quot', '&#34', '&#x22'] 390 charrefs = ['&quot;', '&#34;', '&#x22;', '&quot', '&#34', '&#x22']
390 # check charrefs in the middle of the text/attributes 391 # check charrefs in the middle of the text/attributes
391 expected = [('starttag', 'a', [('href', 'foo"zar')]), 392 expected = [('starttag', 'a', [('href', 'foo"zar')]),
392 ('data', 'a"z'), ('endtag', 'a')] 393 ('data', 'a"z'), ('endtag', 'a')]
393 for charref in charrefs: 394 for charref in charrefs:
394 self._run_check('<a href="foo{0}zar">a{0}z</a>'.format(charref), 395 self._run_check('<a href="foo{0}zar">a{0}z</a>'.format(charref),
395 expected, collector=collector()) 396 expected, collector=collector())
396 # check charrefs at the beginning/end of the text/attributes 397 # check charrefs at the beginning/end of the text/attributes
397 expected = [('data', '"'), 398 expected = [('data', '"'),
(...skipping 23 matching lines...) Expand all
421 self._run_check('no charrefs here', [('data', 'no charrefs here')], 422 self._run_check('no charrefs here', [('data', 'no charrefs here')],
422 collector=collector()) 423 collector=collector())
423 424
424 425
425 class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): 426 class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
426 427
427 def get_collector(self): 428 def get_collector(self):
428 return EventCollector(convert_charrefs=False) 429 return EventCollector(convert_charrefs=False)
429 430
430 def test_deprecation_warnings(self): 431 def test_deprecation_warnings(self):
431 with self.assertWarns(DeprecationWarning):
432 EventCollector() # convert_charrefs not passed explicitly
433 with self.assertWarns(DeprecationWarning): 432 with self.assertWarns(DeprecationWarning):
434 EventCollector(strict=True) 433 EventCollector(strict=True)
435 with self.assertWarns(DeprecationWarning): 434 with self.assertWarns(DeprecationWarning):
436 EventCollector(strict=False) 435 EventCollector(strict=False)
437 with self.assertRaises(html.parser.HTMLParseError): 436 with self.assertRaises(html.parser.HTMLParseError):
438 with self.assertWarns(DeprecationWarning): 437 with self.assertWarns(DeprecationWarning):
439 EventCollector().error('test') 438 EventCollector().error('test')
440 439
441 def test_tolerant_parsing(self): 440 def test_tolerant_parsing(self):
442 self._run_check('<html <html>te>>xt&a<<bc</a></html>\n' 441 self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after
738 737
739 def test_attr_funky_names(self): 738 def test_attr_funky_names(self):
740 self._run_check( 739 self._run_check(
741 "<a a.b='v' c:d=v e-f=v>", 740 "<a a.b='v' c:d=v e-f=v>",
742 [("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")])]) 741 [("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")])])
743 742
744 def test_entityrefs_in_attributes(self): 743 def test_entityrefs_in_attributes(self):
745 self._run_check( 744 self._run_check(
746 "<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", 745 "<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>",
747 [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])]) 746 [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])])
748
749 747
750 748
751 class AttributesTolerantTestCase(AttributesStrictTestCase): 749 class AttributesTolerantTestCase(AttributesStrictTestCase):
752 750
753 def get_collector(self): 751 def get_collector(self):
754 return EventCollector(convert_charrefs=False) 752 return EventCollector(convert_charrefs=False)
755 753
756 def test_attr_funky_names2(self): 754 def test_attr_funky_names2(self):
757 self._run_check( 755 self._run_check(
758 "<a $><b $=%><c \=/>", 756 "<a $><b $=%><c \=/>",
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
823 def test_end_tag_in_attribute_value(self): 821 def test_end_tag_in_attribute_value(self):
824 # see #1745761 822 # see #1745761
825 self._run_check("<a href='http://www.example.org/\">;'>spam</a>", 823 self._run_check("<a href='http://www.example.org/\">;'>spam</a>",
826 [("starttag", "a", 824 [("starttag", "a",
827 [("href", "http://www.example.org/\">;")]), 825 [("href", "http://www.example.org/\">;")]),
828 ("data", "spam"), ("endtag", "a")]) 826 ("data", "spam"), ("endtag", "a")])
829 827
830 828
831 if __name__ == "__main__": 829 if __name__ == "__main__":
832 unittest.main() 830 unittest.main()
OLD | NEW
« no previous file with comments | « Lib/html/parser.py ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+