*** /Users/munch/Projects/python/python/dist/src/Lib/HTMLParser.py Tue May 14 08:50:11 2002
--- HTMLParser.py Sun Jan 19 05:33:17 2003
***************
*** 14,20 ****
  # Regular expressions used for parsing
  
  interesting_normal = re.compile('[&<]')
! interesting_cdata = re.compile(r'<(/|\Z)')
  incomplete = re.compile('&[a-zA-Z#]')
  
  entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
--- 14,20 ----
  # Regular expressions used for parsing
  
  interesting_normal = re.compile('[&<]')
! interesting_cdata = dict([(tag, re.compile(r'<(/\s*%s\s*>|!--|\Z)' % tag, re.IGNORECASE)) for tag in ("script", "style")])
  incomplete = re.compile('&[a-zA-Z#]')
  
  entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
***************
*** 84,92 ****
      reference as the argument.
      """
  
-     CDATA_CONTENT_ELEMENTS = ("script", "style")
- 
- 
      def __init__(self):
          """Initialize and reset this instance."""
          self.reset()
--- 84,89 ----
***************
*** 120,127 ****
          """Return full source of start tag: '<...>'."""
          return self.__starttag_text
  
!     def set_cdata_mode(self):
!         self.interesting = interesting_cdata
  
      def clear_cdata_mode(self):
          self.interesting = interesting_normal
--- 117,124 ----
          """Return full source of start tag: '<...>'."""
          return self.__starttag_text
  
!     def set_cdata_mode(self, tag):
!         self.interesting = interesting_cdata[tag]
  
      def clear_cdata_mode(self):
          self.interesting = interesting_normal
***************
*** 279,286 ****
              self.handle_startendtag(tag, attrs)
          else:
              self.handle_starttag(tag, attrs)
!             if tag in self.CDATA_CONTENT_ELEMENTS:
!                 self.set_cdata_mode()
          return endpos
  
      # Internal -- check to see if we have a complete starttag; return end
--- 276,283 ----
              self.handle_startendtag(tag, attrs)
          else:
              self.handle_starttag(tag, attrs)
!             if tag in interesting_cdata:
!                 self.set_cdata_mode(tag)
          return endpos
  
      # Internal -- check to see if we have a complete starttag; return end