Index: htmllib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/htmllib.py,v
retrieving revision 1.21
diff -u -r1.21 htmllib.py
--- htmllib.py 27 Oct 2003 15:47:48 -0000 1.21
+++ htmllib.py 4 Nov 2003 21:41:55 -0000
@@ -1,7 +1,7 @@
-"""HTML 2.0 parser.
+"""HTML 4.01 parser.
-See the HTML 2.0 specification:
-http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
+See the HTML 4.01 specification:
+http://www.w3.org/TR/html401
"""
@@ -14,8 +14,7 @@
"""This is the basic HTML parser class.
It supports all entity names required by the XHTML 1.0 Recommendation.
- It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2
- elements.
+ It also defines handlers for all HTML 4.01 elements.
"""
@@ -121,7 +120,7 @@
"""
self.handle_data(alt)
- # --------- Top level elememts
+ # --------- Top level elements
def start_html(self, attrs): pass
def end_html(self): pass
@@ -134,28 +133,37 @@
# ------ Head elements
- def start_title(self, attrs):
- self.save_bgn()
-
- def end_title(self):
- self.title = self.save_end()
-
def do_base(self, attrs):
for a, v in attrs:
if a == 'href':
self.base = v
- def do_isindex(self, attrs):
- self.isindex = 1
-
def do_link(self, attrs):
pass
def do_meta(self, attrs):
pass
- def do_nextid(self, attrs): # Deprecated
- pass
+ def start_noscript(self, attrs):
+ pass
+ def end_noscript(self):
+ pass
+
+ def start_script(self, attrs):
+ pass
+ def end_script(self):
+ pass
+
+ def start_style(self, attrs):
+ pass
+ def end_style(self):
+ pass
+
+ def start_title(self, attrs):
+ self.save_bgn()
+
+ def end_title(self):
+ self.title = self.save_end()
# ------ Body elements
@@ -211,33 +219,6 @@
# --- Block Structuring Elements
- def do_p(self, attrs):
- self.formatter.end_paragraph(1)
-
- def start_pre(self, attrs):
- self.formatter.end_paragraph(1)
- self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
- self.nofill = self.nofill + 1
-
- def end_pre(self):
- self.formatter.end_paragraph(1)
- self.formatter.pop_font()
- self.nofill = max(0, self.nofill - 1)
-
- def start_xmp(self, attrs):
- self.start_pre(attrs)
- self.setliteral('xmp') # Tell SGML parser
-
- def end_xmp(self):
- self.end_pre()
-
- def start_listing(self, attrs):
- self.start_pre(attrs)
- self.setliteral('listing') # Tell SGML parser
-
- def end_listing(self):
- self.end_pre()
-
def start_address(self, attrs):
self.formatter.end_paragraph(0)
self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
@@ -246,6 +227,11 @@
self.formatter.end_paragraph(0)
self.formatter.pop_font()
+ def start_bdo(self, attrs):
+ pass
+ def end_bdo(self):
+ pass
+
def start_blockquote(self, attrs):
self.formatter.end_paragraph(1)
self.formatter.push_margin('blockquote')
@@ -254,6 +240,41 @@
self.formatter.end_paragraph(1)
self.formatter.pop_margin()
+ def do_p(self, attrs):
+ self.formatter.end_paragraph(1)
+
+ def start_pre(self, attrs):
+ self.formatter.end_paragraph(1)
+ self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
+ self.nofill = self.nofill + 1
+
+ def end_pre(self):
+ self.formatter.end_paragraph(1)
+ self.formatter.pop_font()
+ self.nofill = max(0, self.nofill - 1)
+
+ def start_q(self, attrs):
+ pass
+ def end_q(self):
+ pass
+
+ def start_span(self, attrs):
+ pass
+ def end_span(self):
+ pass
+
+ # --- Marking text
+
+ def start_del(self, attrs):
+ pass
+ def end_del(self):
+ pass
+
+ def start_ins(self, attrs):
+ pass
+ def end_ins(self):
+ pass
+
# --- List Elements
def start_ul(self, attrs):
@@ -290,18 +311,6 @@
self.formatter.end_paragraph(not self.list_stack)
self.formatter.pop_margin()
- def start_menu(self, attrs):
- self.start_ul(attrs)
-
- def end_menu(self):
- self.end_ul()
-
- def start_dir(self, attrs):
- self.start_ul(attrs)
-
- def end_dir(self):
- self.end_ul()
-
def start_dl(self, attrs):
self.formatter.end_paragraph(1)
self.list_stack.append(['dl', '', 0])
@@ -329,12 +338,21 @@
# Idiomatic Elements
+ def start_abbr(self, attrs): pass
+ def end_abbr(self): pass
+
+ def start_acronym(self, attrs): pass
+ def end_acronym(self): pass
+
def start_cite(self, attrs): self.start_i(attrs)
def end_cite(self): self.end_i()
def start_code(self, attrs): self.start_tt(attrs)
def end_code(self): self.end_tt()
+ def start_dfn(self, attrs): pass
+ def end_dfn(self): pass
+
def start_em(self, attrs): self.start_i(attrs)
def end_em(self): self.end_i()
@@ -352,21 +370,45 @@
# Typographic Elements
+ def start_b(self, attrs):
+ self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
+ def end_b(self):
+ self.formatter.pop_font()
+
+ def start_big(self, attrs):
+ pass
+ def end_big(self):
+ pass
+
def start_i(self, attrs):
self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
def end_i(self):
self.formatter.pop_font()
- def start_b(self, attrs):
- self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
- def end_b(self):
- self.formatter.pop_font()
+ def start_small(self, attrs):
+ pass
+ def end_small(self):
+ pass
+
+ def start_sub(self, attrs):
+ pass
+ def end_sub(self):
+ pass
+
+ def start_sup(self, attrs):
+ pass
+ def end_sup(self):
+ pass
def start_tt(self, attrs):
self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
def end_tt(self):
self.formatter.pop_font()
+
+
+ # Linking
+
def start_a(self, attrs):
href = ''
name = ''
@@ -394,7 +436,15 @@
def do_hr(self, attrs):
self.formatter.add_hor_rule()
- # --- Image
+ # --- Images and objects
+
+ def do_area(self, attrs):
+ pass
+
+ def start_iframe(self, attrs):
+ pass
+ def end_iframe(self):
+ pass
def do_img(self, attrs):
align = ''
@@ -420,11 +470,215 @@
except ValueError: pass
self.handle_image(src, alt, ismap, align, width, height)
- # --- Really Old Unofficial Deprecated Stuff
+ def start_map(self, attrs):
+ pass
+ def end_map(self):
+ pass
+
+ def start_object(self, attrs):
+ pass
+ def end_object(self):
+ pass
+
+    def do_param(self, attrs):
+        # PARAM is an empty element in HTML 4.01 (end tag forbidden);
+        # a start_/end_ pair would leave it open on the SGML stack.
+        pass
+
+ # --- Forms
+
+ def start_button(self, attrs):
+ pass
+ def end_button(self):
+ pass
+
+ def start_fieldset(self, attrs):
+ pass
+ def end_fieldset(self):
+ pass
+
+ def start_form(self, attrs):
+ pass
+ def end_form(self):
+ pass
+
+    def do_input(self, attrs):
+        # INPUT is an empty element in HTML 4.01 (end tag forbidden);
+        # a start_/end_ pair would leave it open on the SGML stack.
+        pass
+
+ def start_label(self, attrs):
+ pass
+ def end_label(self):
+ pass
+
+ def start_legend(self, attrs):
+ pass
+ def end_legend(self):
+ pass
+
+ def start_optgroup(self, attrs):
+ pass
+ def end_optgroup(self):
+ pass
+
+ def start_option(self, attrs):
+ pass
+ def end_option(self):
+ pass
+
+ def start_select(self, attrs):
+ pass
+ def end_select(self):
+ pass
+
+ def start_textarea(self, attrs):
+ pass
+ def end_textarea(self):
+ pass
+
+
+ # --- Table elements
+
+ def start_caption(self, attrs):
+ pass
+ def end_caption(self):
+ pass
+
+    def do_col(self, attrs):
+        # COL is an empty element in HTML 4.01 (end tag forbidden);
+        # a start_/end_ pair would leave it open on the SGML stack.
+        pass
+
+ def start_colgroup(self, attrs):
+ pass
+ def end_colgroup(self):
+ pass
+
+ def start_table(self, attrs):
+ pass
+ def end_table(self):
+ pass
+
+ def start_tbody(self, attrs):
+ pass
+ def end_tbody(self):
+ pass
+
+ def start_td(self, attrs):
+ pass
+ def end_td(self):
+ pass
+
+ def start_tfoot(self, attrs):
+ pass
+ def end_tfoot(self):
+ pass
+
+ def start_th(self, attrs):
+ pass
+ def end_th(self):
+ pass
+
+ def start_thead(self, attrs):
+ pass
+ def end_thead(self):
+ pass
+
+ def start_tr(self, attrs):
+ pass
+ def end_tr(self):
+ pass
+
+ # --- Frames
+
+    def do_frame(self, attrs):
+        # FRAME is an empty element in HTML 4.01 (end tag forbidden);
+        # a start_/end_ pair would leave it open on the SGML stack.
+        pass
+
+ def start_frameset(self, attrs):
+ pass
+ def end_frameset(self):
+ pass
+
+ def start_noframes(self, attrs):
+ pass
+ def end_noframes(self):
+ pass
+
+
+ # --- Constructs deprecated in HTML 4.01
+
+ def start_applet(self, attrs):
+ pass
+ def end_applet(self):
+ pass
+
+ def do_basefont(self, attrs):
+ pass
+
+ def start_center(self, attrs):
+ pass
+ def end_center(self):
+ pass
+
+ def start_dir(self, attrs):
+ self.start_ul(attrs)
+ def end_dir(self):
+ self.end_ul()
+
+ def start_font(self, attrs):
+ pass
+ def end_font(self):
+ pass
+
+ def do_isindex(self, attrs):
+ self.isindex = 1
+
+ def start_menu(self, attrs):
+ self.start_ul(attrs)
+ def end_menu(self):
+ self.end_ul()
+
+ def start_s(self, attrs):
+ pass
+ def end_s(self):
+ pass
+
+ def start_strike(self, attrs):
+ pass
+ def end_strike(self):
+ pass
+
+ def start_u(self, attrs):
+ pass
+ def end_u(self):
+ pass
+
+
+ # --- Unofficial deprecated elements (pre-HTML 2.0, mostly)
+
+ def start_listing(self, attrs):
+ self.start_pre(attrs)
+ self.setliteral('listing') # Tell SGML parser
+
+ def end_listing(self):
+ self.end_pre()
+
+ def do_nextid(self, attrs): # Deprecated
+ pass
def do_plaintext(self, attrs):
self.start_pre(attrs)
self.setnomoretags() # Tell SGML parser
+
+ def start_xmp(self, attrs):
+ self.start_pre(attrs)
+ self.setliteral('xmp') # Tell SGML parser
+
+ def end_xmp(self):
+ self.end_pre()
# --- Unhandled tags