Index: htmllib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/htmllib.py,v retrieving revision 1.21 diff -u -r1.21 htmllib.py --- htmllib.py 27 Oct 2003 15:47:48 -0000 1.21 +++ htmllib.py 4 Nov 2003 21:41:55 -0000 @@ -1,7 +1,7 @@ -"""HTML 2.0 parser. +"""HTML 4.01 parser. -See the HTML 2.0 specification: -http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html +See the HTML 4.01 specification: +http://www.w3.org/TR/html401 """ @@ -14,8 +14,7 @@ """This is the basic HTML parser class. It supports all entity names required by the XHTML 1.0 Recommendation. - It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2 - elements. + It also defines handlers for all HTML 4.01 elements. """ @@ -121,7 +120,7 @@ """ self.handle_data(alt) - # --------- Top level elememts + # --------- Top level elements def start_html(self, attrs): pass def end_html(self): pass @@ -134,28 +133,37 @@ # ------ Head elements - def start_title(self, attrs): - self.save_bgn() - - def end_title(self): - self.title = self.save_end() - def do_base(self, attrs): for a, v in attrs: if a == 'href': self.base = v - def do_isindex(self, attrs): - self.isindex = 1 - def do_link(self, attrs): pass def do_meta(self, attrs): pass - def do_nextid(self, attrs): # Deprecated - pass + def start_noscript(self, attrs): + pass + def end_noscript(self): + pass + + def start_script(self, attrs): + pass + def end_script(self): + pass + + def start_style(self, attrs): + pass + def end_style(self): + pass + + def start_title(self, attrs): + self.save_bgn() + + def end_title(self): + self.title = self.save_end() # ------ Body elements @@ -211,33 +219,6 @@ # --- Block Structuring Elements - def do_p(self, attrs): - self.formatter.end_paragraph(1) - - def start_pre(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1)) - self.nofill = self.nofill + 1 - - def end_pre(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - self.nofill = max(0, self.nofill - 1) - - def start_xmp(self, attrs): - self.start_pre(attrs) - self.setliteral('xmp') # Tell SGML parser - - def end_xmp(self): - self.end_pre() - - def start_listing(self, attrs): - self.start_pre(attrs) - self.setliteral('listing') # Tell SGML parser - - def end_listing(self): - self.end_pre() - def start_address(self, attrs): self.formatter.end_paragraph(0) self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS)) @@ -246,6 +227,11 @@ self.formatter.end_paragraph(0) self.formatter.pop_font() + def start_bdo(self, attrs): + pass + def end_bdo(self): + pass + def start_blockquote(self, attrs): self.formatter.end_paragraph(1) self.formatter.push_margin('blockquote') @@ -254,6 +240,41 @@ self.formatter.end_paragraph(1) self.formatter.pop_margin() + def do_p(self, attrs): + self.formatter.end_paragraph(1) + + def start_pre(self, attrs): + self.formatter.end_paragraph(1) + self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1)) + self.nofill = self.nofill + 1 + + def end_pre(self): + self.formatter.end_paragraph(1) + self.formatter.pop_font() + self.nofill = max(0, self.nofill - 1) + + def start_q(self, attrs): + pass + def end_q(self): + pass + + def start_span(self, attrs): + pass + def end_span(self): + pass + + # --- Marking text + + def start_del(self, attrs): + pass + def end_del(self): + pass + + def start_ins(self, attrs): + pass + def end_ins(self): + pass + # --- List Elements def start_ul(self, attrs): @@ -290,18 +311,6 @@ self.formatter.end_paragraph(not self.list_stack) self.formatter.pop_margin() - def start_menu(self, attrs): - self.start_ul(attrs) - - def end_menu(self): - self.end_ul() - - def start_dir(self, attrs): - self.start_ul(attrs) - - def end_dir(self): - self.end_ul() - def start_dl(self, attrs): self.formatter.end_paragraph(1) self.list_stack.append(['dl', '', 0]) @@ -329,12 +338,21 @@ # Idiomatic Elements + def start_abbr(self, attrs): pass + def end_abbr(self): pass + + def start_acronym(self, attrs): pass + def end_acronym(self): pass + def start_cite(self, attrs): self.start_i(attrs) def end_cite(self): self.end_i() def start_code(self, attrs): self.start_tt(attrs) def end_code(self): self.end_tt() + def start_dfn(self, attrs): pass + def end_dfn(self): pass + def start_em(self, attrs): self.start_i(attrs) def end_em(self): self.end_i() @@ -352,21 +370,45 @@ # Typographic Elements + def start_b(self, attrs): + self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS)) + def end_b(self): + self.formatter.pop_font() + + def start_big(self, attrs): + pass + def end_big(self): + pass + def start_i(self, attrs): self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS)) def end_i(self): self.formatter.pop_font() - def start_b(self, attrs): - self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS)) - def end_b(self): - self.formatter.pop_font() + def start_small(self, attrs): + pass + def end_small(self): + pass + + def start_sub(self, attrs): + pass + def end_sub(self): + pass + + def start_sup(self, attrs): + pass + def end_sup(self): + pass def start_tt(self, attrs): self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1)) def end_tt(self): self.formatter.pop_font() + + + # Linking + def start_a(self, attrs): href = '' name = '' @@ -394,7 +436,15 @@ def do_hr(self, attrs): self.formatter.add_hor_rule() - # --- Image + # --- Images and objects + + def do_area(self, attrs): + pass + + def start_iframe(self, attrs): + pass + def end_iframe(self): + pass def do_img(self, attrs): align = '' @@ -420,11 +470,215 @@ except ValueError: pass self.handle_image(src, alt, ismap, align, width, height) - # --- Really Old Unofficial Deprecated Stuff + def start_map(self, attrs): + pass + def end_map(self): + pass + + def start_object(self, attrs): + pass + def end_object(self): + pass + + def start_param(self, attrs): + pass + def end_param(self): + pass + + # --- Forms + + def start_button(self, attrs): + pass + def end_button(self): + pass + + def start_fieldset(self, attrs): + pass + def end_fieldset(self): + pass + + def start_form(self, attrs): + pass + def end_form(self): + pass + + def start_input(self, attrs): + pass + def end_input(self): + pass + + def start_label(self, attrs): + pass + def end_label(self): + pass + + def start_legend(self, attrs): + pass + def end_legend(self): + pass + + def start_optgroup(self, attrs): + pass + def end_optgroup(self): + pass + + def start_option(self, attrs): + pass + def end_option(self): + pass + + def start_select(self, attrs): + pass + def end_select(self): + pass + + def start_textarea(self, attrs): + pass + def end_textarea(self): + pass + + + # --- Table elements + + def start_caption(self, attrs): + pass + def end_caption(self): + pass + + def start_col(self, attrs): + pass + def end_col(self): + pass + + def start_colgroup(self, attrs): + pass + def end_colgroup(self): + pass + + def start_table(self, attrs): + pass + def end_table(self): + pass + + def start_tbody(self, attrs): + pass + def end_tbody(self): + pass + + def start_td(self, attrs): + pass + def end_td(self): + pass + + def start_tfoot(self, attrs): + pass + def end_tfoot(self): + pass + + def start_th(self, attrs): + pass + def end_th(self): + pass + + def start_thead(self, attrs): + pass + def end_thead(self): + pass + + def start_tr(self, attrs): + pass + def end_tr(self): + pass + + # --- Frames + + def start_frame(self, attrs): + pass + def end_frame(self): + pass + + def start_frameset(self, attrs): + pass + def end_frameset(self): + pass + + def start_noframes(self, attrs): + pass + def end_noframes(self): + pass + + + # --- Constructs deprecated in HTML 4.01 + + def start_applet(self, attrs): + pass + def end_applet(self): + pass + + def do_basefont(self, attrs): + pass + + def start_center(self, attrs): + pass + def end_center(self): + pass + + def start_dir(self, attrs): + self.start_ul(attrs) + def end_dir(self): + self.end_ul() + + def start_font(self, attrs): + pass + def end_font(self): + pass + + def do_isindex(self, attrs): + self.isindex = 1 + + def start_menu(self, attrs): + self.start_ul(attrs) + def end_menu(self): + self.end_ul() + + def start_s(self, attrs): + pass + def end_s(self): + pass + + def start_strike(self, attrs): + pass + def end_strike(self): + pass + + def start_u(self, attrs): + pass + def end_u(self): + pass + + + # --- Unofficial deprecated elements (pre-HTML 2.0, mostly) + + def start_listing(self, attrs): + self.start_pre(attrs) + self.setliteral('listing') # Tell SGML parser + + def end_listing(self): + self.end_pre() + + def do_nextid(self, attrs): # Deprecated + pass def do_plaintext(self, attrs): self.start_pre(attrs) self.setnomoretags() # Tell SGML parser + + def start_xmp(self, attrs): + self.start_pre(attrs) + self.setliteral('xmp') # Tell SGML parser + + def end_xmp(self): + self.end_pre() # --- Unhandled tags