Issue1452246
Created on 2006-03-17 11:57 by gnarfk, last changed 2006-04-01 01:16 by tim_one.
| Messages (3) | |||
|---|---|---|---|
| msg27815 - (view) | Author: Helmut Grohne (gnarfk) | Date: 2006-03-17 11:57 | |
I'd like to illustrate and suggest a fix by showing a
simple python file (which was named htmllib2.py so you
can uncomment the line in the doctest case to see that
my fix works). It's more like a hack than the fix though:
#!/usr/bin/env python2.4
"""
Use this instead of htmllib to have entitydefs
substituted in attribute values, too.
Example:
>>> import htmllib
# >>> import htmllib2 as htmllib
>>> import formatter
>>> import StringIO
>>> s = StringIO.StringIO()
>>> p = htmllib.HTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
>>> p.feed('<img alt="&lt;&gt;&amp;">')
>>> s.getvalue()
'<>&'
"""
# Public API: only the patched parser class is exported.
__all__ = ("HTMLParser",)
import htmllib
from htmlentitydefs import name2codepoint as entitytable
# Rebind entitytable from "entity name -> code point" to
# "entity name -> one-character string".  Only code points below 256 are
# kept; the values are converted with chr().
entitytable = dict([(k, chr(v)) for k, v in
entitytable.items() if v < 256])
def entitysub(s):
    """Return *s* with named entity references replaced by their characters.

    A reference has the form ``&name;``.  Names found in the module-level
    ``entitytable`` are replaced by the mapped character; unknown names are
    copied through unchanged, including the ``&`` and the ``;``.

    An ``&`` that is never terminated by ``;`` (for example ``"AT&T"``) is
    treated as literal text and kept, instead of being silently dropped
    together with everything that follows it.
    """
    pieces = []
    # Text of the reference currently being collected, including the leading
    # '&'.  Empty while we are outside of a reference.
    pending = ""
    for c in s:
        if pending:
            if c == ';':
                # Complete reference: substitute it, or emit it verbatim
                # when the name is not a known entity.
                pieces.append(entitytable.get(pending[1:], '%s;' % pending))
                pending = ""
            else:
                pending += c
        elif c == '&':
            pending = c
        else:
            pieces.append(c)
    # Input ended inside an unterminated reference: it was plain text after
    # all, so keep it rather than losing the tail of the string.
    pieces.append(pending)
    return "".join(pieces)
class HTMLParser(htmllib.HTMLParser):
    """htmllib.HTMLParser that runs attribute values through entitysub()."""

    def handle_starttag(self, tag, method, attrs):
        """Invoke *method* with the entity-substituted attribute list.

        *attrs* is a sequence of (name, value) pairs; every value is passed
        through entitysub() before *method* is called with the new list.
        """
        repaired = []
        for name, value in attrs:
            repaired.append((name, entitysub(value)))
        method(repaired)
if __name__ == '__main__':
    # Executed as a script: run the example embedded in the module docstring.
    import doctest
    doctest.testmod()
|
|||
| msg27816 - (view) | Author: Rares Vernica (rvernica) | Date: 2006-04-01 01:13 | |
Logged In: YES user_id=1491427 This bug has been fixed on patch #1462498. Ray |
|||
| msg27817 - (view) | Author: Tim Peters (tim_one) | Date: 2006-04-01 01:16 | |
Logged In: YES user_id=31435 Thanks, Ray! Closing as Fixed. |
|||
| History | |||
|---|---|---|---|
| Date | User | Action | Args |
| 2006-03-17 11:57:36 | gnarfk | create | |