--- HTMLParser.py 2009-07-24 13:11:43.000000000 +0200
+++ HTMLParser.py 2009-07-30 09:02:19.000000000 +0200
@@ -362,6 +362,12 @@
def unescape(self, s):
if '&' not in s:
return s
+
+ if isinstance(s, unicode):
+ chr_func = unichr
+ else:
+ chr_func = chr
+
def replaceEntities(s):
s = s.groups()[0]
if s[0] == "#":
@@ -370,15 +376,18 @@
c = int(s[1:], 16)
else:
c = int(s)
- return unichr(c)
+ return chr_func(c)
else:
# Cannot use name2codepoint directly, because HTMLParser supports apos,
# which is not part of HTML 4
import htmlentitydefs
if HTMLParser.entitydefs is None:
- entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+ entitydefs = HTMLParser.entitydefs = {'apos' : type(s)("'")}
for k, v in htmlentitydefs.name2codepoint.iteritems():
- entitydefs[k] = unichr(v)
+ try:
+ entitydefs[k] = chr_func(v)
+ except ValueError:
+ pass
try:
return self.entitydefs[s]
except KeyError: