--- HTMLParser.py 2009-07-24 13:11:43.000000000 +0200 +++ HTMLParser.py 2009-07-30 09:02:19.000000000 +0200 @@ -362,6 +362,12 @@ def unescape(self, s): if '&' not in s: return s + + if isinstance(s, unicode): + chr_func = unichr + else: + chr_func = chr + def replaceEntities(s): s = s.groups()[0] if s[0] == "#": @@ -370,15 +376,18 @@ c = int(s[1:], 16) else: c = int(s) - return unichr(c) + return chr_func(c) else: # Cannot use name2codepoint directly, because HTMLParser supports apos, # which is not part of HTML 4 import htmlentitydefs if HTMLParser.entitydefs is None: - entitydefs = HTMLParser.entitydefs = {'apos':u"'"} + entitydefs = HTMLParser.entitydefs = {'apos' : type(s)("'")} for k, v in htmlentitydefs.name2codepoint.iteritems(): - entitydefs[k] = unichr(v) + try: + entitydefs[k] = chr_func(v) + except ValueError: + pass try: return self.entitydefs[s] except KeyError: