Index: Lib/html/parser.py
===================================================================
--- Lib/html/parser.py	(revision 63474)
+++ Lib/html/parser.py	(working copy)
@@ -10,6 +10,7 @@
 
 import markupbase
 import re
+import html.entities
 
 # Regular expressions used for parsing
 
@@ -45,7 +46,39 @@
 endendtag = re.compile('>')
 endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
 
+# remove HTML escape sequences
+def unescape(s):
+    """Convert &...; escape sequences in s into unicode characters.
+
+    Numeric references 145-153 are read as Windows-1252 code points, since
+    many pages use them by mistake; unknown or malformed references are
+    left unchanged.
+    """
+    cp1252_to_unicode = {145:8216, 146:8217, 147:8220, 148:8221, 149:8226,
+                         150:8211, 151:8212, 152:732, 153:8482}
+    if '&' not in s:
+        return s
+    def replaceEntities(m):
+        s = m.group(1)
+        if s[0] == "#":
+            try:
+                if s[1] in 'xX':
+                    c = int(s[2:], 16)
+                else:
+                    c = int(s[1:])
+                return unichr(cp1252_to_unicode.get(c, c))
+            except (ValueError, OverflowError):
+                return m.group(0)   # e.g. "&#xyz;": not a valid reference
+        try:
+            return unichr(html.entities.name2codepoint[s])
+        except KeyError:
+            # HTMLParser also supports apos, which is not in HTML 4
+            return u"'" if s == 'apos' else m.group(0)
 
+    return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
+                  replaceEntities, s)
+
+
 class HTMLParseError(Exception):
     """Exception raised for all parse errors."""
 
@@ -246,7 +279,7 @@
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                  attrvalue[:1] == '"' == attrvalue[-1:]:
                 attrvalue = attrvalue[1:-1]
-                attrvalue = self.unescape(attrvalue)
+                attrvalue = unescape(attrvalue)
             attrs.append((attrname.lower(), attrvalue))
             k = m.end()
 
@@ -357,32 +390,3 @@
     def unknown_decl(self, data):
         self.error("unknown declaration: %r" % (data,))
 
-    # Internal -- helper to remove special character quoting
-    entitydefs = None
-    def unescape(self, s):
-        if '&' not in s:
-            return s
-        def replaceEntities(s):
-            s = s.groups()[0]
-            if s[0] == "#":
-                s = s[1:]
-                if s[0] in ['x','X']:
-                    c = int(s[1:], 16)
-                else:
-                    c = int(s)
-                return unichr(c)
-            else:
-                # Cannot use name2codepoint directly, because HTMLParser
-                # supports apos, which is not part of HTML 4
-                import html.entities
-                if HTMLParser.entitydefs is None:
-                    entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
-                    for k, v in html.entities.name2codepoint.iteritems():
-                        entitydefs[k] = unichr(v)
-                try:
-                    return self.entitydefs[s]
-                except KeyError:
-                    return '&'+s+';'
-
-        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
-                      replaceEntities, s)
Index: Lib/test/test_htmlparser.py
===================================================================
--- Lib/test/test_htmlparser.py	(revision 63474)
+++ Lib/test/test_htmlparser.py	(working copy)
@@ -314,8 +314,17 @@
                 ])
 
 
+class UnescapeTestCase(unittest.TestCase):
+    def test_unescaping(self):
+        # &#146; is a cp1252 code expected to become U+2019; &ocirc; is a named entity
+        escaped = u"<p>There&#146;s the C&ocirc;te</p>"
+        expected = u"<p>There" + unichr(8217) + u"s the C" + unichr(0x00f4) + u"te</p>"
+        self.assertEqual(html.parser.unescape(escaped), expected)
+        self.assertEqual(html.parser.unescape(u"&apos;&bogus;"), u"'&bogus;")
+
 def test_main():
     test_support.run_unittest(HTMLParserTestCase)
+    test_support.run_unittest(UnescapeTestCase)
 
 
 if __name__ == "__main__":