"""Feed a UTF-8 encoded byte string to HTMLParser, with and without '&'.

Two probes are run in sequence.  Each prints a label, feeds its payload
to a brand-new parser instance, and prints 'OK' once ``feed`` has
returned; an exception raised by ``feed`` therefore shows up as a
missing 'OK' followed by a traceback.
"""
from HTMLParser import HTMLParser as HP

# Unicode text containing a non-ASCII character (U+00E4) and a double
# quote, encoded to a UTF-8 byte string before being handed to the parser.
# NOTE(review): as written, this literal contains no '&', so the
# ``replace`` call below leaves it unchanged and both probes feed the same
# bytes.  Presumably markup was lost from the literal -- verify against
# the original script.
s = u""" \xe4 and " """.encode('UTF-8')


def _probe(label, payload):
    """Print *label*, parse *payload* with a fresh parser, then print 'OK'."""
    print(label)
    parser = HP()
    parser.feed(payload)
    print('OK')


# First probe: the payload with its first '&' removed.
_probe('Without & in attribute', s.replace('&', '', 1))

# Second probe: the payload exactly as encoded.
_probe('With & in attribute', s)