diff -r 1b58f14f5d60 Doc/library/html.rst
--- a/Doc/library/html.rst Wed Nov 20 17:44:28 2013 +0200
+++ b/Doc/library/html.rst Wed Nov 20 20:19:25 2013 +0200
@@ -31,6 +31,19 @@
.. versionadded:: 3.4
+.. function:: htmlcharrefreplace_errors(exception)
+
+ Implements the ``htmlcharrefreplace`` error handling (for encoding only): the
+ unencodable character is replaced by an appropriate HTML named or numeric
+ character reference. This error handler is not registered by default; you
+ should register it with :func:`codecs.register_error`::
+
+ >>> codecs.register_error('htmlcharrefreplace', htmlcharrefreplace_errors)
+ >>> '∀ x∈ℜ'.encode('ascii', 'htmlcharrefreplace')
+ b'&forall; x&isin;&real;'
+
+ .. versionadded:: 3.4
+
--------------
Submodules in the ``html`` package are:
diff -r 1b58f14f5d60 Lib/html/__init__.py
--- a/Lib/html/__init__.py Wed Nov 20 17:44:28 2013 +0200
+++ b/Lib/html/__init__.py Wed Nov 20 20:19:25 2013 +0200
@@ -3,7 +3,7 @@
"""
import re as _re
-from html.entities import html5 as _html5
+from html.entities import codepoint2name as _codepoint2name, html5 as _html5
__all__ = ['escape', 'unescape']
@@ -130,3 +130,21 @@
if '&' not in s:
return s
return _charref.sub(_replace_charref, s)
+
+
+def htmlcharrefreplace_errors(exception):
+    """Implement the 'htmlcharrefreplace' error handling (encoding only).
+
+    Each unencodable character becomes a named reference ('&name;') when
+    codepoint2name knows it, else a decimal numeric reference ('&#N;').
+    Return (replacement, exception.end); any exception that is not a
+    UnicodeEncodeError is re-raised unchanged.
+    """
+    if not isinstance(exception, UnicodeEncodeError):
+        raise exception
+    refs = []
+    for char in exception.object[exception.start:exception.end]:
+        name = _codepoint2name.get(ord(char))
+        # Numeric references are only valid HTML with the '&#' prefix.
+        refs.append('&#%d;' % ord(char) if name is None else '&%s;' % name)
+    return ''.join(refs), exception.end
diff -r 1b58f14f5d60 Lib/test/test_html.py
--- a/Lib/test/test_html.py Wed Nov 20 17:44:28 2013 +0200
+++ b/Lib/test/test_html.py Wed Nov 20 20:19:25 2013 +0200
@@ -2,6 +2,7 @@
Tests for the html module functions.
"""
+import codecs
import html
import unittest
from test.support import run_unittest
@@ -99,6 +100,14 @@
'ÉricÉric&alphacentauriαcentauri')
check('&co;', '&co;')
+    def test_htmlcharrefreplace(self):
+        # Expect '&name;' where html.entities.codepoint2name has an entry,
+        # '&#N;' otherwise; latin-1 encodable characters pass through.
+        name = 'htmlcharrefreplace'
+        codecs.register_error(name, html.htmlcharrefreplace_errors)
+        encoded = '[$\xa5\u20a3\u20ac\U0001d56b]'.encode('latin1', name)
+        self.assertEqual(encoded, b'[$\xa5&#8355;&euro;&#120171;]')
+
if __name__ == '__main__':
unittest.main()