Index: Include/codecs.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/codecs.h,v
retrieving revision 2.3
diff -u -r2.3 codecs.h
--- Include/codecs.h	3 Aug 2000 16:24:24 -0000	2.3
+++ Include/codecs.h	29 May 2002 20:43:58 -0000
@@ -117,7 +117,117 @@
        const char *errors
        );
 
+/* Unicode encoding error handling callback registry API */
+
+/* Register the encoding error handling callback function error under
+   the name name. This function will be called by the encoder when it
+   encounters an unencodable character and name is specified as the
+   error parameter in the call to the encode function.
+   Return 0 on success, -1 on error */
+extern DL_IMPORT(int) PyCodec_RegisterUnicodeEncodeErrorHandler(
+       const char *name,
+       PyObject *error
+       );
+
+/* Lookup the encoding error handling callback function registered
+   under the name name. As a special case NULL can be passed, in
+   which case the error handling callback for strict encoding will
+   be returned. */
+extern DL_IMPORT(PyObject *) PyCodec_LookupUnicodeEncodeErrorHandler(
+       const char *name
+       );
+
+/* Raises a Unicode exception */
+extern DL_IMPORT(void) PyCodec_RaiseUnicodeEncodeError(
+       const char *encoding,
+       const Py_UNICODE *str,
+       int startpos,
+       int endpos,
+       const char *reason
+       );
+
+/* Encode error handler that raises an exception */
+extern DL_IMPORT(PyObject *) PyCodec_RaiseUnicodeEncodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Encode error handler that returns an empty string and so
+   ignores the unencodable characters */
+extern DL_IMPORT(PyObject *) PyCodec_IgnoreUnicodeEncodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Encode error handler that returns question marks for each
+   unencodable character */
+extern DL_IMPORT(PyObject *) PyCodec_ReplaceUnicodeEncodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Encode error handler that returns XML character references
+   for the unencodable characters */
+extern DL_IMPORT(PyObject *) PyCodec_XMLCharRefReplaceUnicodeEncodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Encode error handler that returns an \x (or \u or \U) escape sequence
+   for each unencodable character */
+extern DL_IMPORT(PyObject *) PyCodec_BackslashReplaceUnicodeEncodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+
+/* Unicode decoding error handling callback registry API */
+
+/* Register the decoding error handling callback function error under
+   the name name. Return 0 on success, -1 on error */
+extern DL_IMPORT(int) PyCodec_RegisterUnicodeDecodeErrorHandler(
+       const char *name,
+       PyObject *error
+       );
+
+/* Lookup the decoding error handling callback function registered
+   under the name name. As a special case NULL can be passed, in which
+   case the error handling callback for strict decoding will be returned. */
+extern DL_IMPORT(PyObject *) PyCodec_LookupUnicodeDecodeErrorHandler(
+       const char *name
+       );
+
+/* Raises a Unicode exception */
+extern DL_IMPORT(void) PyCodec_RaiseUnicodeDecodeError(
+       const char *encoding,
+       const char *str,
+       int startpos,
+       int endpos,
+       const char *reason
+       );
+
+/* Decode error handler that raises an exception */
+extern DL_IMPORT(PyObject *) PyCodec_RaiseUnicodeDecodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Decode error handler that returns an empty string and so
+   ignores the undecodable bytes (probably resulting in
+   more errors from the next bytes) */
+extern DL_IMPORT(PyObject *) PyCodec_IgnoreUnicodeDecodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
+/* Decode error handler that returns "?" as a replacement for
+   the undecodable bytes. */
+extern DL_IMPORT(PyObject *) PyCodec_ReplaceUnicodeDecodeErrors(
+       PyObject *self,
+       PyObject *args
+       );
+
 #ifdef __cplusplus
 }
 #endif
-#endif /* !Py_CODECREGISTRY_H */
+#endif /* !Py_CODECREGISTRY_H */
Index: Lib/codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.24
diff -u -r1.24 codecs.py
--- Lib/codecs.py	5 Mar 2002 15:46:38 -0000	1.24
+++ Lib/codecs.py	29 May 2002 20:43:58 -0000
@@ -18,7 +18,13 @@
           'Failed to load the builtin codecs: %s' % why
 
 __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
-           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE"]
+           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
+           "raise_unicodeencode_errors", "ignore_unicodeencode_errors",
+           "replace_unicodeencode_errors", "xmlcharrefreplace_unicodeencode_errors",
+           "backslashreplace_unicodeencode_errors", "raise_unicodedecode_errors",
+           "ignore_unicodedecode_errors", "replace_unicodedecode_errors",
+           "register_unicodeencodeerrorhandler", "lookup_unicodeencodeerrorhandler",
+           "register_unicodedecodeerrorhandler", "lookup_unicodedecodeerrorhandler"]
 
 ### Constants
 
Index: Modules/_codecsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_codecsmodule.c,v
retrieving revision 2.11
diff -u -r2.11 _codecsmodule.c
--- Modules/_codecsmodule.c	17 Jan 2002 23:15:58 -0000	2.11
+++ Modules/_codecsmodule.c	29 May 2002 20:44:03 -0000
@@ -664,6 +664,56 @@
 #endif /* MS_WIN32 */
 #endif /* Py_USING_UNICODE */
 
+/* --- Error handler registry --------------------------------------------- */
+
+static PyObject *register_unicodeencodeerrorhandler(PyObject *self, PyObject *args)
+{
+    /* Python signature: (name, handler) -> None */
+    PyObject *callback = NULL;
+    const char *errors = NULL;
+    int parsed = PyArg_ParseTuple(args, "sO:register_unicodeencodeerrorhandler",
+                                  &errors, &callback);
+
+    if (!parsed || PyCodec_RegisterUnicodeEncodeErrorHandler(errors, callback) != 0)
+        return NULL;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static PyObject *lookup_unicodeencodeerrorhandler(PyObject *self, PyObject *args)
+{
+    /* Python signature: (name) -> result of the C-level encode lookup */
+    const char *errors = NULL;
+    PyObject *found = NULL;
+    if (PyArg_ParseTuple(args, "s:lookup_unicodeencodeerrorhandler", &errors))
+        found = PyCodec_LookupUnicodeEncodeErrorHandler(errors);
+    return found;
+}
+
+static PyObject *register_unicodedecodeerrorhandler(PyObject *self, PyObject *args)
+{
+    /* Python signature: (name, handler) -> None */
+    PyObject *callback = NULL;
+    const char *errors = NULL;
+    int parsed = PyArg_ParseTuple(args, "sO:register_unicodedecodeerrorhandler",
+                                  &errors, &callback);
+
+    if (!parsed || PyCodec_RegisterUnicodeDecodeErrorHandler(errors, callback) != 0)
+        return NULL;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static PyObject *lookup_unicodedecodeerrorhandler(PyObject *self, PyObject *args)
+{
+    /* Python signature: (name) -> result of the C-level decode lookup */
+    const char *errors = NULL;
+    PyObject *found = NULL;
+    if (PyArg_ParseTuple(args, "s:lookup_unicodedecodeerrorhandler", &errors))
+        found = PyCodec_LookupUnicodeDecodeErrorHandler(errors);
+    return found;
+}
+
 /* --- Module API --------------------------------------------------------- */
 
 static PyMethodDef _codecs_functions[] = {
@@ -699,7 +749,31 @@
     {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
     {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
 #endif
+    {"raise_unicodeencode_errors",
+        PyCodec_RaiseUnicodeEncodeErrors, METH_VARARGS},
+    {"ignore_unicodeencode_errors",
+        PyCodec_IgnoreUnicodeEncodeErrors, METH_VARARGS},
+    {"replace_unicodeencode_errors",
+        PyCodec_ReplaceUnicodeEncodeErrors, METH_VARARGS},
+    {"xmlcharrefreplace_unicodeencode_errors",
+        PyCodec_XMLCharRefReplaceUnicodeEncodeErrors, METH_VARARGS},
+    {"backslashreplace_unicodeencode_errors",
+        PyCodec_BackslashReplaceUnicodeEncodeErrors, METH_VARARGS},
+    {"raise_unicodedecode_errors",
+        PyCodec_RaiseUnicodeDecodeErrors, METH_VARARGS},
+    {"ignore_unicodedecode_errors",
+        PyCodec_IgnoreUnicodeDecodeErrors, METH_VARARGS},
+    {"replace_unicodedecode_errors",
+        PyCodec_ReplaceUnicodeDecodeErrors, METH_VARARGS},
 #endif /* Py_USING_UNICODE */
+    {"register_unicodeencodeerrorhandler",
+        register_unicodeencodeerrorhandler, METH_VARARGS},
+    {"lookup_unicodeencodeerrorhandler",
+        lookup_unicodeencodeerrorhandler, METH_VARARGS},
+    {"register_unicodedecodeerrorhandler",
+        register_unicodedecodeerrorhandler, METH_VARARGS},
+    {"lookup_unicodedecodeerrorhandler",
+        lookup_unicodedecodeerrorhandler, METH_VARARGS},
     {NULL, NULL}		/* sentinel */
 };
 
Index: Objects/stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.164
diff -u -r2.164 stringobject.c
--- Objects/stringobject.c	24 May 2002 19:01:58 -0000	2.164
+++ Objects/stringobject.c	29 May 2002 20:44:04 -0000
@@ -2211,7 +2211,9 @@
 Encodes S using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.";
+a ValueError. Other possible values are 'ignore', 'replace' and\n\
+'xmlcharrefreplace' as well as any other name registered with\n\
+codecs.register_unicodeencodeerrorhandler.";
 
 static PyObject *
 string_encode(PyStringObject *self, PyObject *args)
@@ -2230,7 +2232,9 @@
 Decodes S using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.";
+a ValueError. Other possible values are 'ignore' and 'replace' as well\n\
+as any other name registered with\n\
+codecs.register_unicodedecodeerrorhandler.";
 
 static PyObject *
 string_decode(PyStringObject *self, PyObject *args)
Index: Objects/unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.149
diff -u -r2.149 unicodeobject.c
--- Objects/unicodeobject.c	24 May 2002 19:01:59 -0000	2.149
+++ Objects/unicodeobject.c	29 May 2002 20:44:05 -0000
@@ -489,8 +489,8 @@
 			   const char *errors)
 {
     PyObject *buffer = NULL, *unicode;
-    
-    if (encoding == NULL) 
+
+    if (encoding == NULL)
 	encoding = PyUnicode_GetDefaultEncoding();
 
     /* Shortcuts for common default encodings */
@@ -641,6 +641,89 @@
     return -1;
 }
 
+/* Error handling callback helper for the decoders:
+   look up the handler for errors (cached in *errorHandler), wrap the
+   input in a string object (cached in *inputObject) and call the handler
+   with (encoding, input, start, end, reason, None).  It must return a
+   (unicode, int) tuple: the replacement and the input position to resume
+   at (clamped to 0..insize).  The replacement is copied to the output
+   (resized if needed) and *outptr, *outpos, *endinpos, *inptr are updated.
+   Return 0 on success, -1 on error. */
+
+static
+int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
+                 const char *encoding, const char *reason,
+                 const char *input, int insize, int *startinpos, int *endinpos,
+                 PyObject **inputObject, const char **inptr,
+                 PyObject **output, int *outpos, Py_UNICODE **outptr)
+{
+    static char *argparse = "O!i;decoding error handler must return (unicode, int) tuple";
+    PyObject *args;
+    PyObject *restuple = NULL;
+    PyObject *repunicode = NULL;
+    int outsize = PyUnicode_GET_SIZE(*output);
+    int requiredsize, newpos, repsize;
+    Py_UNICODE *repptr;
+    int res = -1;
+
+    if (*errorHandler == NULL) {
+        *errorHandler = PyCodec_LookupUnicodeDecodeErrorHandler(errors);
+        if (*errorHandler == NULL)
+            goto onError;
+    }
+
+    if (*inputObject == NULL) {
+        *inputObject = PyString_FromStringAndSize(input, insize);
+        if (*inputObject == NULL)
+            goto onError;
+    }
+
+    /* we don't need a state => use None */
+    args = Py_BuildValue("sOiisO", encoding, *inputObject, *startinpos, *endinpos, reason, Py_None);
+    if (args == NULL)
+        goto onError;
+    restuple = PyEval_CallObject(*errorHandler, args);
+    Py_DECREF(args);
+    if (restuple == NULL)
+        goto onError;
+    if (!PyTuple_Check(restuple)) {
+        /* skip the "O!i;" prefix; the message is plain text, not a format */
+        PyErr_SetString(PyExc_TypeError, &argparse[4]);
+        goto onError;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+        goto onError;
+    if (newpos<0)
+        newpos = 0;
+    else if (newpos>insize)
+        newpos = insize;
+
+    /* need more space? (at least enough for what we
+       have+the replacement+the rest of the string (starting
+       at the new input position), so we won't have to check space
+       when there are no errors in the rest of the string) */
+    repptr = PyUnicode_AS_UNICODE(repunicode);
+    repsize = PyUnicode_GET_SIZE(repunicode);
+    requiredsize = *outpos + repsize + insize-newpos;
+    if (requiredsize > outsize) {
+        if (requiredsize<2*outsize)
+            requiredsize = 2*outsize;
+        if (PyUnicode_Resize(output, requiredsize))
+            goto onError;
+        *outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
+    }
+    *endinpos = newpos;
+    *inptr = input + newpos;
+    Py_UNICODE_COPY(*outptr, repptr, repsize);
+    *outptr += repsize;
+    *outpos += repsize;
+    res = 0;
+
+    onError:
+    Py_XDECREF(restuple);
+    return res;
+}
+
 /* --- UTF-7 Codec -------------------------------------------------------- */
 
 /* see RFC2152 for details */
@@ -699,40 +782,14 @@
 		} \
     } \
 
-static
-int utf7_decoding_error(Py_UNICODE **dest,
-                        const char *errors,
-                        const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-7 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        if (dest != NULL) {
-            **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-            (*dest)++;
-        }
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-7 decoding error; unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeUTF7(const char *s,
 			       int size,
 			       const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
     const char *e;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
@@ -740,7 +797,9 @@
     int inShift = 0;
     unsigned int bitsleft = 0;
     unsigned long charsleft = 0;
-	int surrogate = 0;
+    int surrogate = 0;
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
 
     unicode = _PyUnicode_New(size);
     if (!unicode)
@@ -752,7 +811,9 @@
     e = s + size;
 
     while (s < e) {
-        Py_UNICODE ch = *s;
+        Py_UNICODE ch;
+        restart:
+        ch = *s;
 
         if (inShift) {
             if ((ch == '-') || !B64CHAR(ch)) {
@@ -797,6 +858,7 @@
             }
         }
         else if ( ch == '+' ) {
+            startinpos = s-starts;
             s++;
             if (s < e && *s == '-') {
                 s++;
@@ -818,21 +880,39 @@
         }
         continue;
     utf7Error:
-      if (utf7_decoding_error(&p, errors, errmsg))
-          goto onError;
+        outpos = p-PyUnicode_AS_UNICODE(unicode);
+        endinpos = s-starts;
+        if (unicode_decode_call_errorhandler(
+             errors, &errorHandler,
+             "utf7", errmsg,
+             starts, size, &startinpos, &endinpos, &inputObject, &s,
+             (PyObject **)&unicode, &outpos, &p))
+        goto onError;
     }
 
     if (inShift) {
-        if (utf7_decoding_error(&p, errors, "unterminated shift sequence"))
+        outpos = p-PyUnicode_AS_UNICODE(unicode);
+        endinpos = size;
+        if (unicode_decode_call_errorhandler(
+             errors, &errorHandler,
+             "utf7", "unterminated shift sequence",
+             starts, size, &startinpos, &endinpos, &inputObject, &s,
+             (PyObject **)&unicode, &outpos, &p))
             goto onError;
+        if (s < e)
+           goto restart;
     }
 
-    if (_PyUnicode_Resize(&unicode, p - unicode->str))
+    if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)))
         goto onError;
 
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)unicode;
 
 onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     Py_DECREF(unicode);
     return NULL;
 }
@@ -962,46 +1042,21 @@
     4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
 };
 
-static
-int utf8_decoding_error(const char **source,
-                        Py_UNICODE **dest,
-                        const char *errors,
-                        const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-8 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        (*source)++;
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        (*source)++;
-        **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-        (*dest)++;
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-8 decoding error; unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeUTF8(const char *s,
 			       int size,
 			       const char *errors)
 {
+    const char *starts = s;
     int n;
+    int startinpos;
+    int endinpos;
+    int outpos;
     const char *e;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
     const char *errmsg = "";
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
@@ -1028,6 +1083,8 @@
 
         if (s + n > e) {
 	    errmsg = "unexpected end of data";
+	    startinpos = s-starts;
+	    endinpos = size;
 	    goto utf8Error;
 	}
 
@@ -1035,19 +1092,27 @@
 
         case 0:
             errmsg = "unexpected code byte";
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
 	    goto utf8Error;
 
         case 1:
             errmsg = "internal error";
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
 	    goto utf8Error;
 
         case 2:
             if ((s[1] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+2;
 		goto utf8Error;
 	    }
             ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
             if (ch < 0x80) {
+		startinpos = s-starts;
+		endinpos = startinpos+2;
                 errmsg = "illegal encoding";
 		goto utf8Error;
 	    }
@@ -1059,6 +1124,8 @@
             if ((s[1] & 0xc0) != 0x80 || 
                 (s[2] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+3;
 		goto utf8Error;
 	    }
             ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
@@ -1071,6 +1138,8 @@
 		       unit.
 		*/
                 errmsg = "illegal encoding";
+		startinpos = s-starts;
+		endinpos = startinpos+3;
 		goto utf8Error;
 	    }
 	    else
@@ -1082,6 +1151,8 @@
                 (s[2] & 0xc0) != 0x80 ||
                 (s[3] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+4;
 		goto utf8Error;
 	    }
             ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
@@ -1093,6 +1164,8 @@
 					 UTF-16 */
 	    {
                 errmsg = "illegal encoding";
+		startinpos = s-starts;
+		endinpos = startinpos+4;
 		goto utf8Error;
 	    }
 #ifdef Py_UNICODE_WIDE
@@ -1114,23 +1187,34 @@
         default:
             /* Other sizes are only needed for UCS-4 */
             errmsg = "unsupported Unicode code range";
+	    startinpos = s-starts;
+	    endinpos = startinpos+n;
 	    goto utf8Error;
         }
         s += n;
 	continue;
 	
     utf8Error:
-      if (utf8_decoding_error(&s, &p, errors, errmsg))
-          goto onError;
+    outpos = p-PyUnicode_AS_UNICODE(unicode);
+    if (unicode_decode_call_errorhandler(
+	     errors, &errorHandler,
+	     "utf8", errmsg,
+	     starts, size, &startinpos, &endinpos, &inputObject, &s,
+	     (PyObject **)&unicode, &outpos, &p))
+	goto onError;
     }
 
     /* Adjust length */
     if (_PyUnicode_Resize(&unicode, p - unicode->str))
         goto onError;
 
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)unicode;
 
 onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     Py_DECREF(unicode);
     return NULL;
 }
@@ -1248,43 +1332,16 @@
 
 /* --- UTF-16 Codec ------------------------------------------------------- */
 
-static
-int utf16_decoding_error(Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-16 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	if (dest) {
-	    **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	    (*dest)++;
-	}
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-16 decoding error; "
-		     "unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *
 PyUnicode_DecodeUTF16(const char *s,
 		      int size,
 		      const char *errors,
 		      int *byteorder)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
     const unsigned char *q, *e;
@@ -1296,13 +1353,8 @@
 #else
     int ihi = 0, ilo = 1;
 #endif
-
-    /* size should be an even number */
-    if (size & 1) {
-        if (utf16_decoding_error(NULL, errors, "truncated data"))
-            return NULL;
-        --size;  /* else ignore the oddball byte */
-    }
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
@@ -1359,7 +1411,18 @@
     }
 
     while (q < e) {
-	Py_UNICODE ch = (q[ihi] << 8) | q[ilo];
+	Py_UNICODE ch;
+	/* remaining bytes at the end? (size should be even) */
+	if (e-q<2) {
+	    errmsg = "truncated data";
+	    startinpos = ((const char *)q)-starts;
+	    endinpos = ((const char *)e)-starts;
+	    goto utf16Error;
+	    /* The remaining input chars are ignored if the callback
+	       chooses to skip the input */
+	}
+	ch = (q[ihi] << 8) | q[ilo];
+
 	q += 2;
 
 	if (ch < 0xD800 || ch > 0xDFFF) {
@@ -1370,6 +1433,8 @@
 	/* UTF-16 code pair: */
 	if (q >= e) {
 	    errmsg = "unexpected end of data";
+	    startinpos = (((const char *)q)-2)-starts;
+	    endinpos = ((const char *)e)-starts;
 	    goto utf16Error;
 	}
 	if (0xD800 <= ch && ch <= 0xDBFF) {
@@ -1386,15 +1451,24 @@
 	    }
 	    else {
                 errmsg = "illegal UTF-16 surrogate";
+		startinpos = (((const char *)q)-4)-starts;
+		endinpos = startinpos+2;
 		goto utf16Error;
 	    }
 
 	}
 	errmsg = "illegal encoding";
+	startinpos = (((const char *)q)-2)-starts;
+	endinpos = startinpos+2;
 	/* Fall through to report the error */
 
     utf16Error:
-	if (utf16_decoding_error(&p, errors, errmsg))
+	outpos = p-PyUnicode_AS_UNICODE(unicode);
+	if (unicode_decode_call_errorhandler(
+	         errors, &errorHandler,
+	         "utf16", errmsg,
+	         starts, size, &startinpos, &endinpos, &inputObject, (const char **)&q,
+	         (PyObject **)&unicode, &outpos, &p))
 	    goto onError;
     }
 
@@ -1405,10 +1479,14 @@
     if (_PyUnicode_Resize(&unicode, p - unicode->str))
         goto onError;
 
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)unicode;
 
 onError:
     Py_DECREF(unicode);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return NULL;
 }
 
@@ -1489,70 +1567,50 @@
 
 /* --- Unicode Escape Codec ----------------------------------------------- */
 
-static
-int unicodeescape_decoding_error(Py_UNICODE **x,
-                                 const char *errors,
-                                 const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "Unicode-Escape decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        **x = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*x)++;
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "Unicode-Escape decoding error; "
-                     "unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
 PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
 					int size,
 					const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    int i;
     PyUnicodeObject *v;
-    Py_UNICODE *p, *buf;
+    Py_UNICODE *p;
     const char *end;
     char* message;
     Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
 
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
-       length after conversion to the true value. */
+       length after conversion to the true value.
+       (but if the error callback returns a long replacement string
+       we'll have to allocate more space) */
     v = _PyUnicode_New(size);
     if (v == NULL)
         goto onError;
     if (size == 0)
         return (PyObject *)v;
 
-    p = buf = PyUnicode_AS_UNICODE(v);
+    p = PyUnicode_AS_UNICODE(v);
     end = s + size;
 
     while (s < end) {
         unsigned char c;
         Py_UNICODE x;
-        int i, digits;
+        int digits;
 
         /* Non-escape characters are interpreted as Unicode ordinals */
         if (*s != '\\') {
             *p++ = (unsigned char) *s++;
             continue;
         }
-
+        startinpos = s-starts;
         /* \ - Escapes */
         s++;
         switch (*s++) {
@@ -1601,14 +1659,28 @@
             message = "truncated \\UXXXXXXXX escape";
         hexescape:
             chr = 0;
-            for (i = 0; i < digits; i++) {
+            outpos = p-PyUnicode_AS_UNICODE(v);
+            if (s+digits>end) {
+                endinpos = size;
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", "end of string in escape sequence",
+                    starts, size, &startinpos, &endinpos, &inputObject, &s,
+                    (PyObject **)&v, &outpos, &p))
+                    goto onError;
+                goto nextByte;
+            }
+            for (i = 0; i < digits; ++i) {
                 c = (unsigned char) s[i];
                 if (!isxdigit(c)) {
-                    if (unicodeescape_decoding_error(&p, errors, message))
+                    endinpos = (s+i+1)-starts;
+                    if (unicode_decode_call_errorhandler(
+                        errors, &errorHandler,
+                        "unicodeescape", message,
+                        starts, size, &startinpos, &endinpos, &inputObject, &s,
+                        (PyObject **)&v, &outpos, &p))
                         goto onError;
-                    chr = 0xffffffff;
-                    i++;
-                    break;
+                    goto nextByte;
                 }
                 chr = (chr<<4) & ~0xF;
                 if (c >= '0' && c <= '9')
@@ -1620,9 +1692,9 @@
             }
             s += i;
             if (chr == 0xffffffff)
-                    /* _decoding_error will have already written into the
-                       target buffer. */
-                    break;
+                /* _decoding_error will have already written into the
+                   target buffer. */
+                break;
         store:
             /* when we get here, chr is a 32-bit unicode character */
             if (chr <= 0xffff)
@@ -1639,10 +1711,13 @@
                 *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
 #endif
             } else {
-                if (unicodeescape_decoding_error(
-                    &p, errors,
-                    "illegal Unicode character")
-                    )
+                endinpos = s-starts;
+                outpos = p-PyUnicode_AS_UNICODE(v);
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", "illegal Unicode character",
+                    starts, size, &startinpos, &endinpos, &inputObject, &s,
+                    (PyObject **)&v, &outpos, &p))
                     goto onError;
             }
             break;
@@ -1678,13 +1753,28 @@
                         goto store;
                 }
             }
-            if (unicodeescape_decoding_error(&p, errors, message))
+            /* s--; */
+            endinpos = s-starts;
+            outpos = p-PyUnicode_AS_UNICODE(v);
+            if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicodeescape", message,
+                starts, size, &startinpos, &endinpos, &inputObject, &s,
+                (PyObject **)&v, &outpos, &p))
                 goto onError;
             break;
 
         default:
             if (s > end) {
-                if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
+                message = "\\ at end of string";
+                s--;
+                endinpos = s-starts;
+                outpos = p-PyUnicode_AS_UNICODE(v);
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", message,
+                    starts, size, &startinpos, &endinpos, &inputObject, &s,
+                    (PyObject **)&v, &outpos, &p))
                     goto onError;
             }
             else {
@@ -1693,9 +1783,14 @@
             }
             break;
         }
+        nextByte:
+        ;
     }
-    if (_PyUnicode_Resize(&v, (int)(p - buf)))
-                goto onError;
+    if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
+        goto onError;
+    /* release the cached handler and input object on success as well */
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)v;
 
 ucnhashError:
@@ -1703,10 +1795,14 @@
         PyExc_UnicodeError,
         "\\N escapes not supported (can't load unicodedata module)"
         );
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return NULL;
 
 onError:
     Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return NULL;
 }
 
@@ -1870,20 +1966,27 @@
 					   int size,
 					   const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
     PyUnicodeObject *v;
-    Py_UNICODE *p, *buf;
+    Py_UNICODE *p;
     const char *end;
     const char *bs;
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
     
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
-       length after conversion to the true value. */
+       length after conversion to the true value. (But decoding error
+       handler might have to resize the string) */
     v = _PyUnicode_New(size);
     if (v == NULL)
 	goto onError;
     if (size == 0)
 	return (PyObject *)v;
-    p = buf = PyUnicode_AS_UNICODE(v);
+    p = PyUnicode_AS_UNICODE(v);
     end = s + size;
     while (s < end) {
 	unsigned char c;
@@ -1895,6 +1998,7 @@
 	    *p++ = (unsigned char)*s++;
 	    continue;
 	}
+	startinpos = s-starts;
 
 	/* \u-escapes are only interpreted iff the number of leading
 	   backslashes if odd */
@@ -1913,15 +2017,18 @@
 	s++;
 
 	/* \uXXXX with 4 hex digits */
-	for (x = 0, i = 0; i < 4; i++) {
-	    c = (unsigned char)s[i];
+	outpos = p-PyUnicode_AS_UNICODE(v);
+	for (x = 0, i = 0; i < 4; ++i, ++s) {
+	    c = (unsigned char)*s;
 	    if (!isxdigit(c)) {
-		if (unicodeescape_decoding_error(&p, errors,
-						 "truncated \\uXXXX"))
+		endinpos = s-starts;
+		if (unicode_decode_call_errorhandler(
+		    errors, &errorHandler,
+		    "rawunicodeescape", "truncated \\uXXXX",
+		    starts, size, &startinpos, &endinpos, &inputObject, &s,
+		    (PyObject **)&v, &outpos, &p))
 		    goto onError;
-		x = 0xffffffff;
-		i++;
-		break;
+		goto nextByte;
 	    }
 	    x = (x<<4) & ~0xF;
 	    if (c >= '0' && c <= '9')
@@ -1931,16 +2038,20 @@
 	    else
 		x += 10 + c - 'A';
 	}
-	s += i;
-	if (x != 0xffffffff)
-		*p++ = x;
+	*p++ = x;
+	nextByte:
+	;
     }
-    if (_PyUnicode_Resize(&v, (int)(p - buf)))
+    if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)v;
     
  onError:
     Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return NULL;
 }
 
@@ -2020,71 +2131,237 @@
     return NULL;
 }
 
-static
-int latin1_encoding_error(const Py_UNICODE **source,
-			  char **dest,
-			  const char *errors,
-			  const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "Latin-1 encoding error: %.400s",
-		     details);
-	return -1;
+/* error handling callback helper:
+   build arguments, call the callback and check its result,
+   put the new input position into newpos and return the replacement
+   string, which has to be decrefed by the caller
+*/
+
+static PyObject *unicode_encode_call_errorhandler(const char *errors, PyObject **errorHandler,
+                 const char *encoding, const char *reason,
+                 const Py_UNICODE *unicode, int size, PyObject **unicodeObject, int startpos, int endpos,
+                 int *newpos)
+{
+    static char *argparse = "O!i;encoding error handler must return (unicode, int) tuple";
+
+    PyObject *args;
+    PyObject *restuple;
+    PyObject *resunicode;
+
+    if (*errorHandler == NULL) {
+	*errorHandler = PyCodec_LookupUnicodeEncodeErrorHandler(errors);
+        if (*errorHandler == NULL)
+	    return NULL;
     }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
+
+    if (*unicodeObject == NULL) {
+	*unicodeObject = PyUnicode_FromUnicode(unicode, size);
+        if (*unicodeObject == NULL)
+	    return NULL;
     }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
+
+    /* we don't need a state */
+    args = Py_BuildValue("sOiisO", encoding, *unicodeObject, startpos, endpos, reason, Py_None);
+    if (args == NULL)
+	return NULL;
+    restuple = PyEval_CallObject(*errorHandler, args);
+    Py_DECREF(args);
+    if (restuple == NULL)
+	return NULL;
+    if (!PyTuple_Check(restuple)) {
+	PyErr_Format(PyExc_TypeError, &argparse[4]);
+	Py_DECREF(restuple);
+	return NULL;
     }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "Latin-1 encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &resunicode, newpos)) {
+	Py_DECREF(restuple);
+	return NULL;
     }
+    if (*newpos<0)
+	*newpos = 0;
+    else if (*newpos>size)
+	*newpos = size;
+    Py_INCREF(resunicode);
+    Py_DECREF(restuple);
+    return resunicode;
 }
 
-PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 				 int size,
-				 const char *errors)
+				 const char *errors,
+				 int limit)
 {
-    PyObject *repr;
-    char *s, *start;
-
-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
+    /* output object */
+    PyObject *res;
+    /* object version of input */
+    PyObject *unicodeObject = NULL;
+    /* pointers to the beginning and end+1 of input */
+    const Py_UNICODE *startp = p;
+    const Py_UNICODE *endp = p + size;
+    /* pointer to the beginning of the unencodable characters */
+    /* const Py_UNICODE *badp = NULL; */
+    /* pointer into the output */
+    char *str;
+    /* current output position */
+    int respos = 0;
+    int ressize;
+    char *encoding = (limit == 256) ? "latin-1" : "ascii";
+    char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
+    PyObject *errorHandler = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
+
+    /* allocate enough for a simple encoding without
+       replacements, if we need more, we'll resize */
+    res = PyString_FromStringAndSize(NULL, size);
+    if (res == NULL)
+        goto onError;
     if (size == 0)
-	return repr;
-
-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 256) {
-	    if (latin1_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(256)"))
-		goto onError;
+	return res;
+    str = PyString_AS_STRING(res);
+    ressize = size;
+
+    while (p<endp) {
+	Py_UNICODE c = *p;
+
+	/* can we encode this? */
+	if (c<limit) {
+	    /* no overflow check, because we know that the space is enough */
+	    *str++ = (char)c;
+	    ++p;
+	}
+	else {
+	    int unicodepos = p-startp;
+	    int requiredsize;
+	    PyObject *repunicode;
+	    int repsize;
+	    int newpos;
+	    int respos;
+	    Py_UNICODE *uni2;
+	    /* startpos for collecting unencodable chars */
+	    const Py_UNICODE *collstart = p;
+	    const Py_UNICODE *collend = p;
+	    /* find all unencodable characters */
+	    while ((collend < endp) && ((*collend)>=limit))
+		++collend;
+	    /* cache callback name lookup (if not done yet, i.e. it's the first error) */
+	    if (known_errorHandler==-1) {
+		if ((errors==NULL) || (!strcmp(errors, "strict")))
+		    known_errorHandler = 1;
+		else if (!strcmp(errors, "replace"))
+		    known_errorHandler = 2;
+		else if (!strcmp(errors, "ignore"))
+		    known_errorHandler = 3;
+		else if (!strcmp(errors, "xmlcharrefreplace"))
+		    known_errorHandler = 4;
+		else
+		    known_errorHandler = 0;
+	    }
+	    switch (known_errorHandler) {
+		case 1: /* strict */
+		    PyCodec_RaiseUnicodeEncodeError(encoding, startp, collstart-startp, collend-startp, reason);
+		    goto onError;
+		case 2: /* replace */
+		    while (collstart++<collend)
+			*str++ = '?'; /* fall through */
+		case 3: /* ignore */
+		    p = collend;
+		    break;
+		case 4: /* xmlcharrefreplace */
+		    respos = str-PyString_AS_STRING(res);
+		    /* determine replacement size (temporarily (mis)uses p) */
+		    for (p = collstart, repsize = 0; p < collend; ++p) {
+			if (*p<10)
+			    repsize += 2+1+1;
+			else if (*p<100)
+			    repsize += 2+2+1;
+			else if (*p<1000)
+			    repsize += 2+3+1;
+			else if (*p<10000)
+			    repsize += 2+4+1;
+			else if (*p<100000)
+			    repsize += 2+5+1;
+			else if (*p<1000000)
+			    repsize += 2+6+1;
+			else
+			    repsize += 2+7+1;
+		    }
+		    requiredsize = respos+repsize+(endp-collend);
+		    if (requiredsize > ressize) {
+			if (requiredsize<2*ressize)
+			    requiredsize = 2*ressize;
+			if (_PyString_Resize(&res, requiredsize))
+			    goto onError;
+			str = PyString_AS_STRING(res) + respos;
+			ressize = requiredsize;
+		    }
+		    /* generate replacement (temporarily (mis)uses p) */
+		    for (p = collstart; p < collend; ++p) {
+			str += sprintf(str, "&#%d;", (int)*p);
+		    }
+		    p = collend;
+		    break;
+		default:
+		    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+			encoding, reason, startp, size, &unicodeObject, collstart-startp, collend-startp, &newpos);
+		    if (repunicode == NULL)
+			goto onError;
+		    /* need more space? (at least enough for what we
+		       have+the replacement+the rest of the string, so
+		       we won't have to check space for encodable characters) */
+		    respos = str-PyString_AS_STRING(res);
+		    repsize = PyUnicode_GET_SIZE(repunicode);
+		    requiredsize = respos+repsize+(endp-collend);
+		    if (requiredsize > ressize) {
+			if (requiredsize<2*ressize)
+			    requiredsize = 2*ressize;
+			if (_PyString_Resize(&res, requiredsize)) {
+			    Py_DECREF(repunicode);
+			    goto onError;
+			}
+			str = PyString_AS_STRING(res) + respos;
+			ressize = requiredsize;
+		    }
+		    /* check if there is anything unencodable in the replacement
+		       and copy it to the output */
+		    for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
+			c = *uni2;
+			if (c >= limit) {
+			    PyCodec_RaiseUnicodeEncodeError(encoding, startp, unicodepos, unicodepos+1, reason);
+			    Py_DECREF(repunicode);
+			    goto onError;
+			}
+			*str = (char)c;
+		    }
+		    p = startp + newpos;
+		    Py_DECREF(repunicode);
+	    }
 	}
-	else
-            *s++ = (char)ch;
     }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	_PyString_Resize(&repr, s - start);
-    return repr;
+    /* Resize if we allocated too much */
+    respos = str-PyString_AS_STRING(res);
+    if (respos<ressize)
+       /* If this fails res will be NULL */
+	_PyString_Resize(&res, respos);
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
+    return res;
 
- onError:
-    Py_DECREF(repr);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
     return NULL;
 }
 
+PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+				 int size,
+				 const char *errors)
+{
+    return unicode_encode_ucs1(p, size, errors, 256);
+}
+
 PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
 {
     if (!PyUnicode_Check(unicode)) {
@@ -2098,42 +2375,19 @@
 
 /* --- 7-bit ASCII Codec -------------------------------------------------- */
 
-static
-int ascii_decoding_error(const char **source,
-			 Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "ASCII decoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "ASCII decoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeASCII(const char *s,
 				int size,
 				const char *errors)
 {
+    const char *starts = s;
     PyUnicodeObject *v;
     Py_UNICODE *p;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    const char *e;
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
     
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
     if (size == 1 && *(unsigned char*)s < 128) {
@@ -2147,89 +2401,44 @@
     if (size == 0)
 	return (PyObject *)v;
     p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	register unsigned char c;
-
-	c = (unsigned char)*s++;
-	if (c < 128)
+    e = s + size;
+    while (s < e) {
+	register unsigned char c = (unsigned char)*s;
+	if (c < 128) {
 	    *p++ = c;
-	else if (ascii_decoding_error(&s, &p, errors, 
-				      "ordinal not in range(128)"))
+	    ++s;
+	}
+	else {
+	    startinpos = s-starts;
+	    endinpos = startinpos + 1;
+	    outpos = p-PyUnicode_AS_UNICODE(v);
+	    if (unicode_decode_call_errorhandler(
+		 errors, &errorHandler,
+		 "ascii", "ordinal not in range(128)",
+		 starts, size, &startinpos, &endinpos, &inputObject, &s,
+		 (PyObject **)&v, &outpos, &p))
 		goto onError;
+	}
     }
     if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
 	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	    goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)v;
     
  onError:
     Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return NULL;
 }
 
-static
-int ascii_encoding_error(const Py_UNICODE **source,
-			 char **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "ASCII encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "ASCII encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
 				int size,
 				const char *errors)
 {
-    PyObject *repr;
-    char *s, *start;
-
-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-	return repr;
-
-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 128) {
-	    if (ascii_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(128)"))
-		goto onError;
-	}
-	else
-            *s++ = (char)ch;
-    }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	_PyString_Resize(&repr, s - start);
-    return repr;
-
- onError:
-    Py_DECREF(repr);
-    return NULL;
+    return unicode_encode_ucs1(p, size, errors, 128);
 }
 
 PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
@@ -2309,44 +2518,21 @@
 
 /* --- Character Mapping Codec -------------------------------------------- */
 
-static
-int charmap_decoding_error(const char **source,
-			 Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "charmap decoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "charmap decoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeCharmap(const char *s,
 				  int size,
 				  PyObject *mapping,
 				  const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    const char *e;
     PyUnicodeObject *v;
     Py_UNICODE *p;
     int extrachars = 0;
+    PyObject *errorHandler = NULL;
+    PyObject *inputObject = NULL;
     
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -2358,8 +2544,9 @@
     if (size == 0)
 	return (PyObject *)v;
     p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	unsigned char ch = *s++;
+    e = s + size;
+    while (s < e) {
+	unsigned char ch = *s;
 	PyObject *w, *x;
 
 	/* Get mapping (char ordinal -> integer, Unicode char or None) */
@@ -2391,11 +2578,18 @@
 	}
 	else if (x == Py_None) {
 	    /* undefined mapping */
-	    if (charmap_decoding_error(&s, &p, errors, 
-				       "character maps to <undefined>")) {
+	    outpos = p-PyUnicode_AS_UNICODE(v);
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
+	    if (unicode_decode_call_errorhandler(
+		 errors, &errorHandler,
+		 "charmap", "character maps to <undefined>",
+		 starts, size, &startinpos, &endinpos, &inputObject, &s,
+		 (PyObject **)&v, &outpos, &p)) {
 		Py_DECREF(x);
 		goto onError;
 	    }
+	    continue;
 	}
 	else if (PyUnicode_Check(x)) {
 	    int targetsize = PyUnicode_GET_SIZE(x);
@@ -2435,45 +2629,229 @@
 	    goto onError;
 	}
 	Py_DECREF(x);
+	++s;
     }
     if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
 	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	    goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     return (PyObject *)v;
     
  onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(inputObject);
     Py_XDECREF(v);
     return NULL;
 }
 
-static
-int charmap_encoding_error(const Py_UNICODE **source,
-			   char **dest,
-			   const char *errors,
-			   const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "charmap encoding error: %.400s",
-		     details);
-	return -1;
+/* Look up the character c in the mapping. If the character
+   can't be found, Py_None is returned (or NULL, if another
+   error occurred). */
+static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
+{
+    PyObject *w = PyInt_FromLong((long)c);
+    PyObject *x;
+
+    if (w == NULL)
+	 return NULL;
+    x = PyObject_GetItem(mapping, w);
+    Py_DECREF(w);
+    if (x == NULL) {
+	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+	    /* No mapping found means: mapping is undefined. */
+	    PyErr_Clear();
+	    x = Py_None;
+	    Py_INCREF(x);
+	    return x;
+	} else
+	    return NULL;
     }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
+    else if (PyInt_Check(x)) {
+	long value = PyInt_AS_LONG(x);
+	if (value < 0 || value > 255) {
+	    PyErr_SetString(PyExc_TypeError,
+			     "character mapping must be in range(256)");
+	    Py_DECREF(x);
+	    return NULL;
+	}
+	return x;
     }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
+    else if (PyString_Check(x))
+	return x;
+    else {
+	/* wrong return value */
+	PyErr_SetString(PyExc_TypeError,
+	      "character mapping must return integer, None or str");
+	Py_DECREF(x);
+	return NULL;
+    }
+}
+
+/* look up the character, put the result in the output string and adjust
+   various state variables. Return a new reference to the object that
+   was put in the output buffer, or Py_None, if the mapping was undefined
+   (in which case no character was written) or NULL, if the lookup or a
+   reallocation failed. The caller must decref the result */
+static
+PyObject *charmapencode_output(Py_UNICODE c, PyObject *mapping,
+    PyObject **outobj, int *outpos)
+{
+    char *outstart;
+    int outsize;
+    int requiredsize;
+    const char *repchars;
+    char repchar;
+    int repsize;
+    PyObject *rep = charmapencode_lookup(c, mapping);
+
+    if (rep==NULL)
+	return NULL;
+    else if (rep==Py_None)
+	return rep;
+    else if (PyInt_Check(rep)) {
+	repchar = (char)PyInt_AS_LONG(rep);
+	repchars = &repchar;
+	repsize = 1;
     }
     else {
-	PyErr_Format(PyExc_ValueError,
-		     "charmap encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
+	repsize = PyString_GET_SIZE(rep);
+	repchars = PyString_AS_STRING(rep);
+    }
+    outstart = PyString_AS_STRING(*outobj);
+    outsize = PyString_GET_SIZE(*outobj);
+    requiredsize = *outpos+repsize;
+
+    if (outsize<requiredsize) {
+	/* exponentially overallocate to minimize reallocations */
+	if (requiredsize < 2*outsize)
+	    requiredsize = 2*outsize;
+	if (_PyString_Resize(outobj, requiredsize)) {
+	    Py_DECREF(rep);
+	    return NULL;
+	}
+	outstart = PyString_AS_STRING(*outobj);
+    }
+    memcpy(outstart + *outpos, repchars, repsize);
+    *outpos += repsize;
+    return rep;
+}
+
+/* handle an error in PyUnicode_EncodeCharmap (*errorHandler caches the
+   callback for the caller to release). Return 0 on success, -1 on error */
+static
+int charmap_encoding_error(
+    const Py_UNICODE *p, int size, int *inpos, PyObject *mapping,
+    PyObject **unicodeObject,
+    int *known_errorHandler, PyObject **errorHandler, const char *errors,
+    PyObject **res, int *respos)
+{
+    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+    int repsize;
+    int newpos;
+    Py_UNICODE *uni2;
+    /* startpos for collecting unencodable chars */
+    int collstartpos = *inpos;
+    int collendpos = *inpos+1;
+    int collpos;
+    char *encoding = "charmap";
+    char *reason = "character maps to <undefined>";
+
+    PyObject *x;
+    /* find all unencodable characters */
+    while (collendpos < size) {
+	x = charmapencode_lookup(p[collendpos], mapping);
+	if (x==NULL)
+	    return -1;
+	else if (x!=Py_None) {
+	    Py_DECREF(x);
+	    break;
+	}
+	Py_DECREF(x);
+	++collendpos;
+    }
+    /* cache callback name lookup
+     * (if not done yet, i.e. it's the first error) */
+    if (*known_errorHandler==-1) {
+	if ((errors==NULL) || (!strcmp(errors, "strict")))
+	    *known_errorHandler = 1;
+	else if (!strcmp(errors, "replace"))
+	    *known_errorHandler = 2;
+	else if (!strcmp(errors, "ignore"))
+	    *known_errorHandler = 3;
+	else if (!strcmp(errors, "xmlcharrefreplace"))
+	    *known_errorHandler = 4;
+	else
+	    *known_errorHandler = 0;
+    }
+    switch (*known_errorHandler) {
+	case 1: /* strict */
+	    PyCodec_RaiseUnicodeEncodeError(encoding, p, collstartpos, collendpos, reason);
+	    return -1;
+	case 2: /* replace */
+	    for (collpos = collstartpos; collpos<collendpos; ++collpos) {
+		x = charmapencode_output('?', mapping, res, respos);
+		if (x==NULL) {
+		    return -1;
+		}
+		else if (x==Py_None) {
+		    Py_DECREF(x);
+		    PyCodec_RaiseUnicodeEncodeError(encoding, p, collstartpos, collendpos, reason);
+		    return -1;
+		}
+		Py_DECREF(x);
+	    }
+	    /* fall through */
+	case 3: /* ignore */
+	    *inpos = collendpos;
+	    break;
+	case 4: /* xmlcharrefreplace */
+	    /* generate replacement, pushing each "&#N;" character through the mapping */
+	    for (collpos = collstartpos; collpos < collendpos; ++collpos) {
+		char buffer[2+29+1+1];
+		char *cp;
+		sprintf(buffer, "&#%d;", (int)p[collpos]);
+		for (cp = buffer; *cp; ++cp) {
+		    x = charmapencode_output(*cp, mapping, res, respos);
+		    if (x==NULL)
+			return -1;
+		    else if (x==Py_None) {
+			Py_DECREF(x);
+			PyCodec_RaiseUnicodeEncodeError(encoding, p, collstartpos, collendpos, reason);
+			return -1;
+		    }
+		    Py_DECREF(x);
+		}
+	    }
+	    *inpos = collendpos;
+	    break;
+	default:
+	    repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
+		encoding, reason, p, size, unicodeObject,
+		collstartpos, collendpos, &newpos);
+	    if (repunicode == NULL)
+		return -1;
+	    /* generate replacement  */
+	    repsize = PyUnicode_GET_SIZE(repunicode);
+	    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+		x = charmapencode_output(*uni2, mapping, res, respos);
+		if (x==NULL) {
+		    Py_DECREF(repunicode);
+		    return -1;
+		}
+		else if (x==Py_None) {
+		    Py_DECREF(repunicode);
+		    Py_DECREF(x);
+		    PyCodec_RaiseUnicodeEncodeError(encoding, p,
+			collstartpos, collendpos, reason);
+		    return -1;
+		}
+		Py_DECREF(x);
+	    }
+	    *inpos = newpos;
+	    Py_DECREF(repunicode);
     }
+    return 0;
 }
 
 PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
@@ -2481,101 +2859,63 @@
 				  PyObject *mapping,
 				  const char *errors)
 {
-    PyObject *v;
-    char *s;
-    int extrachars = 0;
+    /* output object */
+    PyObject *res = NULL;
+    /* object version of input */
+    PyObject *unicodeObject = NULL;
+    /* current input position */
+    int inpos = 0;
+    /* current output position */
+    int respos = 0;
+    PyObject *errorHandler = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
+     * 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
 
     /* Default to Latin-1 */
     if (mapping == NULL)
 	return PyUnicode_EncodeLatin1(p, size, errors);
 
-    v = PyString_FromStringAndSize(NULL, size);
-    if (v == NULL)
-        return NULL;
+    /* allocate enough for a simple encoding without
+       replacements, if we need more, we'll resize */
+    res = PyString_FromStringAndSize(NULL, size);
+    if (res == NULL)
+        goto onError;
     if (size == 0)
-	return v;
-    s = PyString_AS_STRING(v);
-    while (size-- > 0) {
-	Py_UNICODE ch = *p++;
-	PyObject *w, *x;
+	return res;
 
-	/* Get mapping (Unicode ordinal -> string char, integer or None) */
-	w = PyInt_FromLong((long)ch);
-	if (w == NULL)
+    while (inpos<size) {
+	/* try to encode it */
+	PyObject *x = charmapencode_output(p[inpos], mapping, &res, &respos);
+	if (x==NULL) /* error */
 	    goto onError;
-	x = PyObject_GetItem(mapping, w);
-	Py_DECREF(w);
-	if (x == NULL) {
-	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found means: mapping is undefined. */
-		PyErr_Clear();
-		x = Py_None;
-		Py_INCREF(x);
-	    } else
+	if (x==Py_None) { /* unencodable character */
+	    if (charmap_encoding_error(p, size, &inpos, mapping,
+		&unicodeObject,
+		&known_errorHandler, &errorHandler, errors,
+		&res, &respos))
 		goto onError;
 	}
+	else
+	    /* done with this character => adjust input position */
+	    ++inpos;
+	Py_DECREF(x);
+    }
 
-	/* Apply mapping */
-	if (PyInt_Check(x)) {
-	    long value = PyInt_AS_LONG(x);
-	    if (value < 0 || value > 255) {
-		PyErr_SetString(PyExc_TypeError,
-				"character mapping must be in range(256)");
-		Py_DECREF(x);
-		goto onError;
-	    }
-	    *s++ = (char)value;
-	}
-	else if (x == Py_None) {
-	    /* undefined mapping */
-	    if (charmap_encoding_error(&p, &s, errors, 
-				       "character maps to <undefined>")) {
-		Py_DECREF(x);
-		goto onError;
-	    }
-	}
-	else if (PyString_Check(x)) {
-	    int targetsize = PyString_GET_SIZE(x);
-
-	    if (targetsize == 1)
-		/* 1-1 mapping */
-		*s++ = *PyString_AS_STRING(x);
-
-	    else if (targetsize > 1) {
-		/* 1-n mapping */
-		if (targetsize > extrachars) {
-		    /* resize first */
-		    int oldpos = (int)(s - PyString_AS_STRING(v));
-		    int needed = (targetsize - extrachars) + \
-			         (targetsize << 2);
-		    extrachars += needed;
-		    if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) {
-			Py_DECREF(x);
-			goto onError;
-		    }
-		    s = PyString_AS_STRING(v) + oldpos;
-		}
-		memcpy(s, PyString_AS_STRING(x), targetsize);
-		s += targetsize;
-		extrachars -= targetsize;
-	    }
-	    /* 1-0 mapping: skip the character */
-	}
-	else {
-	    /* wrong return value */
-	    PyErr_SetString(PyExc_TypeError,
-		  "character mapping must return integer, None or unicode");
-	    Py_DECREF(x);
+    /* Resize if we allocated too much */
+    if (respos<PyString_GET_SIZE(res)) {
+	if (_PyString_Resize(&res, respos))
 	    goto onError;
-	}
-	Py_DECREF(x);
     }
-    if (s - PyString_AS_STRING(v) < PyString_GET_SIZE(v))
-	_PyString_Resize(&v, (int)(s - PyString_AS_STRING(v)));
-    return v;
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
+    return res;
 
- onError:
-    Py_XDECREF(v);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
     return NULL;
 }
 
@@ -2592,115 +2932,260 @@
 				   NULL);
 }
 
+/* Lookup the character ch in the mapping and put the result in result,
+   which must be decrefed by the caller.
+   Return 0 on success, -1 on error */
 static
-int translate_error(const Py_UNICODE **source,
-		    Py_UNICODE **dest,
-		    const char *errors,
-		    const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "translate error: %.400s",
-		     details);
-	return -1;
+int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
+{
+    PyObject *w = PyInt_FromLong((long)c);
+    PyObject *x;
+
+    if (w == NULL)
+	 return -1;
+    x = PyObject_GetItem(mapping, w);
+    Py_DECREF(w);
+    if (x == NULL) {
+	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+	    /* No mapping found means: use 1:1 mapping. */
+	    PyErr_Clear();
+	    *result = NULL;
+	    return 0;
+	} else
+	    return -1;
     }
-    else if (strcmp(errors,"ignore") == 0) {
+    else if (x == Py_None) {
+	*result = x;
 	return 0;
     }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
+    else if (PyInt_Check(x)) {
+	long value = PyInt_AS_LONG(x);
+	long max = PyUnicode_GetMax();
+	if (value < 0 || value > max) {
+	    PyErr_Format(PyExc_TypeError,
+			     "character mapping must be in range(0x%lx)", max+1);
+	    Py_DECREF(x);
+	    return -1;
+	}
+	*result = x;
+	return 0;
+    }
+    else if (PyUnicode_Check(x)) {
+	*result = x;
 	return 0;
     }
     else {
-	PyErr_Format(PyExc_ValueError,
-		     "translate error; "
-		     "unknown error handling code: %.400s",
-		     errors);
+	/* wrong return value: drop our reference to it, then fail */
+	Py_DECREF(x);
+	PyErr_SetString(PyExc_TypeError, "character mapping must return integer, None or unicode");
 	return -1;
     }
 }
+/* Make sure the output buffer *outobj can hold at least requiredsize
+   characters.  When it has to grow, *outp and *outsize are updated to
+   match the resized object.  Return 0 on success, -1 on error */
+static
+int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
+    int requiredsize)
+{
+    int offset;
+    /* nothing to do while the current buffer is large enough */
+    if (requiredsize <= *outsize)
+	return 0;
+    offset = *outp-PyUnicode_AS_UNICODE(*outobj);
+    /* grow to at least twice the old size to keep reallocations rare */
+    requiredsize = (requiredsize < 2 * *outsize) ? 2 * *outsize : requiredsize;
+    if (_PyUnicode_Resize(outobj, requiredsize))
+	return -1;
+    *outp = PyUnicode_AS_UNICODE(*outobj) + offset;
+    *outsize = requiredsize;
+    return 0;
+}
+/* Translate c through mapping and append the result to the output,
+   updating *outobj, *outsize and *outp.  *res receives the lookup
+   result: NULL if c has no entry (c is copied), Py_None if the mapping
+   is undefined (nothing is written), else the int/unicode that was
+   written.  The caller must decref *res.  Return 0 on success, -1 on error. */
+static
+int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
+    PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
+{
+    PyObject *mapped;
+    int replen;
+    if (charmaptranslate_lookup(c, mapping, res))
+	return -1;
+    mapped = *res;
+    if (mapped==NULL) {
+	/* not found => default to 1:1 mapping */
+	*(*outp)++ = (Py_UNICODE)c;
+	return 0;
+    }
+    if (mapped==Py_None)
+	return 0;
+    if (PyInt_Check(mapped)) {
+	/* no overflow check, because we know that the space is enough */
+	*(*outp)++ = (Py_UNICODE)PyInt_AS_LONG(mapped);
+	return 0;
+    }
+    if (!PyUnicode_Check(mapped))
+	return -1;
+    replen = PyUnicode_GET_SIZE(mapped);
+    if (replen==1) {
+	*(*outp)++ = *PyUnicode_AS_UNICODE(mapped);
+    }
+    else if (replen!=0) {
+	/* more than one character: grow the buffer by the surplus */
+	if (charmaptranslate_makespace(outobj, outp, outsize, *outsize + replen - 1))
+	    return -1;
+	memcpy(*outp, PyUnicode_AS_UNICODE(mapped), sizeof(Py_UNICODE)*replen);
+	*outp += replen;
+    }
+    return 0;
+}
 
-PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s,
+PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
 				     int size,
 				     PyObject *mapping,
 				     const char *errors)
 {
-    PyUnicodeObject *v;
-    Py_UNICODE *p;
-    
+    /* output object */
+    PyObject *res = NULL;
+    /* object version of input */
+    PyObject *unicodeObject = NULL;
+    /* pointers to the beginning and end+1 of input */
+    const Py_UNICODE *startp = p;
+    const Py_UNICODE *endp = p + size;
+    /* pointer into the output */
+    Py_UNICODE *str;
+    /* current output position */
+    int respos = 0;
+    int ressize;
+    char *encoding = "charmap";
+    char *reason = "character maps to <undefined>";
+    PyObject *errorHandler = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
+     * 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
+
     if (mapping == NULL) {
 	PyErr_BadArgument();
 	return NULL;
     }
-    
-    /* Output will never be longer than input */
-    v = _PyUnicode_New(size);
-    if (v == NULL)
-	goto onError;
-    if (size == 0)
-	goto done;
-    p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	Py_UNICODE ch = *s++;
-	PyObject *w, *x;
 
-	/* Get mapping */
-	w = PyInt_FromLong(ch);
-	if (w == NULL)
-	    goto onError;
-	x = PyObject_GetItem(mapping, w);
-	Py_DECREF(w);
-	if (x == NULL) {
-	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found: default to 1-1 mapping */
-		PyErr_Clear();
-		*p++ = ch;
-		continue;
-	    }
+    /* allocate enough for a simple 1:1 translation without
+       replacements, if we need more, we'll resize */
+    res = PyUnicode_FromUnicode(NULL, size);
+    if (res == NULL)
+        goto onError;
+    if (size == 0)
+	return res;
+    str = PyUnicode_AS_UNICODE(res);
+    ressize = size;
+
+    while (p<endp) {
+	/* try to encode it */
+	PyObject *x = NULL;
+	if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
+	    Py_XDECREF(x);
 	    goto onError;
 	}
-
-	/* Apply mapping */
-	if (PyInt_Check(x))
-	    *p++ = (Py_UNICODE)PyInt_AS_LONG(x);
-	else if (x == Py_None) {
-	    /* undefined mapping */
-	    if (translate_error(&s, &p, errors, 
-				"character maps to <undefined>")) {
-		Py_DECREF(x);
-		goto onError;
-	    }
-	}
-	else if (PyUnicode_Check(x)) {
-	    if (PyUnicode_GET_SIZE(x) != 1) {
-		/* 1-n mapping */
-		PyErr_SetString(PyExc_NotImplementedError,
-				"1-n mappings are currently not implemented");
-		Py_DECREF(x);
-		goto onError;
+	/* release x now (only its identity is used below); it leaked on success */
+	Py_XDECREF(x);
+	if (x!=Py_None) /* it worked => adjust input pointer */
+	    ++p;
+	else { /* untranslatable character */
+	    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+	    int repsize;
+	    int newpos;
+	    Py_UNICODE *uni2;
+	    /* startpos for collecting untranslatable chars */
+	    const Py_UNICODE *collstart = p;
+	    const Py_UNICODE *collend = p+1;
+	    const Py_UNICODE *coll;
+	    /* find all untranslatable characters */
+	    while (collend < endp) {
+	    	if (charmaptranslate_lookup(*collend, mapping, &x))
+		    goto onError;
+		Py_XDECREF(x);
+		if (x!=Py_None)
+		    break;
+		++collend;
+	    }
+	    /* cache callback name lookup
+	     * (if not done yet, i.e. it's the first error) */
+	    if (known_errorHandler==-1) {
+		if ((errors==NULL) || (!strcmp(errors, "strict")))
+		    known_errorHandler = 1;
+		else if (!strcmp(errors, "replace"))
+		    known_errorHandler = 2;
+		else if (!strcmp(errors, "ignore"))
+		    known_errorHandler = 3;
+		else if (!strcmp(errors, "xmlcharrefreplace"))
+		    known_errorHandler = 4;
+		else
+		    known_errorHandler = 0;
+	    }
+	    switch (known_errorHandler) {
+		case 1: /* strict */
+		    PyCodec_RaiseUnicodeEncodeError(encoding, startp, collstart-startp, collend-startp, reason);
+		    goto onError;
+		case 2: /* replace */
+		    /* No need to check for space, this is a 1:1 replacement */
+		    for (coll = collstart; coll<collend; ++coll)
+			*str++ = '?';
+		    /* fall through */
+		case 3: /* ignore */
+		    p = collend;
+		    break;
+		case 4: /* xmlcharrefreplace */
+		    /* generate replacement (temporarily (mis)uses p) */
+		    for (p = collstart; p < collend; ++p) {
+			char buffer[2+29+1+1];
+			char *cp;
+			sprintf(buffer, "&#%d;", (int)*p);
+			if (charmaptranslate_makespace(&res, &str, &ressize,
+			    (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
+			    goto onError;
+			for (cp = buffer; *cp; ++cp)
+			    *str++ = *cp;
+		    }
+		    p = collend;
+		    break;
+		default:
+		    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+			encoding, reason, startp, size, &unicodeObject,
+			collstart-startp, collend-startp, &newpos);
+		    if (repunicode == NULL)
+			goto onError;
+		    /* generate replacement  */
+		    repsize = PyUnicode_GET_SIZE(repunicode);
+		    if (charmaptranslate_makespace(&res, &str, &ressize,
+			(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
+			Py_DECREF(repunicode);
+			goto onError;
+		    }
+		    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
+			*str++ = *uni2;
+		    p = startp + newpos;
+		    Py_DECREF(repunicode);
 	    }
-	    *p++ = *PyUnicode_AS_UNICODE(x);
 	}
-	else {
-	    /* wrong return value */
-	    PyErr_SetString(PyExc_TypeError,
-		  "translate mapping must return integer, None or unicode");
-	    Py_DECREF(x);
-	    goto onError;
-	}
-	Py_DECREF(x);
     }
-    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
-	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
+    /* Resize if we allocated too much */
+    respos = str-PyUnicode_AS_UNICODE(res);
+    if (respos<ressize) {
+	if (_PyUnicode_Resize(&res, respos))
 	    goto onError;
+    }
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
+    return res;
 
- done:
-    return (PyObject *)v;
-    
- onError:
-    Py_XDECREF(v);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
     return NULL;
 }
 
@@ -2733,6 +3218,14 @@
 			    const char *errors)
 {
     Py_UNICODE *p, *end;
+    /* object version of input */
+    PyObject *unicodeObject = NULL;
+    PyObject *errorHandler = NULL;
+    const char *encoding = "decimal";
+    const char *reason = "invalid decimal Unicode string";
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
 
     if (output == NULL) {
 	PyErr_BadArgument();
@@ -2742,40 +3235,110 @@
     p = s;
     end = s + length;
     while (p < end) {
-	register Py_UNICODE ch = *p++;
+	register Py_UNICODE ch = *p;
 	int decimal;
+	PyObject *repunicode;
+	int repsize;
+	int newpos;
+	Py_UNICODE *uni2;
+	Py_UNICODE *collstart;
+	Py_UNICODE *collend;
 	
 	if (Py_UNICODE_ISSPACE(ch)) {
 	    *output++ = ' ';
+	    ++p;
 	    continue;
 	}
 	decimal = Py_UNICODE_TODECIMAL(ch);
 	if (decimal >= 0) {
 	    *output++ = '0' + decimal;
+	    ++p;
 	    continue;
 	}
 	if (0 < ch && ch < 256) {
 	    *output++ = (char)ch;
+	    ++p;
 	    continue;
 	}
-	/* All other characters are considered invalid */
-	if (errors == NULL || strcmp(errors, "strict") == 0) {
-	    PyErr_SetString(PyExc_ValueError,
-			    "invalid decimal Unicode string");
-	    goto onError;
+	/* All other characters are unencodable: collect the whole run */
+	collstart = p;
+	collend = p+1;
+	for (; collend < end; ++collend) {
+	    if ((0 < *collend && *collend < 256) ||
+	        Py_UNICODE_ISSPACE(*collend) ||
+	        Py_UNICODE_TODECIMAL(*collend) >= 0)
+		break;
 	}
-	else if (strcmp(errors, "ignore") == 0)
-	    continue;
-	else if (strcmp(errors, "replace") == 0) {
-	    *output++ = '?';
-	    continue;
+	/* cache callback name lookup
+	 * (if not done yet, i.e. it's the first error) */
+	if (known_errorHandler==-1) {
+	    if ((errors==NULL) || (!strcmp(errors, "strict")))
+		known_errorHandler = 1;
+	    else if (!strcmp(errors, "replace"))
+		known_errorHandler = 2;
+	    else if (!strcmp(errors, "ignore"))
+		known_errorHandler = 3;
+	    else if (!strcmp(errors, "xmlcharrefreplace"))
+		known_errorHandler = 4;
+	    else
+		known_errorHandler = 0;
+	}
+	switch (known_errorHandler) {
+	    case 1: /* strict */
+		PyCodec_RaiseUnicodeEncodeError(encoding, s, collstart-s, collend-s, reason);
+		goto onError;
+	    case 2: /* replace */
+		for (p = collstart; p < collend; ++p)
+		    *output++ = '?';
+		/* fall through */
+	    case 3: /* ignore */
+		p = collend;
+		break;
+	    case 4: /* xmlcharrefreplace */
+		/* generate replacement (temporarily (mis)uses p) */
+		for (p = collstart; p < collend; ++p)
+		    output += sprintf(output, "&#%d;", (int)*p);
+		p = collend;
+		break;
+	    default:
+		repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+		    encoding, reason, s, length, &unicodeObject,
+		    collstart-s, collend-s, &newpos);
+		if (repunicode == NULL)
+		    goto onError;
+		/* generate replacement  */
+		repsize = PyUnicode_GET_SIZE(repunicode);
+		for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+		    Py_UNICODE ch = *uni2;
+		    if (Py_UNICODE_ISSPACE(ch))
+			*output++ = ' ';
+		    else {
+			decimal = Py_UNICODE_TODECIMAL(ch);
+			if (decimal >= 0)
+			    *output++ = '0' + decimal;
+			else if (0 < ch && ch < 256)
+			    *output++ = (char)ch;
+			else {
+			    Py_DECREF(repunicode);
+			    PyCodec_RaiseUnicodeEncodeError(encoding, s,
+				collstart-s, collend-s, reason);
+			    goto onError;
+			}
+		    }
+		}
+		p = s + newpos;
+		Py_DECREF(repunicode);
 	}
     }
     /* 0-terminate the output string */
     *output++ = '\0';
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
     return 0;
 
  onError:
+    Py_XDECREF(unicodeObject);
+    Py_XDECREF(errorHandler);
     return -1;
 }
 
@@ -3865,7 +4428,9 @@
 Return an encoded string version of S. Default encoding is the current\n\
 default string encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.";
+a ValueError. Other possible values are 'ignore', 'replace' and\n\
+'xmlcharrefreplace' as well as any other name registered with\n\
+codecs.register_unicodeencodeerrorhandler.";
 
 static PyObject *
 unicode_encode(PyUnicodeObject *self, PyObject *args)
Index: Python/codecs.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/codecs.c,v
retrieving revision 2.13
diff -u -r2.13 codecs.c
--- Python/codecs.c	26 Sep 2000 05:46:01 -0000	2.13
+++ Python/codecs.c	29 May 2002 20:44:05 -0000
@@ -416,12 +416,487 @@
     return NULL;
 }
 
+static PyObject *_PyCodec_UnicodeEncodeErrorHandlerRegistry;
+
+/* Register the callable error as the encoding error handler called name.
+   An encoder invokes it when it meets an unencodable character and was
+   given name as its errors argument.  A non-callable error is rejected
+   with a TypeError.
+   Return 0 on success, -1 on error */
+int PyCodec_RegisterUnicodeEncodeErrorHandler(const char *name, PyObject *error)
+{
+    int status = -1;
+    if (PyCallable_Check(error))
+	status = PyDict_SetItemString(
+	    _PyCodec_UnicodeEncodeErrorHandlerRegistry, (char *)name, error);
+    else
+	PyErr_SetString(PyExc_TypeError, "handler must be callable");
+    return status;
+}
+
+/* Return a new reference to the encoding error handler registered as name,
+   or NULL with a LookupError set.  A NULL name means "strict". */
+PyObject *PyCodec_LookupUnicodeEncodeErrorHandler(const char *name)
+{
+    const char *key = (name!=NULL) ? name : "strict";
+    PyObject *callback = PyDict_GetItemString(
+	_PyCodec_UnicodeEncodeErrorHandlerRegistry, (char *)key);
+    if (callback==NULL) {
+	PyErr_Format(PyExc_LookupError,
+	    "unknown error handler name '%.400s'", key);
+	return NULL;
+    }
+    /* the dictionary lookup does not give us a reference of our own */
+    Py_INCREF(callback);
+    return callback;
+}
+
+/* Set a UnicodeError describing the unencodable run str[startpos:endpos] */
+void PyCodec_RaiseUnicodeEncodeError(
+    const char *encoding, const Py_UNICODE *str, int startpos, int endpos,
+    const char *reason)
+{
+    if (endpos!=startpos+1) {
+	PyErr_Format(PyExc_UnicodeError,
+	    "'%.400s' codec can't encode characters in position %d-%d: %.400s",
+	    encoding, startpos, endpos-1, reason);
+	return;
+    }
+    PyErr_Format(PyExc_UnicodeError,
+	"'%.400s' codec can't encode character '\\u%x' in position %d: %.400s",
+	encoding, (int)str[startpos], startpos, reason);
+}
+
+/* Encode error handler "strict": always fails.  Returns NULL with either
+   the argument parsing error or the UnicodeError for the reported range. */
+PyObject *PyCodec_RaiseUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    char *encoding;
+    const char *reason;
+    Py_UNICODE *unicode;
+    int size, startpos, endpos;
+    PyObject *state;
+    if (!PyArg_ParseTuple(args, "su#iisO:raise_unicodeencode_errors",
+	&encoding, &unicode, &size, &startpos, &endpos, &reason, &state))
+	return NULL;
+    PyCodec_RaiseUnicodeEncodeError(encoding, unicode, startpos, endpos, reason);
+    return NULL;
+}
+
+
+/* Encode error handler "ignore": returns (u"", endpos), i.e. an empty
+   replacement for the unencodable characters. */
+PyObject *PyCodec_IgnoreUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *unicode, *reason, *state;
+    int startpos, endpos;
+    int parsed = PyArg_ParseTuple(args, "OOiiOO:ignore_unicodeencode_errors",
+	&encoding, &unicode, &startpos, &endpos, &reason, &state);
+
+    if (!parsed)
+	return NULL;
+    /* "u#" turns a NULL pointer into None even for length 0, so some
+       non-NULL address is passed; presumably it is never read */
+    return Py_BuildValue("(u#i)", &endpos, 0, endpos);
+}
+
+
+/* Encode error handler "replace": returns the tuple (u"?"*(endpos-startpos),
+   endpos), i.e. one question mark per unencodable character. */
+PyObject *PyCodec_ReplaceUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *unicode, *reason, *state;
+    int startpos, endpos;
+    PyObject *res;
+    PyObject *restuple;
+    Py_UNICODE *p;
+    Py_UNICODE *end;
+
+    if (!PyArg_ParseTuple(args, "OOiiOO:replace_unicodeencode_errors",
+	&encoding, &unicode, &startpos, &endpos, &reason, &state))
+	return NULL;
+
+    /* one '?' for every character in [startpos, endpos) */
+    res = PyUnicode_FromUnicode(NULL, endpos-startpos);
+    if (res == NULL)
+	return NULL;
+    for (p = PyUnicode_AS_UNICODE(res), end = p + PyUnicode_GET_SIZE(res);
+	p<end; ++p)
+	*p = '?';
+    /* "O" gives the tuple its own reference to res, so ours is always
+       released here; dropping it only on failure would leak res on
+       every successful call.  restuple is NULL if the build failed. */
+    restuple = Py_BuildValue("(Oi)", res, endpos);
+    Py_DECREF(res);
+    return restuple;
+}
+
+/* Encode error handler "xmlcharrefreplace": returns the tuple
+   (replacement, endpos) where replacement holds a decimal XML
+   character reference "&#N;" for every character in
+   unicode[startpos:endpos]. */
+PyObject *PyCodec_XMLCharRefReplaceUnicodeEncodeErrors(
+    PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *reason, *state;
+    Py_UNICODE *unicode;
+    int size, startpos, endpos;
+    PyObject *res;
+    int ressize;
+    PyObject *restuple;
+    Py_UNICODE *inp;
+    Py_UNICODE *outp;
+
+    if (!PyArg_ParseTuple(args, "Ou#iiOO:xmlcharrefreplace_unicodeencode_errors",
+	&encoding, &unicode, &size, &startpos, &endpos, &reason, &state))
+	return NULL;
+
+    /* calculate replacement size */
+    for (inp = unicode+startpos, ressize = 0; inp < unicode+endpos; ++inp) {
+	if (*inp<10)
+	    ressize += 2+1+1;
+	else if (*inp<100)
+	    ressize += 2+2+1;
+	else if (*inp<1000)
+	    ressize += 2+3+1;
+	else if (*inp<10000)
+	    ressize += 2+4+1;
+	else if (*inp<100000)
+	    ressize += 2+5+1;
+	else if (*inp<1000000)
+	    ressize += 2+6+1;
+	else
+	    ressize += 2+7+1;
+    }
+    /* allocate replacement */
+    res = PyUnicode_FromUnicode(NULL, ressize);
+    if (res == NULL)
+	return NULL;
+    /* generate replacement */
+    for (inp = unicode+startpos, outp = PyUnicode_AS_UNICODE(res);
+	inp < unicode+endpos; ++inp) {
+	Py_UNICODE c = *inp;
+	int digits;
+	int base;
+	*outp++ = '&';
+	*outp++ = '#';
+	if (*inp<10) {
+	    digits = 1;
+	    base = 1;
+	}
+	else if (*inp<100) {
+	    digits = 2;
+	    base = 10;
+	}
+	else if (*inp<1000) {
+	    digits = 3;
+	    base = 100;
+	}
+	else if (*inp<10000) {
+	    digits = 4;
+	    base = 1000;
+	}
+	else if (*inp<100000) {
+	    digits = 5;
+	    base = 10000;
+	}
+	else if (*inp<1000000) {
+	    digits = 6;
+	    base = 100000;
+	}
+	else {
+	    digits = 7;
+	    base = 1000000;
+	}
+	while (digits-->0) {
+	    *outp++ = '0' + c/base;
+	    c %= base;
+	    base /= 10;
+	}
+	*outp++ = ';';
+    }
+
+    /* "O" gives the tuple its own reference to res, so ours is always
+       released here; dropping it only on failure would leak res on
+       every successful call.  restuple is NULL if the build failed. */
+    restuple = Py_BuildValue("(Oi)", res, endpos);
+    Py_DECREF(res);
+    return restuple;
+}
+
+static Py_UNICODE hexdigits[] = {
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+};
+
+/* Encode error handler "backslashreplace": returns the tuple
+   (replacement, endpos) where replacement holds a backslash escape
+   (\xNN, \uNNNN or \UNNNNNNNN, chosen by the character's value) for
+   every character in unicode[startpos:endpos]. */
+PyObject *PyCodec_BackslashReplaceUnicodeEncodeErrors(
+    PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *reason, *state;
+    Py_UNICODE *unicode;
+    int size, startpos, endpos;
+    Py_UNICODE *inp;
+    Py_UNICODE *outp;
+    PyObject *res;
+    PyObject *restuple;
+    int ressize;
+
+    if (!PyArg_ParseTuple(args, "Ou#iiOO:backslashreplace_unicodeencode_errors",
+	&encoding, &unicode, &size, &startpos, &endpos, &reason, &state))
+	return NULL;
+
+    for (inp = unicode+startpos, ressize = 0; inp < unicode+endpos; ++inp) {
+	if (*inp >= 0x00010000)
+	    ressize += 1+1+8;
+	else if (*inp >= 0x100) {
+	    ressize += 1+1+4;
+	}
+	else
+	    ressize += 1+1+2;
+    }
+    res = PyUnicode_FromUnicode(NULL, ressize);
+    if (res==NULL)
+	return NULL;
+    for (inp = unicode+startpos, outp = PyUnicode_AS_UNICODE(res);
+	inp < unicode+endpos; ++inp) {
+	Py_UNICODE c = *inp;
+	*outp++ = '\\';
+	if (c >= 0x00010000) {
+	    *outp++ = 'U';
+	    *outp++ = hexdigits[(c>>28)&0xf];
+	    *outp++ = hexdigits[(c>>24)&0xf];
+	    *outp++ = hexdigits[(c>>20)&0xf];
+	    *outp++ = hexdigits[(c>>16)&0xf];
+	    *outp++ = hexdigits[(c>>12)&0xf];
+	    *outp++ = hexdigits[(c>>8)&0xf];
+	}
+	else if (c >= 0x100) {
+	    *outp++ = 'u';
+	    *outp++ = hexdigits[(c>>12)&0xf];
+	    *outp++ = hexdigits[(c>>8)&0xf];
+	}
+	else
+	    *outp++ = 'x';
+	*outp++ = hexdigits[(c>>4)&0xf];
+	*outp++ = hexdigits[c&0xf];
+    }
+
+    /* "O" gives the tuple its own reference to res, so ours is always
+       released here; dropping it only on failure would leak res on
+       every successful call.  restuple is NULL if the build failed. */
+    restuple = Py_BuildValue("(Oi)", res, endpos);
+    Py_DECREF(res);
+    return restuple;
+}
+
+static PyObject *_PyCodec_UnicodeDecodeErrorHandlerRegistry;
+
+/* Register callable error as decoding error handler name; 0 or -1 on error */
+int PyCodec_RegisterUnicodeDecodeErrorHandler(const char *name, PyObject *error)
+{
+    if (PyCallable_Check(error))
+	return PyDict_SetItemString(
+	    _PyCodec_UnicodeDecodeErrorHandlerRegistry, (char *)name, error);
+    PyErr_SetString(PyExc_TypeError, "handler must be callable");
+    return -1;
+}
+
+/* Return a new reference to the decoding error handler registered as name
+   (NULL means "strict"), or NULL with a LookupError set. */
+PyObject *PyCodec_LookupUnicodeDecodeErrorHandler(const char *name)
+{
+    const char *key = (name!=NULL) ? name : "strict";
+    PyObject *callback = PyDict_GetItemString(
+	_PyCodec_UnicodeDecodeErrorHandlerRegistry, (char *)key);
+    if (callback!=NULL) {
+	Py_INCREF(callback);
+	return callback;
+    }
+    PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", key);
+    return NULL;
+}
+
+/* Set a UnicodeError describing the undecodable bytes str[startpos:endpos] */
+void PyCodec_RaiseUnicodeDecodeError(
+    const char *encoding, const char *str, int startpos, int endpos,
+    const char *reason)
+{
+    if (endpos!=startpos+1) {
+	PyErr_Format(PyExc_UnicodeError,
+	    "'%.400s' codec can't decode bytes in position %d-%d: %.400s",
+	    encoding, startpos, endpos-1, reason);
+	return;
+    }
+    PyErr_Format(PyExc_UnicodeError,
+	"'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
+	encoding, ((int)str[startpos])&0xff, startpos, reason);
+}
+
+
+/* Decode error handler "strict": always fails.  Returns NULL with either
+   the argument parsing error or the UnicodeError for the reported range. */
+PyObject *PyCodec_RaiseUnicodeDecodeErrors(PyObject *self, PyObject *args)
+{
+    char *encoding;
+    const char *reason;
+    char *str;
+    int size, startpos, endpos;
+    PyObject *state;
+    if (!PyArg_ParseTuple(args, "ss#iisO:raise_unicodedecode_errors",
+	&encoding, &str, &size, &startpos, &endpos, &reason, &state))
+	return NULL;
+    PyCodec_RaiseUnicodeDecodeError(encoding, str, startpos, endpos, reason);
+    return NULL;
+}
+
+
+/* Decode error handler "ignore": returns (u"", endpos), i.e. an empty
+   replacement for the undecodable bytes. */
+PyObject *PyCodec_IgnoreUnicodeDecodeErrors(PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *str, *reason, *state;
+    int startpos, endpos;
+    int parsed = PyArg_ParseTuple(args, "OOiiOO:ignore_unicodedecode_errors",
+	&encoding, &str, &startpos, &endpos, &reason, &state);
+
+    if (!parsed)
+	return NULL;
+    /* "u#" turns a NULL pointer into None even for length 0, so some
+       non-NULL address is passed; presumably it is never read */
+    return Py_BuildValue("(u#i)", &endpos, 0, endpos);
+}
+
+
+/* Decode error handler "replace": returns a tuple of a one-character
+   unicode string holding Py_UNICODE_REPLACEMENT_CHARACTER and endpos. */
+PyObject *PyCodec_ReplaceUnicodeDecodeErrors(PyObject *self, PyObject *args)
+{
+    PyObject *encoding, *str, *reason, *state;
+    int startpos, endpos;
+    Py_UNICODE replacement = Py_UNICODE_REPLACEMENT_CHARACTER;
+    int parsed = PyArg_ParseTuple(args, "OOiiOO:replace_unicodedecode_errors",
+	&encoding, &str, &startpos, &endpos, &reason, &state);
+
+    if (parsed)
+	return Py_BuildValue("(u#i)", &replacement, 1, endpos);
+    /* argument parsing failed and has set the exception */
+    return NULL;
+}
+
 void _PyCodecRegistry_Init(void)
 {
+    static struct {
+	char *name;
+	PyMethodDef def;
+    } methods[] =
+    {
+	{
+	    "strict",
+	    {
+		"raise_unicodeencode_errors",
+		PyCodec_RaiseUnicodeEncodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "ignore",
+	    {
+		"ignore_unicodeencode_errors",
+		PyCodec_IgnoreUnicodeEncodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "replace",
+	    {
+		"replace_unicodeencode_errors",
+		PyCodec_ReplaceUnicodeEncodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "xmlcharrefreplace",
+	    {
+		"xmlcharrefreplace_unicodeencode_errors",
+		PyCodec_XMLCharRefReplaceUnicodeEncodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "backslashreplace",
+	    {
+		"backslashreplace_unicodeencode_errors",
+		PyCodec_BackslashReplaceUnicodeEncodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "strict",
+	    {
+		"raise_unicodedecode_errors",
+		PyCodec_RaiseUnicodeDecodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "ignore",
+	    {
+		"ignore_unicodedecode_errors",
+		PyCodec_IgnoreUnicodeDecodeErrors,
+		METH_VARARGS
+	    }
+	},
+	{
+	    "replace",
+	    {
+		"replace_unicodedecode_errors",
+		PyCodec_ReplaceUnicodeDecodeErrors,
+		METH_VARARGS
+	    }
+	}
+    };
     if (_PyCodec_SearchPath == NULL)
 	_PyCodec_SearchPath = PyList_New(0);
     if (_PyCodec_SearchCache == NULL)
 	_PyCodec_SearchCache = PyDict_New();
+    if (_PyCodec_UnicodeEncodeErrorHandlerRegistry == NULL) {
+	int i;
+	_PyCodec_UnicodeEncodeErrorHandlerRegistry = PyDict_New();
+
+	if (_PyCodec_UnicodeEncodeErrorHandlerRegistry) {
+	    for (i = 0; i < 5; ++i) {
+		PyObject *func = PyCFunction_New(&methods[i].def, NULL);
+		int res;
+		if (!func)
+		    Py_FatalError("can't initialize codec registry");
+		res = PyCodec_RegisterUnicodeEncodeErrorHandler(methods[i].name, func);
+		Py_DECREF(func);
+		if (res)
+		    Py_FatalError("can't initialize codec registry");
+	    }
+	}
+    }
+    if (_PyCodec_UnicodeDecodeErrorHandlerRegistry == NULL) {
+	int i;
+	_PyCodec_UnicodeDecodeErrorHandlerRegistry = PyDict_New();
+
+	if (_PyCodec_UnicodeDecodeErrorHandlerRegistry) {
+	    for (i = 5; i < 8; ++i) {
+		PyObject *func = PyCFunction_New(&methods[i].def, NULL);
+		int res;
+		if (!func)
+		    Py_FatalError("can't initialize codec registry");
+		res = PyCodec_RegisterUnicodeDecodeErrorHandler(methods[i].name, func);
+		Py_DECREF(func);
+		if (res)
+		    Py_FatalError("can't initialize codec registry");
+	    }
+	}
+    }
     if (_PyCodec_SearchPath == NULL || 
 	_PyCodec_SearchCache == NULL)
 	Py_FatalError("can't initialize codec registry");
@@ -433,4 +908,8 @@
     _PyCodec_SearchPath = NULL;
     Py_XDECREF(_PyCodec_SearchCache);
     _PyCodec_SearchCache = NULL;
+    Py_XDECREF(_PyCodec_UnicodeEncodeErrorHandlerRegistry);
+    _PyCodec_UnicodeEncodeErrorHandlerRegistry = NULL;
+    Py_XDECREF(_PyCodec_UnicodeDecodeErrorHandlerRegistry);
+    _PyCodec_UnicodeDecodeErrorHandlerRegistry = NULL;
 }