Index: Include/codecs.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/codecs.h,v
retrieving revision 2.3
diff -u -r2.3 codecs.h
--- Include/codecs.h	2000/08/03 16:24:24	2.3
+++ Include/codecs.h	2001/06/13 13:01:05
@@ -53,12 +53,18 @@
 
    object is passed through the encoder function found for the given
    encoding using the error handling method defined by errors. errors
-   may be NULL to use the default method defined for the codec.
+   may be NULL to use strict error handling.
    
    Raises a LookupError in case no encoder can be found.
 
  */
 
+extern DL_IMPORT(PyObject *) PyCodec_EncodeEx(
+       PyObject *object,
+       const char *encoding,
+       PyObject *errors
+       );
+
 extern DL_IMPORT(PyObject *) PyCodec_Encode(
        PyObject *object,
        const char *encoding,
@@ -111,11 +117,48 @@
 
 /* Get a StreamWriter factory function for the given encoding. */
 
+extern DL_IMPORT(PyObject *) PyCodec_StreamWriterEx(
+       const char *encoding,
+       PyObject *stream,
+       PyObject *errors
+       );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject *) PyCodec_StreamWriter(
        const char *encoding,
        PyObject *stream,
        const char *errors
        );
+
+/* Return a new reference to one of the builtin unicode
+   encode error handlers. error can be:
+
+      * NULL, Py_None, "strict" or u"strict" for
+        codecs.raise_unicodeencode_errors
+      * "ignore" or u"ignore" for codecs.ignore_unicodeencode_errors
+      * "replace" or u"replace" for codecs.replace_unicodeencode_errors
+      * a callable which will be returned directly
+
+      everything else will raise an exception */
+extern DL_IMPORT(PyObject *) PyCodec_UnicodeEncodeHandlerForObject(PyObject *error);
+
+/* Raises a Unicode exception */
+extern DL_IMPORT(void) PyCodec_RaiseUnicodeEncodeError(const char *encoding, Py_UNICODE c, int pos);
+
+/* Encode error handler that raises an exception */
+extern DL_IMPORT(PyObject *) PyCodec_RaiseUnicodeEncodeErrors(PyObject *self, PyObject *args);
+
+/* Encode error handler that returns an empty string and so ignores the
+   unencodable character */
+extern DL_IMPORT(PyObject *) PyCodec_IgnoreUnicodeEncodeErrors(PyObject *self, PyObject *args);
+
+/* Encode error handler that returns a Unicode replacement character
+   that will be used by the codec to replace the unencodable character */
+extern DL_IMPORT(PyObject *) PyCodec_ReplaceUnicodeEncodeErrors(PyObject *self, PyObject *args);
+
+/* Encode error handler that returns an XML character reference for the
+   unencodable character */
+extern DL_IMPORT(PyObject *) PyCodec_XMLCharRefReplaceUnicodeEncodeErrors(PyObject *self, PyObject *args);
 
 #ifdef __cplusplus
 }
Index: Include/unicodeobject.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/unicodeobject.h,v
retrieving revision 2.21
diff -u -r2.21 unicodeobject.h
--- Include/unicodeobject.h	2001/05/21 20:30:15	2.21
+++ Include/unicodeobject.h	2001/06/13 13:01:14
@@ -407,9 +407,15 @@
     const char *errors          /* error handling */
     );
 
-/* Encodes a Py_UNICODE buffer of the given size and returns a 
-   Python string object. */
+/* Encodes a Unicode object and returns a Python string object. */
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeEx(
+    PyObject *unicode,	 	/* Unicode object */
+    const char *encoding,	/* encoding */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_Encode(
     const Py_UNICODE *s,        /* Unicode char buffer */
     int size,                   /* number of Py_UNICODE chars to encode */
@@ -417,9 +423,12 @@
     const char *errors          /* error handling */
     );
 
+#define PyUnicode_AsEncodedStringEx PyUnicode_EncodeEx
+
 /* Encodes a Unicode object and returns the result as Python string
    object. */
 
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
     PyObject *unicode,	 	/* Unicode object */
     const char *encoding,	/* encoding */
@@ -438,6 +447,12 @@
     PyObject *unicode	 	/* Unicode object */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8Ex(
+    PyObject *unicode,		/* Unicode object */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length,	 		/* number of Py_UNICODE chars to encode */
@@ -505,6 +520,13 @@
 
 */
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16Ex(
+    PyObject *unicode,		/* Unicode object */
+    PyObject *errors,		/* error handling */
+    int byteorder		/* byteorder to use 0=BOM+native;-1=LE,1=BE */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length,	 		/* number of Py_UNICODE chars to encode */
@@ -524,6 +546,10 @@
     PyObject *unicode	 	/* Unicode object */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscapeEx(
+    PyObject *unicode		/* Unicode object */
+    );
+
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length	 		/* Number of Py_UNICODE chars to encode */
@@ -537,7 +563,7 @@
     const char *errors		/* error handling */
     );
 
-extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscapeEx(
     PyObject *unicode	 	/* Unicode object */
     );
 
@@ -546,6 +572,8 @@
     int length	 		/* Number of Py_UNICODE chars to encode */
     );
 
+#define PyUnicode_AsRawUnicodeEscapeString PyUnicode_EncodeRawUnicodeEscapeEx
+
 /* --- Latin-1 Codecs ----------------------------------------------------- 
 
    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
@@ -562,6 +590,12 @@
     PyObject *unicode	 	/* Unicode object */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1Ex(
+    PyObject *unicode, 	/* Unicode object */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length,	 		/* Number of Py_UNICODE chars to encode */
@@ -584,6 +618,12 @@
     PyObject *unicode	 	/* Unicode object */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCIIEx(
+    PyObject *unicode,		/* Unicode object */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length,	 		/* Number of Py_UNICODE chars to encode */
@@ -626,6 +666,14 @@
 				   (unicode ordinal -> char ordinal) */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmapEx(
+    PyObject *unicode,		/* Unicode object */
+    PyObject *mapping,		/* character mapping 
+				   (unicode ordinal -> char ordinal) */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
     const Py_UNICODE *data, 	/* Unicode char buffer */
     int length,	 		/* Number of Py_UNICODE chars to encode */
@@ -668,6 +716,12 @@
     PyObject *unicode           /* Unicode object */
     );
 
+extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCSEx(
+    const PyObject *unicode,    /* Unicode object */
+    const PyObject *errors      /* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
     const Py_UNICODE *data,     /* Unicode char buffer */
     int length,                 /* Number of Py_UNICODE chars to encode */
@@ -682,7 +736,8 @@
    an output buffer using standard ASCII digit codes.
 
    The output buffer has to provide at least length+1 bytes of storage
-   area. The output string is 0-terminated.
+   area (more if longer replacement strings are generated).
+   The output string is 0-terminated.
 
    The encoder converts whitespace to ' ', decimal characters to their
    corresponding ASCII digit and all other Latin-1 characters except
@@ -691,15 +746,23 @@
 
    Error handling is defined by the errors argument:
 
-      NULL or "strict": raise a ValueError
-      "ignore": ignore the wrong characters (these are not copied to the
-		output buffer)
-      "replace": replaces illegal characters with '?'
+      NULL, None, "strict" or u"strict": raise a UnicodeError
+      "ignore" or u"ignore": ignore the wrong characters (these are
+                  not copied to the output buffer)
+      "replace" or u"replace": replaces illegal characters with '?'
+      callable object: use what the object returns as replacement
 
    Returns 0 on success, -1 on failure.
 
 */
 
+extern DL_IMPORT(int) PyUnicode_EncodeDecimalEx(
+    PyObject *unicode,		/* Unicode object */
+    char *output,		/* Output buffer; must have size >= length */
+    PyObject *errors		/* error handling */
+    );
+
+/* DEPRECATED */
 extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
     Py_UNICODE *s,		/* Unicode buffer */
     int length,			/* Number of Py_UNICODE chars to encode */
@@ -760,7 +823,7 @@
 */
 
 extern DL_IMPORT(PyObject *) PyUnicode_Translate(
-    PyObject *str,		/* String */ 
+    PyObject *str,		/* String */
     PyObject *table,		/* Translate table */
     const char *errors		/* error handling */
     );
Index: Lib/codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.19
diff -u -r1.19 codecs.py
--- Lib/codecs.py	2001/05/29 06:06:54	1.19
+++ Lib/codecs.py	2001/06/13 13:01:25
@@ -51,13 +51,17 @@
 
         The .encode()/.decode() methods may implement different error
         handling schemes by providing the errors argument. These
-        string values are defined:
+        values are defined:
 
-         'strict' - raise a ValueError error (or a subclass)
+         None or 'strict' - raise a UnicodeError (or a subclass)
          'ignore' - ignore the character and continue with the next
          'replace' - replace with a suitable replacement character;
-                    Python will use the official U+FFFD REPLACEMENT
-                    CHARACTER for the builtin Unicode codecs.
+                     Python will use the official U+FFFD REPLACEMENT
+                     CHARACTER for the builtin Unicode codecs.
+         callable object - call the object with the arguments
+                           encoding name, character, position
+                           and encode the unicode object returned
+                           instead of the original character.
 
     """
     def encode(self, input, errors='strict'):
@@ -66,7 +70,7 @@
             object, length consumed).
 
             errors defines the error handling to apply. It defaults to
-            'strict' handling.
+            strict handling.
 
             The method may not store state in the Codec instance. Use
             StreamCodec for codecs which have to keep state in order to
@@ -122,9 +126,15 @@
             schemes by providing the errors keyword argument. These
             parameters are defined:
 
-             'strict' - raise a ValueError (or a subclass)
-             'ignore' - ignore the character and continue with the next
-             'replace'- replace with a suitable replacement character
+               None or 'strict' - raise a UnicodeError (or a subclass)
+               'ignore' - ignore the character and continue with the next
+               'replace' - replace with a suitable replacement character;
+                           Python will use the official U+FFFD REPLACEMENT
+                           CHARACTER for the builtin Unicode codecs.
+               callable object - call the object with the arguments
+                                 encoding name, character, position
+                                 and encode the unicode object returned
+                                 instead of the original character.
 
         """
         self.stream = stream
Index: Lib/encodings/base64_codec.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/base64_codec.py,v
retrieving revision 1.1
diff -u -r1.1 base64_codec.py
--- Lib/encodings/base64_codec.py	2001/05/15 12:00:02	1.1
+++ Lib/encodings/base64_codec.py	2001/06/13 13:02:40
@@ -10,17 +10,17 @@
 
 ### Codec APIs
 
-def base64_encode(input,errors='strict'):
+def base64_encode(input,errors=None):
 
     """ Encodes the object input and returns a tuple (output
         object, length consumed).
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
-    assert errors == 'strict'
+    assert errors is None or errors == "strict" or errors == codecs.raise_unicodeencode_errors
     output = base64.encodestring(input)
     return (output, len(input))
 
@@ -34,7 +34,7 @@
         mapped files are examples of objects providing this slot.
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
Index: Lib/encodings/hex_codec.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/hex_codec.py,v
retrieving revision 1.1
diff -u -r1.1 hex_codec.py
--- Lib/encodings/hex_codec.py	2001/05/15 12:00:02	1.1
+++ Lib/encodings/hex_codec.py	2001/06/13 13:02:48
@@ -10,17 +10,17 @@
 
 ### Codec APIs
 
-def hex_encode(input,errors='strict'):
+def hex_encode(input,errors=None):
 
     """ Encodes the object input and returns a tuple (output
         object, length consumed).
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
-    assert errors == 'strict'
+    assert errors is None or errors == "strict" or errors == codecs.raise_unicodeencode_errors
     output = binascii.b2a_hex(input)
     return (output, len(input))
 
@@ -34,7 +34,7 @@
         mapped files are examples of objects providing this slot.
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
Index: Lib/encodings/quopri_codec.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/quopri_codec.py,v
retrieving revision 1.1
diff -u -r1.1 quopri_codec.py
--- Lib/encodings/quopri_codec.py	2001/05/15 15:34:07	1.1
+++ Lib/encodings/quopri_codec.py	2001/06/13 13:02:51
@@ -9,15 +9,15 @@
 except ImportError:
     from StringIO import StringIO
 
-def quopri_encode(input, errors='strict'):
+def quopri_encode(input, errors=None):
     """Encode the input, returning a tuple (output object, length consumed).
 
     errors defines the error handling to apply. It defaults to
-    'strict' handling which is the only currently supported
+    strict handling which is the only currently supported
     error handling for this codec.
 
     """
-    assert errors == 'strict'
+    assert errors is None or errors == "strict" or errors == codecs.raise_unicodeencode_errors
     f = StringIO(input)
     g = StringIO()
     quopri.encode(f, g, 1)
Index: Lib/encodings/uu_codec.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/uu_codec.py,v
retrieving revision 1.1
diff -u -r1.1 uu_codec.py
--- Lib/encodings/uu_codec.py	2001/05/15 12:00:02	1.1
+++ Lib/encodings/uu_codec.py	2001/06/13 13:02:51
@@ -12,17 +12,17 @@
 
 ### Codec APIs
 
-def uu_encode(input,errors='strict',filename='<data>',mode=0666):
+def uu_encode(input,errors=None,filename='<data>',mode=0666):
 
     """ Encodes the object input and returns a tuple (output
         object, length consumed).
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
-    assert errors == 'strict'
+    assert errors is None or errors == "strict" or errors == codecs.raise_unicodeencode_errors
     from cStringIO import StringIO
     from binascii import b2a_uu
     infile = StringIO(input)
@@ -50,7 +50,7 @@
         mapped files are examples of objects providing this slot.
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
         Note: filename and file mode information in the input data is
Index: Lib/encodings/zlib_codec.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/zlib_codec.py,v
retrieving revision 1.1
diff -u -r1.1 zlib_codec.py
--- Lib/encodings/zlib_codec.py	2001/05/15 12:00:02	1.1
+++ Lib/encodings/zlib_codec.py	2001/06/13 13:02:51
@@ -11,17 +11,17 @@
 
 ### Codec APIs
 
-def zlib_encode(input,errors='strict'):
+def zlib_encode(input,errors=None):
 
     """ Encodes the object input and returns a tuple (output
         object, length consumed).
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
-    assert errors == 'strict'
+    assert errors is None or errors == "strict" or errors == codecs.raise_unicodeencode_errors
     output = zlib.compress(input)
     return (output, len(input))
 
@@ -35,7 +35,7 @@
         mapped files are examples of objects providing this slot.
 
         errors defines the error handling to apply. It defaults to
-        'strict' handling which is the only currently supported
+        strict handling which is the only currently supported
         error handling for this codec.
 
     """
Index: Modules/_codecsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_codecsmodule.c,v
retrieving revision 2.6
diff -u -r2.6 _codecsmodule.c
--- Modules/_codecsmodule.c	2000/09/21 21:09:45	2.6
+++ Modules/_codecsmodule.c	2001/06/13 13:11:54
@@ -327,9 +327,9 @@
 {
     const char *data;
     int size;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
+    if (!PyArg_ParseTuple(args, "s#|O:readbuffer_encode",
 			  &data, &size, &errors))
 	return NULL;
 
@@ -343,9 +343,9 @@
 {
     const char *data;
     int size;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
+    if (!PyArg_ParseTuple(args, "t#|O:charbuffer_encode",
 			  &data, &size, &errors))
 	return NULL;
 
@@ -358,11 +358,11 @@
 			PyObject *args)
 {
     PyObject *obj;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
     const char *data;
     int size;
     
-    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
+    if (!PyArg_ParseTuple(args, "O|O:unicode_internal_encode",
 			  &obj, &errors))
 	return NULL;
 
@@ -382,22 +382,21 @@
 
 static PyObject *
 utf_8_encode(PyObject *self,
-	    PyObject *args)
+             PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
+    if (!PyArg_ParseTuple(args, "O|O:utf_8_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
-					 PyUnicode_GET_SIZE(str),
-					 errors),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeUTF8Ex(str, errors),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
@@ -411,172 +410,161 @@
 
 static PyObject *
 utf_16_encode(PyObject *self,
-	    PyObject *args)
+              PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
     int byteorder = 0;
 
-    if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
+    if (!PyArg_ParseTuple(args, "O|Oi:utf_16_encode",
 			  &str, &errors, &byteorder))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
-					  PyUnicode_GET_SIZE(str),
-					  errors,
-					  byteorder),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeUTF16Ex(str, errors, byteorder),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 utf_16_le_encode(PyObject *self,
-		 PyObject *args)
+                 PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|zi:utf_16_le_encode",
+    if (!PyArg_ParseTuple(args, "O|O:utf_16_le_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
-					     PyUnicode_GET_SIZE(str),
-					     errors,
-					     -1),
-		       PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeUTF16Ex(str, errors, -1),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 utf_16_be_encode(PyObject *self,
-		 PyObject *args)
+                 PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|zi:utf_16_be_encode",
+    if (!PyArg_ParseTuple(args, "O|O:utf_16_be_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
-					  PyUnicode_GET_SIZE(str),
-					  errors,
-					  +1),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeUTF16Ex(str, errors, +1),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 unicode_escape_encode(PyObject *self,
-		     PyObject *args)
+                      PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
+    if (!PyArg_ParseTuple(args, "O|O:unicode_escape_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str), 
-						  PyUnicode_GET_SIZE(str)),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeUnicodeEscapeEx(str),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 raw_unicode_escape_encode(PyObject *self,
-			PyObject *args)
+                          PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
+    if (!PyArg_ParseTuple(args, "O|O:raw_unicode_escape_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
-			       PyUnicode_AS_UNICODE(str), 
-			       PyUnicode_GET_SIZE(str)),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeRawUnicodeEscapeEx(str),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 latin_1_encode(PyObject *self,
-	       PyObject *args)
+               PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
+    if (!PyArg_ParseTuple(args, "O|O:latin_1_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeLatin1(
-			       PyUnicode_AS_UNICODE(str), 
-			       PyUnicode_GET_SIZE(str),
-			       errors),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeLatin1Ex(str, errors),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 ascii_encode(PyObject *self,
-	     PyObject *args)
+             PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
+    if (!PyArg_ParseTuple(args, "O|O:ascii_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeASCII(
-			       PyUnicode_AS_UNICODE(str), 
-			       PyUnicode_GET_SIZE(str),
-			       errors),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeASCIIEx(str, errors),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
 
 static PyObject *
 charmap_encode(PyObject *self,
-	     PyObject *args)
+               PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
     PyObject *mapping = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
+    if (!PyArg_ParseTuple(args, "O|OO:charmap_encode",
 			  &str, &errors, &mapping))
 	return NULL;
     if (mapping == Py_None)
@@ -585,12 +573,9 @@
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeCharmap(
-			       PyUnicode_AS_UNICODE(str), 
-			       PyUnicode_GET_SIZE(str),
-			       mapping, 
-			       errors),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeCharmapEx(str, mapping, errors),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
@@ -602,20 +587,18 @@
 	    PyObject *args)
 {
     PyObject *str, *v;
-    const char *errors = NULL;
+    PyObject *errors = NULL;
 
-    if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
+    if (!PyArg_ParseTuple(args, "O|O:mbcs_encode",
 			  &str, &errors))
 	return NULL;
 
     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
-    v = codec_tuple(PyUnicode_EncodeMBCS(
-			       PyUnicode_AS_UNICODE(str), 
-			       PyUnicode_GET_SIZE(str),
-			       errors),
-		    PyUnicode_GET_SIZE(str));
+    v = codec_tuple(
+	PyUnicode_EncodeMBCSEx(str, errors),
+	PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }
@@ -625,36 +608,40 @@
 /* --- Module API --------------------------------------------------------- */
 
 static PyMethodDef _codecs_functions[] = {
-    {"register",		codecregister,			1},
-    {"lookup",			codeclookup, 			1},
-    {"utf_8_encode",		utf_8_encode,			1},
-    {"utf_8_decode",		utf_8_decode,			1},
-    {"utf_16_encode",		utf_16_encode,			1},
-    {"utf_16_le_encode",	utf_16_le_encode,		1},
-    {"utf_16_be_encode",	utf_16_be_encode,		1},
-    {"utf_16_decode",		utf_16_decode,			1},
-    {"utf_16_le_decode",	utf_16_le_decode,		1},
-    {"utf_16_be_decode",	utf_16_be_decode,		1},
-    {"utf_16_ex_decode",	utf_16_ex_decode,		1},
-    {"unicode_escape_encode",	unicode_escape_encode,		1},
-    {"unicode_escape_decode",	unicode_escape_decode,		1},
-    {"unicode_internal_encode",	unicode_internal_encode,	1},
-    {"unicode_internal_decode",	unicode_internal_decode,	1},
-    {"raw_unicode_escape_encode", raw_unicode_escape_encode,	1},
-    {"raw_unicode_escape_decode", raw_unicode_escape_decode,	1},
-    {"latin_1_encode", 		latin_1_encode,			1},
-    {"latin_1_decode", 		latin_1_decode,			1},
-    {"ascii_encode", 		ascii_encode,			1},
-    {"ascii_decode", 		ascii_decode,			1},
-    {"charmap_encode", 		charmap_encode,			1},
-    {"charmap_decode", 		charmap_decode,			1},
-    {"readbuffer_encode",	readbuffer_encode,		1},
-    {"charbuffer_encode",	charbuffer_encode,		1},
+    {"register",				codecregister,					1},
+    {"lookup",					codeclookup, 					1},
+    {"utf_8_encode",				utf_8_encode,					1},
+    {"utf_8_decode",				utf_8_decode,					1},
+    {"utf_16_encode",				utf_16_encode,					1},
+    {"utf_16_le_encode",			utf_16_le_encode,				1},
+    {"utf_16_be_encode",			utf_16_be_encode,				1},
+    {"utf_16_decode",				utf_16_decode,					1},
+    {"utf_16_le_decode",			utf_16_le_decode,				1},
+    {"utf_16_be_decode",			utf_16_be_decode,				1},
+    {"utf_16_ex_decode",			utf_16_ex_decode,				1},
+    {"unicode_escape_encode",			unicode_escape_encode,				1},
+    {"unicode_escape_decode",			unicode_escape_decode,				1},
+    {"unicode_internal_encode",			unicode_internal_encode,			1},
+    {"unicode_internal_decode",			unicode_internal_decode,			1},
+    {"raw_unicode_escape_encode",		raw_unicode_escape_encode,			1},
+    {"raw_unicode_escape_decode",		raw_unicode_escape_decode,			1},
+    {"latin_1_encode",				latin_1_encode,					1},
+    {"latin_1_decode",				latin_1_decode,					1},
+    {"ascii_encode",				ascii_encode,					1},
+    {"ascii_decode",				ascii_decode,					1},
+    {"charmap_encode",				charmap_encode,					1},
+    {"charmap_decode",				charmap_decode,					1},
+    {"readbuffer_encode",			readbuffer_encode,				1},
+    {"charbuffer_encode",			charbuffer_encode,				1},
 #ifdef MS_WIN32
-    {"mbcs_encode", 		mbcs_encode,			1},
-    {"mbcs_decode", 		mbcs_decode,			1},
+    {"mbcs_encode", 				mbcs_encode,					1},
+    {"mbcs_decode", 				mbcs_decode,					1},
 #endif
-    {NULL, NULL}		/* sentinel */
+    {"raise_unicodeencode_errors",		PyCodec_RaiseUnicodeEncodeErrors,		1},
+    {"ignore_unicodeencode_errors",		PyCodec_IgnoreUnicodeEncodeErrors,		1},
+    {"replace_unicodeencode_errors",		PyCodec_ReplaceUnicodeEncodeErrors,		1},
+    {"xmlcharrefreplace_unicodeencode_errors",	PyCodec_XMLCharRefReplaceUnicodeEncodeErrors,	1},
+    {NULL, NULL}				/* sentinel */
 };
 
 DL_EXPORT(void)
Index: Objects/unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.93
diff -u -r2.93 unicodeobject.c
--- Objects/unicodeobject.c	2001/06/07 12:26:56	2.93
+++ Objects/unicodeobject.c	2001/06/13 13:14:05
@@ -497,47 +497,30 @@
     return NULL;
 }
 
-PyObject *PyUnicode_Encode(const Py_UNICODE *s,
-			   int size,
-			   const char *encoding,
-			   const char *errors)
+PyObject *PyUnicode_EncodeEx(PyObject *unicode,
+                             const char *encoding,
+                             PyObject *errors)
 {
-    PyObject *v, *unicode;
-    
-    unicode = PyUnicode_FromUnicode(s, size);
-    if (unicode == NULL)
-	return NULL;
-    v = PyUnicode_AsEncodedString(unicode, encoding, errors);
-    Py_DECREF(unicode);
-    return v;
-}
-
-PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
-                                    const char *encoding,
-                                    const char *errors)
-{
     PyObject *v;
-    
+
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
         goto onError;
     }
 
-    if (encoding == NULL) 
+    if (encoding == NULL)
 	encoding = PyUnicode_GetDefaultEncoding();
 
     /* Shortcuts for common default encodings */
-    if (errors == NULL) {
-	if (strcmp(encoding, "utf-8") == 0)
-	    return PyUnicode_AsUTF8String(unicode);
-	else if (strcmp(encoding, "latin-1") == 0)
-	    return PyUnicode_AsLatin1String(unicode);
-	else if (strcmp(encoding, "ascii") == 0)
-	    return PyUnicode_AsASCIIString(unicode);
-    }
+    if (strcmp(encoding, "utf-8") == 0)
+	return PyUnicode_EncodeUTF8Ex(unicode, errors);
+    else if ((strcmp(encoding, "latin-1") == 0) || (strcmp(encoding, "iso-8859-1") == 0))
+	return PyUnicode_EncodeLatin1Ex(unicode, errors);
+    else if (strcmp(encoding, "ascii") == 0)
+	return PyUnicode_EncodeASCIIEx(unicode, errors);
 
     /* Encode via the codec registry */
-    v = PyCodec_Encode(unicode, encoding, errors);
+    v = PyCodec_EncodeEx(unicode, encoding, errors);
     if (v == NULL)
         goto onError;
     /* XXX Should we really enforce this ? */
@@ -554,6 +537,48 @@
     return NULL;
 }
 
+/* Encode a Unicode object; errors is a C string naming the error
+   handling. It is wrapped into a Python string (NULL is passed on as
+   Py_None) for the PyObject-based implementation. */
+PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
+                                    const char *encoding,
+                                    const char *errors)
+{
+    PyObject *errorstr;
+    PyObject *res;
+
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr)
+	    return NULL;
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
+    }
+
+    res = PyUnicode_AsEncodedStringEx(unicode, encoding, errorstr);
+    Py_DECREF(errorstr);
+    return res;
+}
+
+/* Buffer-based entry point, kept so that existing callers still link:
+   wraps s[0..size-1] in a temporary Unicode object and encodes it. */
+PyObject *PyUnicode_Encode(const Py_UNICODE *s,
+			   int size,
+			   const char *encoding,
+			   const char *errors)
+{
+    PyObject *v, *unicode;
+
+    unicode = PyUnicode_FromUnicode(s, size);
+    if (unicode == NULL)
+	return NULL;
+    v = PyUnicode_AsEncodedString(unicode, encoding, errors);
+    Py_DECREF(unicode);
+    return v;
+}
+
 /* Return a Python string holding the default encoded value of the
    Unicode object. 
 
@@ -848,23 +853,36 @@
 }
 #endif
 
-PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
-			       int size,
-			       const char *errors)
+PyObject *PyUnicode_EncodeUTF8Ex(PyObject *unicode,
+                                 PyObject *errors)
 {
+    Py_UNICODE *s;
+    int size;
     PyObject *v;
     char *p;
     char *q;
     Py_UCS4 ch2;
-    unsigned int cbAllocated = 3 * size;
+    unsigned int cbAllocated;
     unsigned int cbWritten = 0;
     int i = 0;
 
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+    errors = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (errors == NULL)
+	return NULL;
+    s = PyUnicode_AS_UNICODE(unicode);
+    size = PyUnicode_GET_SIZE(unicode);
+    cbAllocated = 3 * size;
     v = PyString_FromStringAndSize(NULL, cbAllocated);
     if (v == NULL)
         return NULL;
-    if (size == 0)
+    if (size == 0) {
+	Py_DECREF(errors);
         return v;
+    }
 
     p = q = PyString_AS_STRING(v);
     while (i < size) {
@@ -918,20 +936,44 @@
 
  onError:
     Py_DECREF(v);
+    Py_DECREF(errors);
     return NULL;
 }
 
-PyObject *PyUnicode_AsUTF8String(PyObject *unicode)
+PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
+			       int size,
+			       const char *errors)
 {
-    if (!PyUnicode_Check(unicode)) {
-        PyErr_BadArgument();
-        return NULL;
+    PyObject *unicode;
+    PyObject *errorstr;
+    PyObject *res;
+
+    unicode = PyUnicode_FromUnicode(s, size);
+    if (!unicode)
+	return NULL;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return NULL;
+	}
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
     }
-    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-				PyUnicode_GET_SIZE(unicode),
-				NULL);
+
+    res = PyUnicode_EncodeUTF8Ex(unicode, errorstr);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
 }
 
+PyObject *PyUnicode_AsUTF8String(PyObject *unicode)
+{
+    return PyUnicode_EncodeUTF8Ex(unicode, NULL);
+}
+
 /* --- UTF-16 Codec ------------------------------------------------------- */
 
 static
@@ -1084,53 +1126,91 @@
 
 #undef UTF16_ERROR
 
-PyObject *PyUnicode_EncodeUTF16(const Py_UNICODE *s,
-				int size,
-				const char *errors,
-				int byteorder)
-{
-    PyObject *v;
+PyObject *PyUnicode_EncodeUTF16Ex(PyObject *unicode,
+                                  PyObject *errors,
+                                  int byteorder)
+{
+    Py_UNICODE *s;
+    int size;
+    PyObject *v = NULL;
     Py_UNICODE *p;
     char *q;
 
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+    errors = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (errors == NULL)
+	return NULL;
+
+    s = PyUnicode_AS_UNICODE(unicode);
+    size = PyUnicode_GET_SIZE(unicode);
+
     /* We don't create UTF-16 pairs... */
-    v = PyString_FromStringAndSize(NULL, 
+    v = PyString_FromStringAndSize(NULL,
 			sizeof(Py_UNICODE) * (size + (byteorder == 0)));
     if (v == NULL)
-        return NULL;
+        goto finish;
 
     q = PyString_AS_STRING(v);
     p = (Py_UNICODE *)q;
     if (byteorder == 0)
 	*p++ = 0xFEFF;
-    if (size == 0)
-        return v;
-    if (byteorder == 0 ||
+    if (size > 0) {
+	if (byteorder == 0 ||
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN	
-	byteorder == -1
+	    byteorder == -1
 #else
-	byteorder == 1
+	    byteorder == 1
 #endif
-	)
-	Py_UNICODE_COPY(p, s, size);
-    else
-	while (size-- > 0) {
-	    Py_UNICODE ch = *s++;
-	    *p++ = (ch >> 8) | (ch << 8);
-	}
+	    )
+	    Py_UNICODE_COPY(p, s, size);
+	else
+	    while (size-- > 0) {
+		Py_UNICODE ch = *s++;
+		*p++ = (ch >> 8) | (ch << 8);
+	    }
+    }
+    finish:
+    Py_DECREF(errors);
     return v;
 }
 
-PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
+
+PyObject *PyUnicode_EncodeUTF16(const Py_UNICODE *s,
+				int size,
+				const char *errors,
+				int byteorder)
 {
-    if (!PyUnicode_Check(unicode)) {
-        PyErr_BadArgument();
-        return NULL;
+    PyObject *unicode;
+    PyObject *errorstr;
+    PyObject *res;
+
+    unicode = PyUnicode_FromUnicode(s, size);
+    if (!unicode)
+	return NULL;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return NULL;
+	}
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
     }
-    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode),
-				 PyUnicode_GET_SIZE(unicode),
-				 NULL,
-				 0);
+
+    res = PyUnicode_EncodeUTF16Ex(unicode, errorstr, byteorder);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
+}
+
+PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
+{
+    return PyUnicode_EncodeUTF16Ex(unicode, NULL, 0);
 }
 
 /* --- Unicode Escape Codec ----------------------------------------------- */
@@ -1430,6 +1510,18 @@
     return NULL;
 }
 
+PyObject *PyUnicode_EncodeUnicodeEscapeEx(PyObject *unicode)
+{
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+
+    return unicodeescape_string(
+	PyUnicode_AS_UNICODE(unicode),
+	PyUnicode_GET_SIZE(unicode), 0);
+}
+
 PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 					int size)
 {
@@ -1438,12 +1530,7 @@
 
 PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
 {
-    if (!PyUnicode_Check(unicode)) {
-        PyErr_BadArgument();
-        return NULL;
-    }
-    return PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
-					 PyUnicode_GET_SIZE(unicode));
+    return PyUnicode_EncodeUnicodeEscapeEx(unicode);
 }
 
 /* --- Raw Unicode Escape Codec ------------------------------------------- */
@@ -1524,15 +1611,22 @@
     return NULL;
 }
 
-PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
-					   int size)
+PyObject *PyUnicode_EncodeRawUnicodeEscapeEx(PyObject *unicode)
 {
     PyObject *repr;
     char *p;
     char *q;
+    Py_UNICODE *s;
+    int size;
 
     static const char *hexdigit = "0123456789abcdef";
 
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+    s = PyUnicode_AS_UNICODE(unicode);
+    size = PyUnicode_GET_SIZE(unicode);
     repr = PyString_FromStringAndSize(NULL, 6 * size);
     if (repr == NULL)
         return NULL;
@@ -1566,14 +1660,26 @@
     return NULL;
 }
 
-PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
+PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
+					   int size)
 {
-    if (!PyUnicode_Check(unicode)) {
-	PyErr_BadArgument();
+    PyObject *unicode;
+    PyObject *res;
+
+    unicode = PyUnicode_FromUnicode(s, size);
+    if (!unicode)
 	return NULL;
-    }
-    return PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
-					    PyUnicode_GET_SIZE(unicode));
+
+    res = PyUnicode_EncodeRawUnicodeEscapeEx(unicode);
+    Py_DECREF(unicode);
+    return res;
 }
+
+/* Object-based entry point, kept so that existing callers still link;
+   the type check is done by PyUnicode_EncodeRawUnicodeEscapeEx(). */
+PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
+{
+    return PyUnicode_EncodeRawUnicodeEscapeEx(unicode);
+}
 
 /* --- Latin-1 Codec ------------------------------------------------------ */
@@ -1606,81 +1705,190 @@
     return NULL;
 }
 
-static
-int latin1_encoding_error(const Py_UNICODE **source,
-			  char **dest,
-			  const char *errors,
-			  const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "Latin-1 encoding error: %.400s",
-		     details);
-	return -1;
+/* Encode a Unicode object as ASCII (limit==128) or
+   latin-1 (limit==256)
+
+   For this and the other encode functions the loop through
+   the string is done in the following way: A stack with two
+   strings is kept and the loop always encodes a character from
+   the string at the stacktop. If an error is encountered and
+   the stack has only one entry (during encoding of the original
+   string) the callback is called and the unicode object returned
+   is pushed onto the stack, so the encoding continues with the
+   replacement string. If the stack has two entries when an
+   error is encountered, the replacement string itself has
+   an unencodable character and an exception will be raised.
+   When the encoder has reached the end of its current string
+   there are two possibilities: when the stack contains two
+   entries, this was the replacement string, so the replacement
+   string will be popped from the stack and encoding continues
+   with the next character from the original string. If the
+   stack had only one entry, encoding is finished.
+
+   Returns a new string object, or NULL with an exception set.
+   The reference to the resolved error handler is released on
+   every exit path. */
+static PyObject *unicode_encode_ucs1ex(PyObject *unicode,
+                              PyObject *errors, int limit)
+{
+    /* current input position */
+    int unicodepos;
+    /* output object */
+    PyObject *res;
+    /* current output position */
+    int respos = 0;
+    /* the next two variables are used as a "micro stack":
+       during processing of a replacement string unicode2
+       and unicode2pos contain the values for the original
+       unicode object to be encoded */
+    PyObject *unicode2 = NULL;
+    int unicode2pos = 0;
+    char *encoding = (limit == 256) ? "latin-1" : "ascii";
+
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
     }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
+    errors = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (errors == NULL)
+	return NULL;
+    /* allocate enough for a simple encoding without
+       replacements, if we need more, we'll resize */
+    res = PyString_FromStringAndSize(NULL, PyUnicode_GET_SIZE(unicode));
+    if (res == NULL)
+        goto onError;
+    if (PyUnicode_GET_SIZE(unicode) == 0) {
+	Py_DECREF(errors);
+	return res;
+    }
+
+    for (unicodepos = 0;;++unicodepos) {
+	Py_UNICODE c;
+	/* finished with the string? */
+	if (unicodepos == PyUnicode_GET_SIZE(unicode)) {
+	    /* processing replacement? */
+	    if (unicode2) {
+		/* forget replacement */
+		Py_DECREF(unicode);
+		/* switch back to original */
+		unicode = unicode2;
+		unicodepos = unicode2pos;
+		unicode2 = NULL;
+		unicode2pos = 0;
+		/* maybe original is finished too? */
+		continue;
+	    }
+	    else
+		/* processing original => finished */
+		break;
+	}
+	c = PyUnicode_AS_UNICODE(unicode)[unicodepos];
+
+	/* we can't encode this */
+	if (c>=limit) {
+	    if ((c == Py_UNICODE_REPLACEMENT_CHARACTER) && unicode2)
+		/* use our own replacement character, but only when processing replacements */
+		c = '?';
+	    else if (unicode2) {
+		/* error while replacing => report position in original */
+		PyCodec_RaiseUnicodeEncodeError(encoding, c, unicode2pos);
+		goto onError;
+	    } else {
+		/* use the callback */
+		PyObject *args = Py_BuildValue("sOi", encoding, unicode, unicodepos);
+		if (args == NULL)
+		    goto onError;
+		/* "push" original to secondary variables */
+		unicode2 = unicode;
+		unicode2pos = unicodepos;
+		/* switch to replacement */
+		unicode = PyEval_CallObject(errors, args);
+		Py_DECREF(args);
+		if (unicode == NULL)
+		    goto onError;
+		if (!PyUnicode_Check(unicode)) {
+		    PyErr_Format(PyExc_ValueError,
+		        "encoding error handler must return unicode");
+		    goto onError;
+		}
+		unicodepos = -1;
+		/* retry with the replacement string */
+		continue;
+	    }
+	}
+	/* need more space? */
+	if (respos == PyString_GET_SIZE(res)) {
+	    /* allocate twice the space */
+	    if (_PyString_Resize(&res, 2*PyString_GET_SIZE(res)))
+		goto onError;
+	}
+	PyString_AS_STRING(res)[respos++] = (char)c;
     }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
+    /* Resize if we allocated too much */
+    if (respos<PyString_GET_SIZE(res)) {
+	if (_PyString_Resize(&res, respos))
+	    goto onError;
     }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "Latin-1 encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
+    Py_DECREF(errors);
+    return res;
+
+    onError:
+    /* res is NULL if the allocation or a resize failed */
+    Py_XDECREF(res);
+    /* free replacement */
+    if (unicode2) {
+	Py_XDECREF(unicode);
     }
+    Py_DECREF(errors);
+    return NULL;
 }
 
-PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
-				 int size,
-				 const char *errors)
+PyObject *PyUnicode_EncodeLatin1Ex(PyObject *unicode,
+                                   PyObject *errors)
 {
-    PyObject *repr;
-    char *s, *start;
+    return unicode_encode_ucs1ex(unicode, errors, 256);
+}
 
-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-	return repr;
+static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
+                              int size,
+                              const char *errors,
+                              int limit)
+{
+    PyObject *unicode;
+    PyObject *errorstr;
+    PyObject *res;
 
-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 256) {
-	    if (latin1_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(256)"))
-		goto onError;
+    unicode = PyUnicode_FromUnicode(p, size);
+    if (!unicode)
+	return NULL;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return NULL;
 	}
-	else
-            *s++ = (char)ch;
     }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	if (_PyString_Resize(&repr, s - start))
-	    goto onError;
-    return repr;
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
+    }
 
- onError:
-    Py_DECREF(repr);
-    return NULL;
+    res = unicode_encode_ucs1ex(unicode, errorstr, limit);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
+}
+
+PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+                                 int size,
+                                 const char *errors)
+{
+    return unicode_encode_ucs1(p, size, errors, 256);
 }
 
 PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
 {
-    if (!PyUnicode_Check(unicode)) {
-	PyErr_BadArgument();
-	return NULL;
-    }
-    return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
-				  PyUnicode_GET_SIZE(unicode),
-				  NULL);
+    return PyUnicode_EncodeLatin1Ex(unicode, NULL);
 }
 
 /* --- 7-bit ASCII Codec -------------------------------------------------- */
@@ -1754,81 +1956,22 @@
     return NULL;
 }
 
-static
-int ascii_encoding_error(const Py_UNICODE **source,
-			 char **dest,
-			 const char *errors,
-			 const char *details) 
+PyObject *PyUnicode_EncodeASCIIEx(PyObject *unicode,
+                                  PyObject *errors)
 {
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "ASCII encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "ASCII encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
+    return unicode_encode_ucs1ex(unicode, errors, 128);
 }
 
 PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
-				int size,
-				const char *errors)
+                                int size,
+                                const char *errors)
 {
-    PyObject *repr;
-    char *s, *start;
-
-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-	return repr;
-
-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 128) {
-	    if (ascii_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(128)"))
-		goto onError;
-	}
-	else
-            *s++ = (char)ch;
-    }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	if (_PyString_Resize(&repr, s - start))
-	    goto onError;
-    return repr;
-
- onError:
-    Py_DECREF(repr);
-    return NULL;
+    return unicode_encode_ucs1(p, size, errors, 128);
 }
 
 PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
 {
-    if (!PyUnicode_Check(unicode)) {
-	PyErr_BadArgument();
-	return NULL;
-    }
-    return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
-				 PyUnicode_GET_SIZE(unicode),
-				 NULL);
+    return PyUnicode_EncodeASCIIEx(unicode, NULL);
 }
 
 #ifdef MS_WIN32
@@ -1861,20 +2004,28 @@
     return (PyObject *)v;
 }
 
-PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
-				int size,
-				const char *errors)
+/* Encode a Unicode object with the Windows ANSI code page (CP_ACP).
+   NOTE(review): errors does not appear to be consulted here. */
+PyObject *PyUnicode_EncodeMBCSEx(PyObject *unicode,
+                                 PyObject *errors)
 {
     PyObject *repr;
-    char *s;
     DWORD mbcssize;
 
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+
     /* If there are no characters, bail now! */
-    if (size==0)
-	    return PyString_FromString("");
+    if (PyUnicode_GET_SIZE(unicode) == 0)
+	return PyString_FromString("");
 
     /* First get the size of the result */
-    mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL);
+    mbcssize = WideCharToMultiByte(CP_ACP, 0,
+	PyUnicode_AS_UNICODE(unicode),
+	PyUnicode_GET_SIZE(unicode),
+	NULL, 0, NULL, NULL);
     if (mbcssize==0)
         return PyErr_SetFromWindowsErrWithFilename(0, NULL);
 
@@ -1885,14 +2034,46 @@
         return repr;
 
     /* Do the conversion */
-    s = PyString_AS_STRING(repr);
-    if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
+    if (0 == WideCharToMultiByte(CP_ACP, 0,
+	PyUnicode_AS_UNICODE(unicode),
+	PyUnicode_GET_SIZE(unicode),
+	PyString_AS_STRING(repr),
+	mbcssize, NULL, NULL)) {
         Py_DECREF(repr);
         return PyErr_SetFromWindowsErrWithFilename(0, NULL);
     }
     return repr;
 }
 
+PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
+				int size,
+				const char *errors)
+{
+    PyObject *unicode;
+    PyObject *errorstr;
+    PyObject *res;
+
+    unicode = PyUnicode_FromUnicode(p, size);
+    if (!unicode)
+	return NULL;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return NULL;
+	}
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
+    }
+
+    res = PyUnicode_EncodeMBCSEx(unicode, errorstr);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
+}
+
 #endif /* MS_WIN32 */
 
 /* --- Character Mapping Codec -------------------------------------------- */
@@ -2034,61 +2215,81 @@
     return NULL;
 }
 
-static
-int charmap_encoding_error(const Py_UNICODE **source,
-			   char **dest,
-			   const char *errors,
-			   const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "charmap encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "charmap encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
+/* Encode a Unicode object through a character mapping. mapping == NULL
+   falls back to Latin-1. errors is resolved to a handler via
+   PyCodec_UnicodeEncodeHandlerForObject(). Returns a new string
+   object, or NULL with an exception set. */
+PyObject *PyUnicode_EncodeCharmapEx(PyObject *unicode,
+                                    PyObject *mapping,
+                                    PyObject *errors)
+{
+    /* current input position */
+    int unicodepos;
+    /* output object */
+    PyObject *res;
+    /* current output position */
+    int respos = 0;
+    /* the next two variables are used as a "micro stack":
+       during processing of a replacement string unicode2
+       and unicode2pos contain the values for the original
+       unicode object to be encoded */
+    PyObject *unicode2 = NULL;
+    int unicode2pos = 0;
 
-PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
-				  int size,
-				  PyObject *mapping,
-				  const char *errors)
-{
-    PyObject *v;
-    char *s;
     int extrachars = 0;
 
     /* Default to Latin-1 */
     if (mapping == NULL)
-	return PyUnicode_EncodeLatin1(p, size, errors);
+	return PyUnicode_EncodeLatin1Ex(unicode, errors);
 
-    v = PyString_FromStringAndSize(NULL, size);
-    if (v == NULL)
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return NULL;
+    }
+    errors = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (errors == NULL)
+	return NULL;
+
+    res = PyString_FromStringAndSize(NULL, PyUnicode_GET_SIZE(unicode));
+    if (res == NULL) {
+	Py_DECREF(errors);
         return NULL;
-    if (size == 0)
-	return v;
-    s = PyString_AS_STRING(v);
-    while (size-- > 0) {
-	Py_UNICODE ch = *p++;
+    }
+    if (PyUnicode_GET_SIZE(unicode) == 0) {
+	Py_DECREF(errors);
+	return res;
+    }
+    for (unicodepos = 0;;++unicodepos) {
+	Py_UNICODE c;
 	PyObject *w, *x;
 
+	/* finished with current string? */
+	if (unicodepos == PyUnicode_GET_SIZE(unicode)) {
+	    /* currently processing replacement? */
+	    if (unicode2) {
+		/* forget replacement string */
+		Py_DECREF(unicode);
+		/* switch back to original */
+		unicode = unicode2;
+		unicodepos = unicode2pos;
+		unicode2 = NULL;
+		unicode2pos = 0;
+		/* maybe original is finished too? */
+		continue;
+	    }
+	    else
+		/* currently processing original => finished */
+		break;
+	}
+
+	c = PyUnicode_AS_UNICODE(unicode)[unicodepos];
+
+	/* use our own replacement character, but only when processing replacements */
+	if ((c == Py_UNICODE_REPLACEMENT_CHARACTER) && unicode2)
+	    c = '?';
+
 	/* Get mapping (Unicode ordinal -> string char, integer or None) */
-	w = PyInt_FromLong((long)ch);
+	w = PyInt_FromLong((long)c);
 	if (w == NULL)
 	    goto onError;
 	x = PyObject_GetItem(mapping, w);
@@ -2112,39 +2305,74 @@
 		Py_DECREF(x);
 		goto onError;
 	    }
-	    *s++ = (char)value;
+	    PyString_AS_STRING(res)[respos++] = (char)value;
 	}
+	/* undefined mapping */
 	else if (x == Py_None) {
-	    /* undefined mapping */
-	    if (charmap_encoding_error(&p, &s, errors, 
-				       "character maps to <undefined>")) {
+	    /* error while replacing */
+	    if (unicode2) {
+		/* report original position; FIXME should we give a better name? */
+		PyCodec_RaiseUnicodeEncodeError("charmap", c, unicode2pos);
 		Py_DECREF(x);
 		goto onError;
 	    }
+	    else {
+		/* use the callback */
+		PyObject *args = Py_BuildValue("sOi", "charmap", unicode, unicodepos);
+		if (args == NULL) {
+		    Py_DECREF(x);
+		    goto onError;
+		}
+		/* "push" original to secondary variables */
+		unicode2 = unicode;
+		unicode2pos = unicodepos;
+		/* switch to replacement */
+		unicode = PyEval_CallObject(errors, args);
+		Py_DECREF(args);
+		if (unicode == NULL) {
+		    Py_DECREF(x);
+		    goto onError;
+		}
+		/* the None lookup result is not needed any more */
+		Py_DECREF(x);
+		if (!PyUnicode_Check(unicode)) {
+		    PyErr_Format(PyExc_ValueError,
+		       "encoding error handler must return unicode");
+		    /* the replacement is released at onError */
+		    goto onError;
+		}
+		/* every replacement character may produce output,
+		   so reserve room for the whole replacement string */
+		if (PyUnicode_GET_SIZE(unicode) > 0 &&
+		    _PyString_Resize(&res, PyString_GET_SIZE(res) +
+				     PyUnicode_GET_SIZE(unicode)))
+		    goto onError;
+		unicodepos = -1;
+		/* retry with the replacement string */
+		continue;
+	    }
 	}
 	else if (PyString_Check(x)) {
 	    int targetsize = PyString_GET_SIZE(x);
 
 	    if (targetsize == 1)
 		/* 1-1 mapping */
-		*s++ = *PyString_AS_STRING(x);
+		PyString_AS_STRING(res)[respos++] = *PyString_AS_STRING(x);
 
 	    else if (targetsize > 1) {
 		/* 1-n mapping */
 		if (targetsize > extrachars) {
 		    /* resize first */
-		    int oldpos = (int)(s - PyString_AS_STRING(v));
 		    int needed = (targetsize - extrachars) + \
-			         (targetsize << 2);
+		                 (targetsize << 2);
 		    extrachars += needed;
-		    if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) {
+		    if (_PyString_Resize(&res, PyString_GET_SIZE(res) + needed)) {
 			Py_DECREF(x);
 			goto onError;
 		    }
-		    s = PyString_AS_STRING(v) + oldpos;
 		}
-		memcpy(s, PyString_AS_STRING(x), targetsize);
-		s += targetsize;
+		memcpy(&PyString_AS_STRING(res)[respos], PyString_AS_STRING(x), targetsize);
+		respos += targetsize;
 		extrachars -= targetsize;
 	    }
 	    /* 1-0 mapping: skip the character */
@@ -2158,29 +2379,59 @@
 	}
 	Py_DECREF(x);
     }
-    if (s - PyString_AS_STRING(v) < PyString_GET_SIZE(v))
-	if (_PyString_Resize(&v, (int)(s - PyString_AS_STRING(v))))
+    /* Resize if we allocated too much */
+    if (respos < PyString_GET_SIZE(res))
+	if (_PyString_Resize(&res, respos))
 	    goto onError;
-    return v;
+    Py_DECREF(errors);
+    return res;
 
  onError:
-    Py_DECREF(v);
+    /* res is NULL after a failed _PyString_Resize() */
+    Py_XDECREF(res);
+    /* free replacement */
+    if (unicode2) {
+	Py_XDECREF(unicode);
+    }
+    Py_DECREF(errors);
     return NULL;
 }
 
-PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
-				    PyObject *mapping)
+PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
+				  int size,
+				  PyObject *mapping,
+				  const char *errors)
 {
-    if (!PyUnicode_Check(unicode) || mapping == NULL) {
-	PyErr_BadArgument();
+    PyObject *unicode;
+    PyObject *errorstr;
+    PyObject *res;
+
+    unicode = PyUnicode_FromUnicode(p, size);
+    if (!unicode)
 	return NULL;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return NULL;
+	}
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
     }
-    return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
-				   PyUnicode_GET_SIZE(unicode),
-				   mapping,
-				   NULL);
+
+    res = PyUnicode_EncodeCharmapEx(unicode, mapping, errorstr);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
 }
 
+PyObject *PyUnicode_AsCharmapString(PyObject *unicode, PyObject *mapping)
+{
+    return PyUnicode_EncodeCharmapEx(unicode, mapping, NULL);
+}
+
 static
 int translate_error(const Py_UNICODE **source,
 		    Py_UNICODE **dest,
@@ -2316,58 +2565,150 @@
     
 /* --- Decimal Encoder ---------------------------------------------------- */
 
-int PyUnicode_EncodeDecimal(Py_UNICODE *s,
-			    int length,
-			    char *output,
-			    const char *errors)
-{
-    Py_UNICODE *p, *end;
+/* Convert a Unicode object into a 0-terminated 8-bit string in
+   output: whitespace becomes ' ', decimal digits become '0'..'9',
+   other non-zero characters below 256 are copied. Anything else is
+   passed to the error handler. Returns 0 on success, -1 with an
+   exception set on error.
+   NOTE(review): no bound on output is checked here; the caller must
+   provide room for any replacement strings as well. */
+int PyUnicode_EncodeDecimalEx(PyObject *unicode,
+                              char *output,
+                              PyObject *errors)
+{
+    /* current input position */
+    int unicodepos;
+    /* the next two variables are used as a "micro stack":
+       during processing of a replacement string unicode2
+       and unicode2pos contain the values for the original
+       unicode object to be encoded */
+    PyObject *unicode2 = NULL;
+    int unicode2pos = 0;
 
     if (output == NULL) {
 	PyErr_BadArgument();
 	return -1;
     }
 
-    p = s;
-    end = s + length;
-    while (p < end) {
-	register Py_UNICODE ch = *p++;
+    if (!PyUnicode_Check(unicode)) {
+	PyErr_BadArgument();
+	return -1;
+    }
+    errors = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (errors == NULL)
+	return -1;
+
+    for (unicodepos = 0;;++unicodepos) {
+	Py_UNICODE c;
 	int decimal;
-	
-	if (Py_UNICODE_ISSPACE(ch)) {
+	PyObject *args;
+	/* finished with the string? */
+	if (unicodepos == PyUnicode_GET_SIZE(unicode)) {
+	    /* processing replacement? */
+	    if (unicode2) {
+		/* forget replacement */
+		Py_DECREF(unicode);
+		/* switch back to original */
+		unicode = unicode2;
+		unicodepos = unicode2pos;
+		unicode2 = NULL;
+		unicode2pos = 0;
+		/* maybe original is finished too? */
+		continue;
+	    }
+	    else
+		/* processing original => finished */
+		break;
+	}
+	c = PyUnicode_AS_UNICODE(unicode)[unicodepos];
+
+	if (Py_UNICODE_ISSPACE(c)) {
 	    *output++ = ' ';
 	    continue;
 	}
-	decimal = Py_UNICODE_TODECIMAL(ch);
+	decimal = Py_UNICODE_TODECIMAL(c);
 	if (decimal >= 0) {
 	    *output++ = '0' + decimal;
 	    continue;
 	}
-	if (0 < ch && ch < 256) {
-	    *output++ = (char)ch;
+	if (0 < c && c < 256) {
+	    *output++ = (char)c;
 	    continue;
-	}
-	/* All other characters are considered invalid */
-	if (errors == NULL || strcmp(errors, "strict") == 0) {
-	    PyErr_SetString(PyExc_ValueError,
-			    "invalid decimal Unicode string");
-	    goto onError;
 	}
-	else if (strcmp(errors, "ignore") == 0)
-	    continue;
-	else if (strcmp(errors, "replace") == 0) {
+	if ((c == Py_UNICODE_REPLACEMENT_CHARACTER) && unicode2) {
 	    *output++ = '?';
 	    continue;
 	}
+	/* All other characters are considered invalid */
+	if (unicode2) {
+	    /* error while replacing => report position in original */
+	    PyCodec_RaiseUnicodeEncodeError("decimal", c, unicode2pos);
+	    goto onError;
+	}
+	args = Py_BuildValue("sOi", "decimal", unicode, unicodepos);
+	if (args == NULL)
+	    goto onError;
+	/* "push" original to secondary variables */
+	unicode2 = unicode;
+	unicode2pos = unicodepos;
+	/* switch to replacement */
+	unicode = PyEval_CallObject(errors, args);
+	Py_DECREF(args);
+	if (unicode == NULL)
+	    goto onError;
+	if (!PyUnicode_Check(unicode)) {
+	    PyErr_Format(PyExc_ValueError,
+		"encoding error handler must return unicode");
+	    goto onError;
+	}
+	/* retry with the replacement string */
+	unicodepos = -1;
     }
+    Py_DECREF(errors);
     /* 0-terminate the output string */
     *output++ = '\0';
     return 0;
 
  onError:
+    Py_DECREF(errors);
+    /* free replacement */
+    if (unicode2) {
+	Py_XDECREF(unicode);
+    }
+
     return -1;
 }
 
+int PyUnicode_EncodeDecimal(Py_UNICODE *s,
+			    int length,
+			    char *output,
+			    const char *errors)
+{
+    PyObject *unicode;
+    PyObject *errorstr;
+    int res;
+
+    unicode = PyUnicode_FromUnicode(s, length);
+    if (!unicode)
+	return -1;
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr) {
+	    Py_DECREF(unicode);
+	    return -1;
+	}
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
+    }
+
+    res = PyUnicode_EncodeDecimalEx(unicode, output, errorstr);
+    Py_DECREF(unicode);
+    Py_DECREF(errorstr);
+    return res;
+}
+
 /* --- Helpers ------------------------------------------------------------ */
 
 static 
@@ -3475,17 +3803,21 @@
 \n\
 Return an encoded string version of S. Default encoding is the current\n\
 default string encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.";
+handling scheme. Default is None meaning that encoding errors raise\n\
+a UnicodeError. 'strict' does the same. Other possible values are\n\
+'ignore' and 'replace' or a callable that will be called with the encoding,\n\
+the original string and the position of the unencodable character and must\n\
+return a unicode string that will be encoded instead of the unencodable\n\
+character.";
 
 static PyObject *
 unicode_encode(PyUnicodeObject *self, PyObject *args)
 {
     char *encoding = NULL;
-    char *errors = NULL;
-    if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
+    PyObject *errors = NULL;
+    if (!PyArg_ParseTuple(args, "|sO:encode", &encoding, &errors))
         return NULL;
-    return PyUnicode_AsEncodedString((PyObject *)self, encoding, errors);
+    return PyUnicode_AsEncodedStringEx((PyObject *)self, encoding, errors);
 }
 
 static char expandtabs__doc__[] =
Index: Python/codecs.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/codecs.c,v
retrieving revision 2.13
diff -u -r2.13 codecs.c
--- Python/codecs.c	2000/09/26 05:46:01	2.13
+++ Python/codecs.c	2001/06/13 13:14:39
@@ -236,11 +236,47 @@
     return args;
 }
 
+static
+PyObject *args_tupleex(PyObject *object,
+                       PyObject *errors)
+{
+    /* Build the (object, handler) tuple passed to an encoder or factory. */
+    PyObject *handler;
+    PyObject *tuple = PyTuple_New(2);
+    if (!tuple)
+	return NULL;
+    Py_INCREF(object);
+    PyTuple_SET_ITEM(tuple, 0, object);
+    handler = PyCodec_UnicodeEncodeHandlerForObject(errors);
+    if (handler == NULL) {
+	Py_DECREF(tuple);
+	return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 1, handler);    /* tuple takes over the handler */
+    return tuple;
+}
+
 /* Build a codec by calling factory(stream[,errors]) or just
    factory(errors) depending on whether the given parameters are
    non-NULL. */
 
 static
+PyObject *build_stream_codecex(PyObject *factory,
+                               PyObject *stream,
+                               PyObject *errors)
+{
+    /* Instantiate a stream codec by calling factory(stream, handler). */
+    PyObject *instance;
+    PyObject *call_args = args_tupleex(stream, errors);
+
+    if (!call_args)
+	return NULL;
+    instance = PyEval_CallObject(factory, call_args);
+    Py_DECREF(call_args);
+    return instance;
+}
+
+static
 PyObject *build_stream_codec(PyObject *factory,
 			     PyObject *stream,
 			     const char *errors)
@@ -309,29 +345,51 @@
     return NULL;
 }
 
-PyObject *PyCodec_StreamWriter(const char *encoding,
-			       PyObject *stream,
-			       const char *errors)
+PyObject *PyCodec_StreamWriterEx(const char *encoding,
+                                 PyObject *stream,
+                                 PyObject *errors)
 {
-    PyObject *codecs;
+    PyObject *codecs, *writer;
 
     codecs = _PyCodec_Lookup(encoding);
     if (codecs == NULL)
-	goto onError;
-    return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
-
- onError:
-    return NULL;
+	return NULL;
+    /* Item 3 of the codec tuple is used as the StreamWriter factory. */
+    writer = build_stream_codecex(PyTuple_GET_ITEM(codecs,3),stream,errors);
+    Py_DECREF(codecs);    /* release the tuple obtained from _PyCodec_Lookup() */
+    return writer;
 }
 
+PyObject *PyCodec_StreamWriter(const char *encoding,
+			       PyObject *stream,
+			       const char *errors)
+{
+    /* Deprecated C-string front end for PyCodec_StreamWriterEx():
+       errors is converted to a Python string object, or to None when
+       it is NULL, before the call is forwarded. */
+    PyObject *handler_name;
+    PyObject *writer;
+
+    if (errors == NULL) {
+	handler_name = Py_None;
+	Py_INCREF(handler_name);
+    }
+    else if ((handler_name = PyString_FromString(errors)) == NULL)
+	return NULL;
+
+    writer = PyCodec_StreamWriterEx(encoding, stream, handler_name);
+    Py_DECREF(handler_name);
+    return writer;
+}
+
 /* Encode an object (e.g. an Unicode object) using the given encoding
    and return the resulting encoded object (usually a Python string).
 
    errors is passed to the encoder factory as argument if non-NULL. */
 
-PyObject *PyCodec_Encode(PyObject *object,
-			 const char *encoding,
-			 const char *errors)
+PyObject *PyCodec_EncodeEx(PyObject *object,
+                           const char *encoding,
+                           PyObject *errors)
 {
     PyObject *encoder = NULL;
     PyObject *args = NULL, *result;
@@ -341,11 +399,11 @@
     if (encoder == NULL)
 	goto onError;
 
-    args = args_tuple(object, errors);
+    args = args_tupleex(object, errors);
     if (args == NULL)
 	goto onError;
     
-    result = PyEval_CallObject(encoder,args);
+    result = PyEval_CallObject(encoder, args);
     if (result == NULL)
 	goto onError;
 
@@ -370,6 +428,30 @@
     return NULL;
 }
 
+PyObject *PyCodec_Encode(PyObject *object,
+			 const char *encoding,
+			 const char *errors)
+{
+    /* C-string front end for PyCodec_EncodeEx(): errors becomes a Python
+       string object, or None when it is NULL. */
+    PyObject *errorstr;
+    PyObject *res;
+
+    if (errors) {
+	errorstr = PyString_FromString(errors);
+	if (!errorstr)
+	    return NULL;    /* object is the caller's reference: no DECREF */
+    }
+    else {
+	Py_INCREF(Py_None);
+	errorstr = Py_None;
+    }
+
+    res = PyCodec_EncodeEx(object, encoding, errorstr);
+    Py_DECREF(errorstr);
+    return res;
+}
+
 /* Decode an object (usually a Python string) using the given encoding
    and return an equivalent object (e.g. an Unicode object).
 
@@ -414,6 +496,156 @@
     Py_XDECREF(decoder);
     Py_XDECREF(result);
     return NULL;
+}
+
+/* Return a new reference to the unicode encode error handler selected
+   by error, or NULL with an exception set. error can be:
+
+      * NULL, Py_None, "strict" or u"strict" for
+        codecs.raise_unicodeencode_errors
+      * "ignore" or u"ignore" for codecs.ignore_unicodeencode_errors
+      * "replace" or u"replace" for codecs.replace_unicodeencode_errors
+      * a callable, which is returned directly
+
+   An unknown name raises ValueError, any other type raises TypeError. */
+PyObject *PyCodec_UnicodeEncodeHandlerForObject(PyObject *error)
+{
+    static Py_UNICODE strict[] = { 's', 't', 'r', 'i', 'c', 't' };
+    static Py_UNICODE ignore[] = { 'i', 'g', 'n', 'o', 'r', 'e' };
+    static Py_UNICODE replace[] = { 'r', 'e', 'p', 'l', 'a', 'c', 'e' };
+    static PyMethodDef strictMethod = {
+	"raise_unicodeencode_errors",
+	PyCodec_RaiseUnicodeEncodeErrors,
+	METH_VARARGS
+    };
+    static PyMethodDef ignoreMethod = {
+	"ignore_unicodeencode_errors",
+	PyCodec_IgnoreUnicodeEncodeErrors,
+	METH_VARARGS
+    };
+    static PyMethodDef replaceMethod = {
+	"replace_unicodeencode_errors",
+	PyCodec_ReplaceUnicodeEncodeErrors,
+	METH_VARARGS
+    };
+    PyMethodDef *method = NULL;
+    PyObject *res = NULL;
+
+    if (error==NULL || error==Py_None)
+	method = &strictMethod;
+    else if (PyCallable_Check(error)) {
+	res = error;
+	Py_INCREF(error);
+    }
+    else if (PyString_Check(error)) {
+	char *s = PyString_AS_STRING(error);
+	int size = PyString_GET_SIZE(error);
+	if (size==6 && !memcmp(s, "strict", size))
+	    method = &strictMethod;
+	else if (size==6 && !memcmp(s, "ignore", size))
+	    method = &ignoreMethod;
+	else if (size==7 && !memcmp(s, "replace", size))
+	    method = &replaceMethod;
+	else
+	    PyErr_SetString(PyExc_ValueError, "unknown error handler name");
+    }
+    else if (PyUnicode_Check(error)) {
+	Py_UNICODE *s = PyUnicode_AS_UNICODE(error);
+	int size = PyUnicode_GET_SIZE(error);
+	/* size counts characters, sizeof() counts the bytes to compare */
+	if (size==6 && !memcmp(s, strict, sizeof(strict)))
+	    method = &strictMethod;
+	else if (size==6 && !memcmp(s, ignore, sizeof(ignore)))
+	    method = &ignoreMethod;
+	else if (size==7 && !memcmp(s, replace, sizeof(replace)))
+	    method = &replaceMethod;
+	else
+	    PyErr_SetString(PyExc_ValueError, "unknown error handler name");
+    }
+    else
+	PyErr_SetString(PyExc_TypeError, "wrong type for error handler");
+    if (method)
+	res = PyCFunction_New(method, NULL);
+    return res;
+}
+
+/* Set a UnicodeError naming the encoding, the character and its position. */
+void PyCodec_RaiseUnicodeEncodeError(const char *encoding, Py_UNICODE c, int pos)
+{
+    PyErr_Format(PyExc_UnicodeError,
+	"encoding '%.400s' can't encode character '\\u%x' in position %d",
+	encoding, (int)c, pos);    /* %x consumes an int, not a long */
+}
+
+PyObject *PyCodec_RaiseUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    char *encoding;
+    PyObject *unicode;    /* "U": must be a unicode object, so its size is known */
+    int pos;              /* index into unicode of the character to report */
+    if (!PyArg_ParseTuple(args, "sUi:raise_unicodeencode_errors", &encoding, &unicode, &pos))
+	return NULL;
+    if (pos < 0 || pos >= PyUnicode_GET_SIZE(unicode))    /* never read outside the string */
+	return PyErr_Format(PyExc_IndexError, "position %d out of range", pos);
+    PyCodec_RaiseUnicodeEncodeError(encoding, PyUnicode_AS_UNICODE(unicode)[pos], pos);
+    return NULL;          /* this handler always fails with an exception set */
+}
+
+PyObject *PyCodec_IgnoreUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    /* The arguments are only validated; the result is always built
+       with PyUnicode_FromUnicode(NULL, 0), i.e. a zero-length string. */
+    char *codec_name;
+    Py_UNICODE *text;
+    int index;
+    int parsed = PyArg_ParseTuple(args, "sui:ignore_unicodeencode_errors",
+				  &codec_name, &text, &index);
+    return parsed ? PyUnicode_FromUnicode(NULL, 0) : NULL;
+}
+
+
+PyObject *PyCodec_ReplaceUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    /* The arguments are only validated; the result is always a
+       one-character string holding Py_UNICODE_REPLACEMENT_CHARACTER. */
+    char *codec_name;
+    Py_UNICODE *text;
+    int index;
+    Py_UNICODE substitute = Py_UNICODE_REPLACEMENT_CHARACTER;
+    int parsed = PyArg_ParseTuple(args, "sui:replace_unicodeencode_errors",
+				  &codec_name, &text, &index);
+    return parsed ? PyUnicode_FromUnicode(&substitute, 1) : NULL;
+}
+
+PyObject *PyCodec_XMLCharRefReplaceUnicodeEncodeErrors(PyObject *self, PyObject *args)
+{
+    /* Encode error handler: returns "&#x<hex>;" for the character at
+       unicode[pos]; raises IndexError when pos is outside the string. */
+    static Py_UNICODE hexdigits[] = {
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+    };
+    char *encoding;
+    PyObject *unicode;
+    int pos;
+    Py_UNICODE buf[9] = { '&', '#', 'x' };    /* prefix + at most 4 digits + ';' */
+    Py_UNICODE *p = buf + 3;
+    Py_UNICODE c;
+
+    if (!PyArg_ParseTuple(args, "sUi:xmlcharrefreplace_unicodeencode_errors", &encoding, &unicode, &pos))
+	return NULL;
+    if (pos < 0 || pos >= PyUnicode_GET_SIZE(unicode))    /* never read outside the string */
+	return PyErr_Format(PyExc_IndexError, "position %d out of range", pos);
+    c = PyUnicode_AS_UNICODE(unicode)[pos];
+    if (c>=0x1000)
+	*p++ = hexdigits[c>>12];
+    if (c>=0x0100)
+	*p++ = hexdigits[(c>>8)&0xf];
+    if (c>=0x0010)
+	*p++ = hexdigits[(c>>4)&0xf];
+    *p++ = hexdigits[c&0xf];
+    *p++ = ';';
+
+    return PyUnicode_FromUnicode(buf, p-buf);
+}
 
 void _PyCodecRegistry_Init(void)