Index: Include/codecs.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/codecs.h,v
retrieving revision 2.3
diff -u -c -5 -r2.3 codecs.h
*** Include/codecs.h	3 Aug 2000 16:24:24 -0000	2.3
--- Include/codecs.h	24 Jul 2002 18:52:18 -0000
***************
*** 115,123 ****
         const char *encoding,
         PyObject *stream,
         const char *errors
         );
  
  #ifdef __cplusplus
  }
  #endif
! #endif /* !Py_CODECREGISTRY_H */
--- 115,158 ----
         const char *encoding,
         PyObject *stream,
         const char *errors
         );
  
+ /* Unicode encoding error handling callback registry API */
+ 
+ /* Register the error handling callback function error under the name
+    name. This function will be called by the codec when it encounters
+    unencodable characters/undecodable bytes and doesn't know the
+    callback name, when name is specified as the error parameter
+    in the call to the encode/decode function.
+    Return 0 on success, -1 on error */
+ extern DL_IMPORT(int) PyCodec_RegisterError(
+        const char *name,
+        PyObject *error
+        );
+ 
+ /* Lookup the error handling callback function registered under the
+    given name. As a special case NULL can be passed, in which case
+    the error handling callback for strict encoding will be returned. */
+ extern DL_IMPORT(PyObject *) PyCodec_LookupError(
+        const char *name
+        );
+ 
+ /* raise exc as an exception */
+ extern DL_IMPORT(PyObject *) PyCodec_StrictErrors(PyObject *exc);
+ 
+ /* ignore the unicode error, skipping the faulty input */
+ extern DL_IMPORT(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
+ 
+ /* replace the unicode error with ? or U+FFFD */
+ extern DL_IMPORT(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
+ 
+ /* replace the unicode encode error with XML character references */
+ extern DL_IMPORT(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
+ 
+ /* replace the unicode encode error with backslash escapes (\x, \u and \U) */
+ extern DL_IMPORT(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
+ 
  #ifdef __cplusplus
  }
  #endif
! #endif
Index: Include/pyerrors.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/pyerrors.h,v
retrieving revision 2.54
diff -u -c -5 -r2.54 pyerrors.h
*** Include/pyerrors.h	29 May 2002 15:54:54 -0000	2.54
--- Include/pyerrors.h	24 Jul 2002 18:52:18 -0000
***************
*** 52,61 ****
--- 52,64 ----
  extern DL_IMPORT(PyObject *) PyExc_SystemError;
  extern DL_IMPORT(PyObject *) PyExc_SystemExit;
  extern DL_IMPORT(PyObject *) PyExc_TypeError;
  extern DL_IMPORT(PyObject *) PyExc_UnboundLocalError;
  extern DL_IMPORT(PyObject *) PyExc_UnicodeError;
+ extern DL_IMPORT(PyObject *) PyExc_UnicodeEncodeError;
+ extern DL_IMPORT(PyObject *) PyExc_UnicodeDecodeError;
+ extern DL_IMPORT(PyObject *) PyExc_UnicodeTranslateError;
  extern DL_IMPORT(PyObject *) PyExc_ValueError;
  extern DL_IMPORT(PyObject *) PyExc_ZeroDivisionError;
  #ifdef MS_WINDOWS
  extern DL_IMPORT(PyObject *) PyExc_WindowsError;
  #endif
***************
*** 107,116 ****
--- 110,183 ----
  extern DL_IMPORT(void) PyErr_SetInterrupt(void);
  
  /* Support for adding program text to SyntaxErrors */
  extern DL_IMPORT(void) PyErr_SyntaxLocation(char *, int);
  extern DL_IMPORT(PyObject *) PyErr_ProgramText(char *, int);
+ 
+ /* The following functions are used to create and modify unicode
+    exceptions from C */
+ /* create a UnicodeDecodeError object */
+ extern DL_IMPORT(PyObject *) PyUnicodeDecodeError_Create(
+ 	const char *, const char *, int, int, int, const char *);
+ 
+ /* create a UnicodeEncodeError object */
+ extern DL_IMPORT(PyObject *) PyUnicodeEncodeError_Create(
+ 	const char *, const Py_UNICODE *, int, int, int, const char *);
+ 
+ /* create a UnicodeTranslateError object */
+ extern DL_IMPORT(PyObject *) PyUnicodeTranslateError_Create(
+ 	const Py_UNICODE *, int, int, int, const char *);
+ 
+ /* get the encoding attribute */
+ extern DL_IMPORT(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeTranslateError_GetEncoding(PyObject *);
+ 
+ /* get the object attribute */
+ extern DL_IMPORT(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *);
+ 
+ /* get the value of the start attribute (the int * may not be NULL)
+    return -1 on success, 0 on failure */
+ extern DL_IMPORT(int) PyUnicodeEncodeError_GetStart(PyObject *, int *);
+ extern DL_IMPORT(int) PyUnicodeDecodeError_GetStart(PyObject *, int *);
+ extern DL_IMPORT(int) PyUnicodeTranslateError_GetStart(PyObject *, int *);
+ 
+ /* assign a new value to the start attribute
+    return -1 on success, 0 on failure */
+ extern DL_IMPORT(int) PyUnicodeEncodeError_SetStart(PyObject *, int);
+ extern DL_IMPORT(int) PyUnicodeDecodeError_SetStart(PyObject *, int);
+ extern DL_IMPORT(int) PyUnicodeTranslateError_SetStart(PyObject *, int);
+ 
+ /* get the value of the end attribute (the int * may not be NULL)
+    return -1 on success, 0 on failure */
+ extern DL_IMPORT(int) PyUnicodeEncodeError_GetEnd(PyObject *, int *);
+ extern DL_IMPORT(int) PyUnicodeDecodeError_GetEnd(PyObject *, int *);
+ extern DL_IMPORT(int) PyUnicodeTranslateError_GetEnd(PyObject *, int *);
+ 
+ /* assign a new value to the end attribute
+    return -1 on success, 0 on failure */
+ extern DL_IMPORT(int) PyUnicodeEncodeError_SetEnd(PyObject *, int);
+ extern DL_IMPORT(int) PyUnicodeDecodeError_SetEnd(PyObject *, int);
+ extern DL_IMPORT(int) PyUnicodeTranslateError_SetEnd(PyObject *, int);
+ 
+ /* get the value of the reason attribute
+    (returned as an object rather than a status code) */
+ extern DL_IMPORT(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *);
+ extern DL_IMPORT(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *);
+ 
+ /* assign a new value to the reason attribute
+    return -1 on success, 0 on failure */
+ extern DL_IMPORT(int) PyUnicodeEncodeError_SetReason(
+ 	PyObject *, const char *);
+ extern DL_IMPORT(int) PyUnicodeDecodeError_SetReason(
+ 	PyObject *, const char *);
+ extern DL_IMPORT(int) PyUnicodeTranslateError_SetReason(
+ 	PyObject *, const char *);
+ 
  
  /* These APIs aren't really part of the error implementation, but
     often needed to format error messages; the native C lib APIs are
     not available on all platforms, which is why we provide emulations
     for those platforms in Python/mysnprintf.c,
Index: Lib/codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.26
diff -u -c -5 -r1.26 codecs.py
*** Lib/codecs.py	4 Jun 2002 15:16:29 -0000	1.26
--- Lib/codecs.py	24 Jul 2002 18:52:19 -0000
***************
*** 18,28 ****
            'Failed to load the builtin codecs: %s' % why
  
  __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
             "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
             "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
!            "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE"]
  
  ### Constants
  
  #
  # Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
--- 18,31 ----
            'Failed to load the builtin codecs: %s' % why
  
  __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
             "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
             "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
!            "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
!            "strict_errors", "ignore_errors", "replace_errors",
!            "xmlcharrefreplace_errors", "backslashreplace_errors",
!            "register_error", "lookup_error"]
  
  ### Constants
  
  #
  # Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
Index: Modules/_codecsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/_codecsmodule.c,v
retrieving revision 2.12
diff -u -c -5 -r2.12 _codecsmodule.c
*** Modules/_codecsmodule.c	30 Jun 2002 15:26:09 -0000	2.12
--- Modules/_codecsmodule.c	24 Jul 2002 18:52:21 -0000
***************
*** 662,671 ****
--- 662,725 ----
  }
  
  #endif /* MS_WINDOWS */
  #endif /* Py_USING_UNICODE */
  
+ /* --- Error handler registry --------------------------------------------- */
+ 
+ static PyObject *register_error(PyObject *self, PyObject *args)
+ {
+     const char *name;
+     PyObject *handler;
+ 
+     if (!PyArg_ParseTuple(args, "sO:register_error",
+ 			  &name, &handler))
+ 	return NULL;
+     if (PyCodec_RegisterError(name, handler))
+         return NULL;
+     Py_INCREF(Py_None);
+     return Py_None;
+ }
+ 
+ static PyObject *lookup_error(PyObject *self, PyObject *args)
+ {
+     const char *name;
+ 
+     if (!PyArg_ParseTuple(args, "s:lookup_error",
+ 			  &name))
+ 	return NULL;
+     return PyCodec_LookupError(name);
+ }
+ 
+ static PyObject *strict_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_StrictErrors(exc);
+ }
+ 
+ 
+ static PyObject *ignore_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_IgnoreErrors(exc);
+ }
+ 
+ 
+ static PyObject *replace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_ReplaceErrors(exc);
+ }
+ 
+ 
+ static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_XMLCharRefReplaceErrors(exc);
+ }
+ 
+ 
+ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_BackslashReplaceErrors(exc);
+ }
  /* --- Module API --------------------------------------------------------- */
  
  static PyMethodDef _codecs_functions[] = {
      {"register",		codecregister,			METH_VARARGS},
      {"lookup",			codeclookup, 			METH_VARARGS},
***************
*** 698,707 ****
--- 752,768 ----
  #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
      {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
      {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
  #endif
  #endif /* Py_USING_UNICODE */
+     {"register_error", 		register_error,			METH_VARARGS},
+     {"lookup_error", 		lookup_error,			METH_VARARGS},
+     {"strict_errors", 		strict_errors,			METH_O},
+     {"ignore_errors", 		ignore_errors,			METH_O},
+     {"replace_errors", 		replace_errors,			METH_O},
+     {"xmlcharrefreplace_errors",xmlcharrefreplace_errors,	METH_O},
+     {"backslashreplace_errors",	backslashreplace_errors,	METH_O},
      {NULL, NULL}		/* sentinel */
  };
  
  DL_EXPORT(void)
  init_codecs(void)
Index: Objects/stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.170
diff -u -c -5 -r2.170 stringobject.c
*** Objects/stringobject.c	17 Jul 2002 16:30:38 -0000	2.170
--- Objects/stringobject.c	24 Jul 2002 18:52:22 -0000
***************
*** 2260,2270 ****
  "S.encode([encoding[,errors]]) -> object\n\
  \n\
  Encodes S using the codec registered for encoding. encoding defaults\n\
  to the default encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a ValueError. Other possible values are 'ignore' and 'replace'.");
  
  static PyObject *
  string_encode(PyStringObject *self, PyObject *args)
  {
      char *encoding = NULL;
--- 2260,2272 ----
  "S.encode([encoding[,errors]]) -> object\n\
  \n\
  Encodes S using the codec registered for encoding. encoding defaults\n\
  to the default encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a UnicodeError. Other possible values are 'ignore', 'replace' and\n\
! 'xmlcharrefreplace' as well as any other name registered with\n\
! codecs.register_error.");
  
  static PyObject *
  string_encode(PyStringObject *self, PyObject *args)
  {
      char *encoding = NULL;
***************
*** 2279,2289 ****
  "S.decode([encoding[,errors]]) -> object\n\
  \n\
  Decodes S using the codec registered for encoding. encoding defaults\n\
  to the default encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a ValueError. Other possible values are 'ignore' and 'replace'.");
  
  static PyObject *
  string_decode(PyStringObject *self, PyObject *args)
  {
      char *encoding = NULL;
--- 2281,2292 ----
  "S.decode([encoding[,errors]]) -> object\n\
  \n\
  Decodes S using the codec registered for encoding. encoding defaults\n\
  to the default encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a UnicodeError. Other possible values are 'ignore' and 'replace' as well\n\
! as any other name registerd with codecs.register_error.");
  
  static PyObject *
  string_decode(PyStringObject *self, PyObject *args)
  {
      char *encoding = NULL;
Index: Objects/unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.156
diff -u -c -5 -r2.156 unicodeobject.c
*** Objects/unicodeobject.c	17 Jul 2002 16:30:38 -0000	2.156
--- Objects/unicodeobject.c	24 Jul 2002 18:52:24 -0000
***************
*** 487,498 ****
  			   int size,
  			   const char *encoding,
  			   const char *errors)
  {
      PyObject *buffer = NULL, *unicode;
!     
!     if (encoding == NULL) 
  	encoding = PyUnicode_GetDefaultEncoding();
  
      /* Shortcuts for common default encodings */
      if (strcmp(encoding, "utf-8") == 0)
          return PyUnicode_DecodeUTF8(s, size, errors);
--- 487,498 ----
  			   int size,
  			   const char *encoding,
  			   const char *errors)
  {
      PyObject *buffer = NULL, *unicode;
! 
!     if (encoding == NULL)
  	encoding = PyUnicode_GetDefaultEncoding();
  
      /* Shortcuts for common default encodings */
      if (strcmp(encoding, "utf-8") == 0)
          return PyUnicode_DecodeUTF8(s, size, errors);
***************
*** 639,648 ****
--- 639,734 ----
  
   onError:
      return -1;
  }
  
+ /* error handling callback helper:
+    build arguments, call the callback and check the result,
+    if no exception occurred, copy the replacement to the output
+    and adjust various state variables.
+    return 0 on success, -1 on error
+ */
+ 
+ static
+ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
+                  const char *encoding, const char *reason,
+                  const char *input, int insize, int *startinpos, int *endinpos, PyObject **exceptionObject, const char **inptr,
+                  PyObject **output, int *outpos, Py_UNICODE **outptr)
+ {
+     static char *argparse = "O!i;decoding error handler must return (unicode, int) tuple";
+ 
+     PyObject *restuple = NULL;
+     PyObject *repunicode = NULL;
+     int outsize = PyUnicode_GET_SIZE(*output);
+     int requiredsize;
+     int newpos;
+     Py_UNICODE *repptr;
+     int repsize;
+     int res = -1;
+ 
+     if (*errorHandler == NULL) {
+ 	*errorHandler = PyCodec_LookupError(errors);
+ 	if (*errorHandler == NULL)
+ 	   goto onError;
+     }
+ 
+     if (*exceptionObject == NULL) {
+     	*exceptionObject = PyUnicodeDecodeError_Create(
+ 	    encoding, input, insize, *startinpos, *endinpos, reason);
+ 	if (*exceptionObject == NULL)
+ 	   goto onError;
+     }
+     else {
+ 	if (!PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos))
+ 	    goto onError;
+ 	if (!PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos))
+ 	    goto onError;
+ 	if (!PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
+ 	    goto onError;
+     }
+ 
+     restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+     if (restuple == NULL)
+ 	goto onError;
+     if (!PyTuple_Check(restuple)) {
+ 	PyErr_Format(PyExc_TypeError, &argparse[4]);
+ 	goto onError;
+     }
+     if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+ 	goto onError;
+     if (newpos<0)
+ 	newpos = 0;
+     else if (newpos>insize)
+ 	newpos = insize;
+ 
+     /* need more space? (at least enough for what we
+        have+the replacement+the rest of the string (starting
+        at the new input position), so we won't have to check space
+        when there are no errors in the rest of the string) */
+     repptr = PyUnicode_AS_UNICODE(repunicode);
+     repsize = PyUnicode_GET_SIZE(repunicode);
+     requiredsize = *outpos + repsize + insize-newpos;
+     if (requiredsize > outsize) {
+ 	if (requiredsize<2*outsize)
+ 	    requiredsize = 2*outsize;
+ 	if (PyUnicode_Resize(output, requiredsize))
+ 	    goto onError;
+ 	*outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
+     }
+     *endinpos = newpos;
+     *inptr = input + newpos;
+     Py_UNICODE_COPY(*outptr, repptr, repsize);
+     *outptr += repsize;
+     *outpos += repsize;
+     /* we made it! */
+     res = 0;
+ 
+     onError:
+     Py_XDECREF(restuple);
+     return res;
+ }
+ 
  /* --- UTF-7 Codec -------------------------------------------------------- */
  
  /* see RFC2152 for details */
  
  static 
***************
*** 697,748 ****
  		} else { \
  				*out++ = outCh; \
  		} \
      } \
  
- static
- int utf7_decoding_error(Py_UNICODE **dest,
-                         const char *errors,
-                         const char *details) 
- {
-     if ((errors == NULL) ||
-         (strcmp(errors,"strict") == 0)) {
-         PyErr_Format(PyExc_UnicodeError,
-                      "UTF-7 decoding error: %.400s",
-                      details);
-         return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
-         return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
-         if (dest != NULL) {
-             **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-             (*dest)++;
-         }
-         return 0;
-     }
-     else {
-         PyErr_Format(PyExc_ValueError,
-                      "UTF-7 decoding error; unknown error handling code: %.400s",
-                      errors);
-         return -1;
-     }
- }
- 
  PyObject *PyUnicode_DecodeUTF7(const char *s,
  			       int size,
  			       const char *errors)
  {
      const char *e;
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const char *errmsg = "";
      int inShift = 0;
      unsigned int bitsleft = 0;
      unsigned long charsleft = 0;
! 	int surrogate = 0;
  
      unicode = _PyUnicode_New(size);
      if (!unicode)
          return NULL;
      if (size == 0)
--- 783,810 ----
  		} else { \
  				*out++ = outCh; \
  		} \
      } \
  
  PyObject *PyUnicode_DecodeUTF7(const char *s,
  			       int size,
  			       const char *errors)
  {
+     const char *starts = s;
+     int startinpos;
+     int endinpos;
+     int outpos;
      const char *e;
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const char *errmsg = "";
      int inShift = 0;
      unsigned int bitsleft = 0;
      unsigned long charsleft = 0;
!     int surrogate = 0;
!     PyObject *errorHandler = NULL;
!     PyObject *exc = NULL;
  
      unicode = _PyUnicode_New(size);
      if (!unicode)
          return NULL;
      if (size == 0)
***************
*** 750,760 ****
  
      p = unicode->str;
      e = s + size;
  
      while (s < e) {
!         Py_UNICODE ch = *s;
  
          if (inShift) {
              if ((ch == '-') || !B64CHAR(ch)) {
                  inShift = 0;
                  s++;
--- 812,824 ----
  
      p = unicode->str;
      e = s + size;
  
      while (s < e) {
!         Py_UNICODE ch;
!         restart:
!         ch = *s;
  
          if (inShift) {
              if ((ch == '-') || !B64CHAR(ch)) {
                  inShift = 0;
                  s++;
***************
*** 795,804 ****
--- 859,869 ----
                  s++;
                  /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
              }
          }
          else if ( ch == '+' ) {
+             startinpos = s-starts;
              s++;
              if (s < e && *s == '-') {
                  s++;
                  *p++ = '+';
              } else
***************
*** 816,840 ****
              *p++ = ch;
              s++;
          }
          continue;
      utf7Error:
!       if (utf7_decoding_error(&p, errors, errmsg))
!           goto onError;
      }
  
      if (inShift) {
!         if (utf7_decoding_error(&p, errors, "unterminated shift sequence"))
              goto onError;
      }
  
!     if (_PyUnicode_Resize(&unicode, p - unicode->str))
          goto onError;
  
      return (PyObject *)unicode;
  
  onError:
      Py_DECREF(unicode);
      return NULL;
  }
  
  
--- 881,923 ----
              *p++ = ch;
              s++;
          }
          continue;
      utf7Error:
!         outpos = p-PyUnicode_AS_UNICODE(unicode);
!         endinpos = s-starts;
!         if (unicode_decode_call_errorhandler(
!              errors, &errorHandler,
!              "utf7", errmsg,
!              starts, size, &startinpos, &endinpos, &exc, &s,
!              (PyObject **)&unicode, &outpos, &p))
!         goto onError;
      }
  
      if (inShift) {
!         outpos = p-PyUnicode_AS_UNICODE(unicode);
!         endinpos = size;
!         if (unicode_decode_call_errorhandler(
!              errors, &errorHandler,
!              "utf7", "unterminated shift sequence",
!              starts, size, &startinpos, &endinpos, &exc, &s,
!              (PyObject **)&unicode, &outpos, &p))
              goto onError;
+         if (s < e)
+            goto restart;
      }
  
!     if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)))
          goto onError;
  
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)unicode;
  
  onError:
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      Py_DECREF(unicode);
      return NULL;
  }
  
  
***************
*** 960,1009 ****
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
  };
  
- static
- int utf8_decoding_error(const char **source,
-                         Py_UNICODE **dest,
-                         const char *errors,
-                         const char *details) 
- {
-     if ((errors == NULL) ||
-         (strcmp(errors,"strict") == 0)) {
-         PyErr_Format(PyExc_UnicodeError,
-                      "UTF-8 decoding error: %.400s",
-                      details);
-         return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
-         (*source)++;
-         return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
-         (*source)++;
-         **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-         (*dest)++;
-         return 0;
-     }
-     else {
-         PyErr_Format(PyExc_ValueError,
-                      "UTF-8 decoding error; unknown error handling code: %.400s",
-                      errors);
-         return -1;
-     }
- }
- 
  PyObject *PyUnicode_DecodeUTF8(const char *s,
  			       int size,
  			       const char *errors)
  {
      int n;
      const char *e;
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const char *errmsg = "";
  
      /* Note: size will always be longer than the resulting Unicode
         character count */
      unicode = _PyUnicode_New(size);
      if (!unicode)
--- 1043,1067 ----
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
  };
  
  PyObject *PyUnicode_DecodeUTF8(const char *s,
  			       int size,
  			       const char *errors)
  {
+     const char *starts = s;
      int n;
+     int startinpos;
+     int endinpos;
+     int outpos;
      const char *e;
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const char *errmsg = "";
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
  
      /* Note: size will always be longer than the resulting Unicode
         character count */
      unicode = _PyUnicode_New(size);
      if (!unicode)
***************
*** 1026,1055 ****
--- 1084,1123 ----
  
          n = utf8_code_length[ch];
  
          if (s + n > e) {
  	    errmsg = "unexpected end of data";
+ 	    startinpos = s-starts;
+ 	    endinpos = size;
  	    goto utf8Error;
  	}
  
          switch (n) {
  
          case 0:
              errmsg = "unexpected code byte";
+ 	    startinpos = s-starts;
+ 	    endinpos = startinpos+1;
  	    goto utf8Error;
  
          case 1:
              errmsg = "internal error";
+ 	    startinpos = s-starts;
+ 	    endinpos = startinpos+1;
  	    goto utf8Error;
  
          case 2:
              if ((s[1] & 0xc0) != 0x80) {
                  errmsg = "invalid data";
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+2;
  		goto utf8Error;
  	    }
              ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
              if (ch < 0x80) {
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+2;
                  errmsg = "illegal encoding";
  		goto utf8Error;
  	    }
  	    else
  		*p++ = (Py_UNICODE)ch;
***************
*** 1057,1066 ****
--- 1125,1136 ----
  
          case 3:
              if ((s[1] & 0xc0) != 0x80 || 
                  (s[2] & 0xc0) != 0x80) {
                  errmsg = "invalid data";
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+3;
  		goto utf8Error;
  	    }
              ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
              if (ch < 0x0800) {
  		/* Note: UTF-8 encodings of surrogates are considered
***************
*** 1069,1078 ****
--- 1139,1150 ----
  		   XXX For wide builds (UCS-4) we should probably try
  		       to recombine the surrogates into a single code
  		       unit.
  		*/
                  errmsg = "illegal encoding";
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+3;
  		goto utf8Error;
  	    }
  	    else
  		*p++ = (Py_UNICODE)ch;
              break;
***************
*** 1080,1089 ****
--- 1152,1163 ----
          case 4:
              if ((s[1] & 0xc0) != 0x80 ||
                  (s[2] & 0xc0) != 0x80 ||
                  (s[3] & 0xc0) != 0x80) {
                  errmsg = "invalid data";
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+4;
  		goto utf8Error;
  	    }
              ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
                   ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
              /* validate and convert to UTF-16 */
***************
*** 1091,1100 ****
--- 1165,1176 ----
  					 byte encoding */
                  || (ch > 0x10ffff))   /* maximum value allowed for
  					 UTF-16 */
  	    {
                  errmsg = "illegal encoding";
+ 		startinpos = s-starts;
+ 		endinpos = startinpos+4;
  		goto utf8Error;
  	    }
  #ifdef Py_UNICODE_WIDE
  	    *p++ = (Py_UNICODE)ch;
  #else
***************
*** 1112,1138 ****
              break;
  
          default:
              /* Other sizes are only needed for UCS-4 */
              errmsg = "unsupported Unicode code range";
  	    goto utf8Error;
          }
          s += n;
  	continue;
  	
      utf8Error:
!       if (utf8_decoding_error(&s, &p, errors, errmsg))
!           goto onError;
      }
  
      /* Adjust length */
      if (_PyUnicode_Resize(&unicode, p - unicode->str))
          goto onError;
  
      return (PyObject *)unicode;
  
  onError:
      Py_DECREF(unicode);
      return NULL;
  }
  
  /* Allocation strategy:  if the string is short, convert into a stack buffer
--- 1188,1225 ----
              break;
  
          default:
              /* Other sizes are only needed for UCS-4 */
              errmsg = "unsupported Unicode code range";
+ 	    startinpos = s-starts;
+ 	    endinpos = startinpos+n;
  	    goto utf8Error;
          }
          s += n;
  	continue;
  	
      utf8Error:
!     outpos = p-PyUnicode_AS_UNICODE(unicode);
!     if (unicode_decode_call_errorhandler(
! 	     errors, &errorHandler,
! 	     "utf8", errmsg,
! 	     starts, size, &startinpos, &endinpos, &exc, &s,
! 	     (PyObject **)&unicode, &outpos, &p))
! 	goto onError;
      }
  
      /* Adjust length */
      if (_PyUnicode_Resize(&unicode, p - unicode->str))
          goto onError;
  
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)unicode;
  
  onError:
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      Py_DECREF(unicode);
      return NULL;
  }
  
  /* Allocation strategy:  if the string is short, convert into a stack buffer
***************
*** 1246,1292 ****
  				NULL);
  }
  
  /* --- UTF-16 Codec ------------------------------------------------------- */
  
- static
- int utf16_decoding_error(Py_UNICODE **dest,
- 			 const char *errors,
- 			 const char *details) 
- {
-     if ((errors == NULL) ||
-         (strcmp(errors,"strict") == 0)) {
-         PyErr_Format(PyExc_UnicodeError,
-                      "UTF-16 decoding error: %.400s",
-                      details);
-         return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
-         return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
- 	if (dest) {
- 	    **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
- 	    (*dest)++;
- 	}
-         return 0;
-     }
-     else {
-         PyErr_Format(PyExc_ValueError,
-                      "UTF-16 decoding error; "
- 		     "unknown error handling code: %.400s",
-                      errors);
-         return -1;
-     }
- }
- 
  PyObject *
  PyUnicode_DecodeUTF16(const char *s,
  		      int size,
  		      const char *errors,
  		      int *byteorder)
  {
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const unsigned char *q, *e;
      int bo = 0;       /* assume native ordering by default */
      const char *errmsg = "";
--- 1333,1352 ----
  				NULL);
  }
  
  /* --- UTF-16 Codec ------------------------------------------------------- */
  
  PyObject *
  PyUnicode_DecodeUTF16(const char *s,
  		      int size,
  		      const char *errors,
  		      int *byteorder)
  {
+     const char *starts = s;
+     int startinpos;
+     int endinpos;
+     int outpos;
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
      const unsigned char *q, *e;
      int bo = 0;       /* assume native ordering by default */
      const char *errmsg = "";
***************
*** 1294,1310 ****
  #ifdef BYTEORDER_IS_LITTLE_ENDIAN
      int ihi = 1, ilo = 0;
  #else
      int ihi = 0, ilo = 1;
  #endif
! 
!     /* size should be an even number */
!     if (size & 1) {
!         if (utf16_decoding_error(NULL, errors, "truncated data"))
!             return NULL;
!         --size;  /* else ignore the oddball byte */
!     }
  
      /* Note: size will always be longer than the resulting Unicode
         character count */
      unicode = _PyUnicode_New(size);
      if (!unicode)
--- 1354,1365 ----
  #ifdef BYTEORDER_IS_LITTLE_ENDIAN
      int ihi = 1, ilo = 0;
  #else
      int ihi = 0, ilo = 1;
  #endif
!     PyObject *errorHandler = NULL;
!     PyObject *exc = NULL;
  
      /* Note: size will always be longer than the resulting Unicode
         character count */
      unicode = _PyUnicode_New(size);
      if (!unicode)
***************
*** 1357,1377 ****
          ihi = 0;
          ilo = 1;
      }
  
      while (q < e) {
! 	Py_UNICODE ch = (q[ihi] << 8) | q[ilo];
  	q += 2;
  
  	if (ch < 0xD800 || ch > 0xDFFF) {
  	    *p++ = ch;
  	    continue;
  	}
  
  	/* UTF-16 code pair: */
  	if (q >= e) {
  	    errmsg = "unexpected end of data";
  	    goto utf16Error;
  	}
  	if (0xD800 <= ch && ch <= 0xDBFF) {
  	    Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
  	    q += 2;
--- 1412,1445 ----
          ihi = 0;
          ilo = 1;
      }
  
      while (q < e) {
! 	Py_UNICODE ch;
! 	/* remaining bytes at the end? (size should be even) */
! 	if (e-q<2) {
! 	    errmsg = "truncated data";
! 	    startinpos = ((const char *)q)-starts;
! 	    endinpos = ((const char *)e)-starts;
! 	    goto utf16Error;
! 	    /* The remaining input chars are ignored if the callback
! 	       chooses to skip the input */
! 	}
! 	ch = (q[ihi] << 8) | q[ilo];
! 
  	q += 2;
  
  	if (ch < 0xD800 || ch > 0xDFFF) {
  	    *p++ = ch;
  	    continue;
  	}
  
  	/* UTF-16 code pair: */
  	if (q >= e) {
  	    errmsg = "unexpected end of data";
+ 	    startinpos = (((const char *)q)-2)-starts;
+ 	    endinpos = ((const char *)e)-starts;
  	    goto utf16Error;
  	}
  	if (0xD800 <= ch && ch <= 0xDBFF) {
  	    Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
  	    q += 2;
***************
*** 1384,1416 ****
  #endif
  		continue;
  	    }
  	    else {
                  errmsg = "illegal UTF-16 surrogate";
  		goto utf16Error;
  	    }
  
  	}
  	errmsg = "illegal encoding";
  	/* Fall through to report the error */
  
      utf16Error:
! 	if (utf16_decoding_error(&p, errors, errmsg))
  	    goto onError;
      }
  
      if (byteorder)
          *byteorder = bo;
  
      /* Adjust length */
      if (_PyUnicode_Resize(&unicode, p - unicode->str))
          goto onError;
  
      return (PyObject *)unicode;
  
  onError:
      Py_DECREF(unicode);
      return NULL;
  }
  
  PyObject *
  PyUnicode_EncodeUTF16(const Py_UNICODE *s,
--- 1452,1497 ----
  #endif
  		continue;
  	    }
  	    else {
                  errmsg = "illegal UTF-16 surrogate";
+ 		startinpos = (((const char *)q)-4)-starts;
+ 		endinpos = startinpos+2;
  		goto utf16Error;
  	    }
  
  	}
  	errmsg = "illegal encoding";
+ 	startinpos = (((const char *)q)-2)-starts;
+ 	endinpos = startinpos+2;
  	/* Fall through to report the error */
  
      utf16Error:
! 	outpos = p-PyUnicode_AS_UNICODE(unicode);
! 	if (unicode_decode_call_errorhandler(
! 	         errors, &errorHandler,
! 	         "utf16", errmsg,
! 	         starts, size, &startinpos, &endinpos, &exc, (const char **)&q,
! 	         (PyObject **)&unicode, &outpos, &p))
  	    goto onError;
      }
  
      if (byteorder)
          *byteorder = bo;
  
      /* Adjust length */
      if (_PyUnicode_Resize(&unicode, p - unicode->str))
          goto onError;
  
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)unicode;
  
  onError:
      Py_DECREF(unicode);
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return NULL;
  }
  
  PyObject *
  PyUnicode_EncodeUTF16(const Py_UNICODE *s,
***************
*** 1487,1560 ****
  				 0);
  }
  
  /* --- Unicode Escape Codec ----------------------------------------------- */
  
- static
- int unicodeescape_decoding_error(Py_UNICODE **x,
-                                  const char *errors,
-                                  const char *details) 
- {
-     if ((errors == NULL) ||
-         (strcmp(errors,"strict") == 0)) {
-         PyErr_Format(PyExc_UnicodeError,
-                      "Unicode-Escape decoding error: %.400s",
-                      details);
-         return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
-         return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
-         **x = Py_UNICODE_REPLACEMENT_CHARACTER;
- 	(*x)++;
-         return 0;
-     }
-     else {
-         PyErr_Format(PyExc_ValueError,
-                      "Unicode-Escape decoding error; "
-                      "unknown error handling code: %.400s",
-                      errors);
-         return -1;
-     }
- }
- 
  static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
  
  PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
  					int size,
  					const char *errors)
  {
      PyUnicodeObject *v;
!     Py_UNICODE *p, *buf;
      const char *end;
      char* message;
      Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
  
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
!        length after conversion to the true value. */
      v = _PyUnicode_New(size);
      if (v == NULL)
          goto onError;
      if (size == 0)
          return (PyObject *)v;
  
!     p = buf = PyUnicode_AS_UNICODE(v);
      end = s + size;
  
      while (s < end) {
          unsigned char c;
          Py_UNICODE x;
!         int i, digits;
  
          /* Non-escape characters are interpreted as Unicode ordinals */
          if (*s != '\\') {
              *p++ = (unsigned char) *s++;
              continue;
          }
! 
          /* \ - Escapes */
          s++;
          switch (*s++) {
  
          /* \x escapes */
--- 1568,1621 ----
  				 0);
  }
  
  /* --- Unicode Escape Codec ----------------------------------------------- */
  
  static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
  
  PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
  					int size,
  					const char *errors)
  {
+     const char *starts = s;
+     int startinpos;
+     int endinpos;
+     int outpos;
+     int i;
      PyUnicodeObject *v;
!     Py_UNICODE *p;
      const char *end;
      char* message;
      Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
  
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
!        length after conversion to the true value.
!        (but if the error callback returns a long replacement string
!        we'll have to allocate more space) */
      v = _PyUnicode_New(size);
      if (v == NULL)
          goto onError;
      if (size == 0)
          return (PyObject *)v;
  
!     p = PyUnicode_AS_UNICODE(v);
      end = s + size;
  
      while (s < end) {
          unsigned char c;
          Py_UNICODE x;
!         int digits;
  
          /* Non-escape characters are interpreted as Unicode ordinals */
          if (*s != '\\') {
              *p++ = (unsigned char) *s++;
              continue;
          }
!         startinpos = s-starts;
          /* \ - Escapes */
          s++;
          switch (*s++) {
  
          /* \x escapes */
***************
*** 1599,1616 ****
          case 'U':
              digits = 8;
              message = "truncated \\UXXXXXXXX escape";
          hexescape:
              chr = 0;
!             for (i = 0; i < digits; i++) {
                  c = (unsigned char) s[i];
                  if (!isxdigit(c)) {
!                     if (unicodeescape_decoding_error(&p, errors, message))
                          goto onError;
!                     chr = 0xffffffff;
!                     i++;
!                     break;
                  }
                  chr = (chr<<4) & ~0xF;
                  if (c >= '0' && c <= '9')
                      chr += c - '0';
                  else if (c >= 'a' && c <= 'f')
--- 1660,1691 ----
          case 'U':
              digits = 8;
              message = "truncated \\UXXXXXXXX escape";
          hexescape:
              chr = 0;
!             outpos = p-PyUnicode_AS_UNICODE(v);
!             if (s+digits>end) {
!                 endinpos = size;
!                 if (unicode_decode_call_errorhandler(
!                     errors, &errorHandler,
!                     "unicodeescape", "end of string in escape sequence",
!                     starts, size, &startinpos, &endinpos, &exc, &s,
!                     (PyObject **)&v, &outpos, &p))
!                     goto onError;
!                 goto nextByte;
!             }
!             for (i = 0; i < digits; ++i) {
                  c = (unsigned char) s[i];
                  if (!isxdigit(c)) {
!                     endinpos = (s+i+1)-starts;
!                     if (unicode_decode_call_errorhandler(
!                         errors, &errorHandler,
!                         "unicodeescape", message,
!                         starts, size, &startinpos, &endinpos, &exc, &s,
!                         (PyObject **)&v, &outpos, &p))
                          goto onError;
!                     goto nextByte;
                  }
                  chr = (chr<<4) & ~0xF;
                  if (c >= '0' && c <= '9')
                      chr += c - '0';
                  else if (c >= 'a' && c <= 'f')
***************
*** 1618,1630 ****
                  else
                      chr += 10 + c - 'A';
              }
              s += i;
              if (chr == 0xffffffff)
!                     /* _decoding_error will have already written into the
!                        target buffer. */
!                     break;
          store:
              /* when we get here, chr is a 32-bit unicode character */
              if (chr <= 0xffff)
                  /* UCS-2 character */
                  *p++ = (Py_UNICODE) chr;
--- 1693,1705 ----
                  else
                      chr += 10 + c - 'A';
              }
              s += i;
              if (chr == 0xffffffff)
!                 /* Only a literal \Uffffffff reaches this point (decode
!                    errors jump to nextByte); it is dropped silently. */
!                 break;
          store:
              /* when we get here, chr is a 32-bit unicode character */
              if (chr <= 0xffff)
                  /* UCS-2 character */
                  *p++ = (Py_UNICODE) chr;
***************
*** 1637,1650 ****
                  chr -= 0x10000L;
                  *p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
                  *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
  #endif
              } else {
!                 if (unicodeescape_decoding_error(
!                     &p, errors,
!                     "illegal Unicode character")
!                     )
                      goto onError;
              }
              break;
  
          /* \N{name} */
--- 1712,1728 ----
                  chr -= 0x10000L;
                  *p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
                  *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
  #endif
              } else {
!                 endinpos = s-starts;
!                 outpos = p-PyUnicode_AS_UNICODE(v);
!                 if (unicode_decode_call_errorhandler(
!                     errors, &errorHandler,
!                     "unicodeescape", "illegal Unicode character",
!                     starts, size, &startinpos, &endinpos, &exc, &s,
!                     (PyObject **)&v, &outpos, &p))
                      goto onError;
              }
              break;
  
          /* \N{name} */
***************
*** 1676,1714 ****
                      s++;
                      if (ucnhash_CAPI->getcode(start, s-start-1, &chr))
                          goto store;
                  }
              }
!             if (unicodeescape_decoding_error(&p, errors, message))
                  goto onError;
              break;
  
          default:
              if (s > end) {
!                 if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
                      goto onError;
              }
              else {
                  *p++ = '\\';
                  *p++ = (unsigned char)s[-1];
              }
              break;
          }
      }
!     if (_PyUnicode_Resize(&v, (int)(p - buf)))
!                 goto onError;
      return (PyObject *)v;
  
  ucnhashError:
      PyErr_SetString(
          PyExc_UnicodeError,
          "\\N escapes not supported (can't load unicodedata module)"
          );
      return NULL;
  
  onError:
      Py_XDECREF(v);
      return NULL;
  }
  
  /* Return a Unicode-Escape string version of the Unicode object.
  
--- 1754,1818 ----
                      s++;
                      if (ucnhash_CAPI->getcode(start, s-start-1, &chr))
                          goto store;
                  }
              }
!             /* s--; */
!             endinpos = s-starts;
!             outpos = p-PyUnicode_AS_UNICODE(v);
!             if (unicode_decode_call_errorhandler(
!                 errors, &errorHandler,
!                 "unicodeescape", message,
!                 starts, size, &startinpos, &endinpos, &exc, &s,
!                 (PyObject **)&v, &outpos, &p))
                  goto onError;
              break;
  
          default:
              if (s > end) {
!                 message = "\\ at end of string";
!                 s--;
!                 endinpos = s-starts;
!                 outpos = p-PyUnicode_AS_UNICODE(v);
!                 if (unicode_decode_call_errorhandler(
!                     errors, &errorHandler,
!                     "unicodeescape", message,
!                     starts, size, &startinpos, &endinpos, &exc, &s,
!                     (PyObject **)&v, &outpos, &p))
                      goto onError;
              }
              else {
                  *p++ = '\\';
                  *p++ = (unsigned char)s[-1];
              }
              break;
          }
+         nextByte:
+         ;
      }
!     if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
!         goto onError;
!     /* success: release the cached error handler and exception */
!     Py_XDECREF(errorHandler);
!     Py_XDECREF(exc);
      return (PyObject *)v;
  
  ucnhashError:
      PyErr_SetString(
          PyExc_UnicodeError,
          "\\N escapes not supported (can't load unicodedata module)"
          );
+     /* v was allocated before the loop; don't leak it here */
+     Py_XDECREF(v);
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return NULL;
  
  onError:
      Py_XDECREF(v);
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
      return NULL;
  }
  
  /* Return a Unicode-Escape string version of the Unicode object.
  
***************
*** 1868,1891 ****
  
  PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
  					   int size,
  					   const char *errors)
  {
      PyUnicodeObject *v;
!     Py_UNICODE *p, *buf;
      const char *end;
      const char *bs;
      
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
!        length after conversion to the true value. */
      v = _PyUnicode_New(size);
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
!     p = buf = PyUnicode_AS_UNICODE(v);
      end = s + size;
      while (s < end) {
  	unsigned char c;
  	Py_UCS4 x;
  	int i;
--- 1967,1997 ----
  
  PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
  					   int size,
  					   const char *errors)
  {
+     const char *starts = s;
+     int startinpos;
+     int endinpos;
+     int outpos;
      PyUnicodeObject *v;
!     Py_UNICODE *p;
      const char *end;
      const char *bs;
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
      
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
!        length after conversion to the true value. (But decoding error
!        handler might have to resize the string) */
      v = _PyUnicode_New(size);
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
!     p = PyUnicode_AS_UNICODE(v);
      end = s + size;
      while (s < end) {
  	unsigned char c;
  	Py_UCS4 x;
  	int i;
***************
*** 1893,1902 ****
--- 1999,2009 ----
  	/* Non-escape characters are interpreted as Unicode ordinals */
  	if (*s != '\\') {
  	    *p++ = (unsigned char)*s++;
  	    continue;
  	}
+ 	startinpos = s-starts;
  
  	/* \u-escapes are only interpreted iff the number of leading
  	   backslashes if odd */
  	bs = s;
  	for (;s < end;) {
***************
*** 1911,1948 ****
  	}
  	p--;
  	s++;
  
  	/* \uXXXX with 4 hex digits */
! 	for (x = 0, i = 0; i < 4; i++) {
! 	    c = (unsigned char)s[i];
  	    if (!isxdigit(c)) {
! 		if (unicodeescape_decoding_error(&p, errors,
! 						 "truncated \\uXXXX"))
  		    goto onError;
! 		x = 0xffffffff;
! 		i++;
! 		break;
  	    }
  	    x = (x<<4) & ~0xF;
  	    if (c >= '0' && c <= '9')
  		x += c - '0';
  	    else if (c >= 'a' && c <= 'f')
  		x += 10 + c - 'a';
  	    else
  		x += 10 + c - 'A';
  	}
! 	s += i;
! 	if (x != 0xffffffff)
! 		*p++ = x;
      }
!     if (_PyUnicode_Resize(&v, (int)(p - buf)))
  	goto onError;
      return (PyObject *)v;
      
   onError:
      Py_XDECREF(v);
      return NULL;
  }
  
  PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
  					   int size)
--- 2018,2062 ----
  	}
  	p--;
  	s++;
  
  	/* \uXXXX with 4 hex digits */
! 	outpos = p-PyUnicode_AS_UNICODE(v);
! 	for (x = 0, i = 0; i < 4; ++i, ++s) {
! 	    c = (unsigned char)*s;
  	    if (!isxdigit(c)) {
! 		endinpos = s-starts;
! 		if (unicode_decode_call_errorhandler(
! 		    errors, &errorHandler,
! 		    "rawunicodeescape", "truncated \\uXXXX",
! 		    starts, size, &startinpos, &endinpos, &exc, &s,
! 		    (PyObject **)&v, &outpos, &p))
  		    goto onError;
! 		goto nextByte;
  	    }
  	    x = (x<<4) & ~0xF;
  	    if (c >= '0' && c <= '9')
  		x += c - '0';
  	    else if (c >= 'a' && c <= 'f')
  		x += 10 + c - 'a';
  	    else
  		x += 10 + c - 'A';
  	}
! 	*p++ = x;
! 	nextByte:
! 	;
      }
!     if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	goto onError;
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)v;
      
   onError:
      Py_XDECREF(v);
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return NULL;
  }
  
  PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
  					   int size)
***************
*** 2018,2092 ****
   onError:
      Py_XDECREF(v);
      return NULL;
  }
  
! static
! int latin1_encoding_error(const Py_UNICODE **source,
! 			  char **dest,
! 			  const char *errors,
! 			  const char *details) 
! {
!     if ((errors == NULL) ||
! 	(strcmp(errors,"strict") == 0)) {
! 	PyErr_Format(PyExc_UnicodeError,
! 		     "Latin-1 encoding error: %.400s",
! 		     details);
! 	return -1;
      }
!     else if (strcmp(errors,"ignore") == 0) {
! 	return 0;
      }
!     else if (strcmp(errors,"replace") == 0) {
! 	**dest = '?';
! 	(*dest)++;
! 	return 0;
      }
!     else {
! 	PyErr_Format(PyExc_ValueError,
! 		     "Latin-1 encoding error; "
! 		     "unknown error handling code: %.400s",
! 		     errors);
! 	return -1;
      }
  }
  
! PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
  				 int size,
! 				 const char *errors)
  {
!     PyObject *repr;
!     char *s, *start;
! 
!     repr = PyString_FromStringAndSize(NULL, size);
!     if (repr == NULL)
!         return NULL;
      if (size == 0)
! 	return repr;
! 
!     s = PyString_AS_STRING(repr);
!     start = s;
!     while (size-- > 0) {
!         Py_UNICODE ch = *p++;
! 	if (ch >= 256) {
! 	    if (latin1_encoding_error(&p, &s, errors, 
! 				      "ordinal not in range(256)"))
! 		goto onError;
  	}
- 	else
-             *s++ = (char)ch;
      }
!     /* Resize if error handling skipped some characters */
!     if (s - start < PyString_GET_SIZE(repr))
! 	_PyString_Resize(&repr, s - start);
!     return repr;
  
!  onError:
!     Py_DECREF(repr);
      return NULL;
  }
  
  PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
  {
      if (!PyUnicode_Check(unicode)) {
  	PyErr_BadArgument();
  	return NULL;
--- 2132,2406 ----
   onError:
      Py_XDECREF(v);
      return NULL;
  }
  
! /* create or adjust a UnicodeEncodeError */
! static void make_encode_exception(PyObject **exceptionObject,
!     const char *encoding,
!     const Py_UNICODE *unicode, int size,
!     int startpos, int endpos,
!     const char *reason)
! {
!     if (*exceptionObject == NULL) {
!     	*exceptionObject = PyUnicodeEncodeError_Create(
! 	    encoding, unicode, size, startpos, endpos, reason);
      }
!     else {
! 	if (!PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
! 	    goto onError;
! 	if (!PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
! 	    goto onError;
! 	if (!PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
! 	    goto onError;
! 	return;
! 	onError:
! 	Py_DECREF(*exceptionObject);
! 	*exceptionObject = NULL;
      }
! }
! 
! /* raises a UnicodeEncodeError */
! static void raise_encode_exception(PyObject **exceptionObject,
!     const char *encoding,
!     const Py_UNICODE *unicode, int size,
!     int startpos, int endpos,
!     const char *reason)
! {
!     make_encode_exception(exceptionObject,
! 	encoding, unicode, size, startpos, endpos, reason);
!     if (*exceptionObject != NULL)
! 	PyCodec_StrictErrors(*exceptionObject);
! }
! 
! /* error handling callback helper:
!    build arguments, call the callback and check the arguments,
!    put the result into newpos and return the replacement string, which
!    has to be freed by the caller */
! static PyObject *unicode_encode_call_errorhandler(const char *errors,
!     PyObject **errorHandler,
!     const char *encoding, const char *reason,
!     const Py_UNICODE *unicode, int size, PyObject **exceptionObject,
!     int startpos, int endpos,
!     int *newpos)
! {
!     static char *argparse = "O!i;encoding error handler must return (unicode, int) tuple";
! 
!     PyObject *restuple;
!     PyObject *resunicode;
! 
!     if (*errorHandler == NULL) {
! 	*errorHandler = PyCodec_LookupError(errors);
!         if (*errorHandler == NULL)
! 	    return NULL;
!     }
! 
!     make_encode_exception(exceptionObject,
! 	encoding, unicode, size, startpos, endpos, reason);
!     if (*exceptionObject == NULL)
! 	return NULL;
! 
!     restuple = PyObject_CallFunctionObjArgs(
! 	*errorHandler, *exceptionObject, NULL);
!     if (restuple == NULL)
! 	return NULL;
!     if (!PyTuple_Check(restuple)) {
! 	PyErr_Format(PyExc_TypeError, &argparse[4]);
! 	Py_DECREF(restuple);
! 	return NULL;
      }
!     if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
! 	&resunicode, newpos)) {
! 	Py_DECREF(restuple);
! 	return NULL;
      }
+     if (*newpos<0)
+ 	*newpos = 0;
+     else if (*newpos>size)
+ 	*newpos = size;
+     Py_INCREF(resunicode);
+     Py_DECREF(restuple);
+     return resunicode;
  }
  
! static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
  				 int size,
! 				 const char *errors,
! 				 int limit)
  {
!     /* output object */
!     PyObject *res;
!     /* pointers to the beginning and end+1 of input */
!     const Py_UNICODE *startp = p;
!     const Py_UNICODE *endp = p + size;
!     /* pointer to the beginning of the unencodable characters */
!     /* const Py_UNICODE *badp = NULL; */
!     /* pointer into the output */
!     char *str;
!     /* current output position */
!     int respos = 0;
!     int ressize;
!     char *encoding = (limit == 256) ? "latin-1" : "ascii";
!     char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
!     PyObject *errorHandler = NULL;
!     PyObject *exc = NULL;
!     /* the following variable is used for caching string comparisons
!      * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
!     int known_errorHandler = -1;
! 
!     /* allocate enough for a simple encoding without
!        replacements, if we need more, we'll resize */
!     res = PyString_FromStringAndSize(NULL, size);
!     if (res == NULL)
!         goto onError;
      if (size == 0)
! 	return res;
!     str = PyString_AS_STRING(res);
!     ressize = size;
! 
!     while (p<endp) {
! 	Py_UNICODE c = *p;
! 
! 	/* can we encode this? */
! 	if (c<limit) {
! 	    /* no overflow check, because we know that the space is enough */
! 	    *str++ = (char)c;
! 	    ++p;
! 	}
! 	else {
! 	    int unicodepos = p-startp;
! 	    int requiredsize;
! 	    PyObject *repunicode;
! 	    int repsize;
! 	    int newpos;
! 	    int respos;
! 	    Py_UNICODE *uni2;
! 	    /* startpos for collecting unencodable chars */
! 	    const Py_UNICODE *collstart = p;
! 	    const Py_UNICODE *collend = p;
! 	    /* find all unencodable characters */
! 	    while ((collend < endp) && ((*collend)>=limit))
! 		++collend;
! 	    /* cache callback name lookup (if not done yet, i.e. it's the first error) */
! 	    if (known_errorHandler==-1) {
! 		if ((errors==NULL) || (!strcmp(errors, "strict")))
! 		    known_errorHandler = 1;
! 		else if (!strcmp(errors, "replace"))
! 		    known_errorHandler = 2;
! 		else if (!strcmp(errors, "ignore"))
! 		    known_errorHandler = 3;
! 		else if (!strcmp(errors, "xmlcharrefreplace"))
! 		    known_errorHandler = 4;
! 		else
! 		    known_errorHandler = 0;
! 	    }
! 	    switch (known_errorHandler) {
! 		case 1: /* strict */
! 		    raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
! 		    goto onError;
! 		case 2: /* replace */
! 		    while (collstart++<collend)
! 			*str++ = '?'; /* fall through */
! 		case 3: /* ignore */
! 		    p = collend;
! 		    break;
! 		case 4: /* xmlcharrefreplace */
! 		    respos = str-PyString_AS_STRING(res);
! 		    /* determine replacement size (temporarily (mis)uses p) */
! 		    for (p = collstart, repsize = 0; p < collend; ++p) {
! 			if (*p<10)
! 			    repsize += 2+1+1;
! 			else if (*p<100)
! 			    repsize += 2+2+1;
! 			else if (*p<1000)
! 			    repsize += 2+3+1;
! 			else if (*p<10000)
! 			    repsize += 2+4+1;
! 			else if (*p<100000)
! 			    repsize += 2+5+1;
! 			else if (*p<1000000)
! 			    repsize += 2+6+1;
! 			else
! 			    repsize += 2+7+1;
! 		    }
! 		    requiredsize = respos+repsize+(endp-collend);
! 		    if (requiredsize > ressize) {
! 			if (requiredsize<2*ressize)
! 			    requiredsize = 2*ressize;
! 			if (_PyString_Resize(&res, requiredsize))
! 			    goto onError;
! 			str = PyString_AS_STRING(res) + respos;
! 			ressize = requiredsize;
! 		    }
! 		    /* generate replacement (temporarily (mis)uses p) */
! 		    for (p = collstart; p < collend; ++p) {
! 			str += sprintf(str, "&#%d;", (int)*p);
! 		    }
! 		    p = collend;
! 		    break;
! 		default:
! 		    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
! 			encoding, reason, startp, size, &exc,
! 			collstart-startp, collend-startp, &newpos);
! 		    if (repunicode == NULL)
! 			goto onError;
! 		    /* need more space? (at least enough for what we
! 		       have+the replacement+the rest of the string, so
! 		       we won't have to check space for encodable characters) */
! 		    respos = str-PyString_AS_STRING(res);
! 		    repsize = PyUnicode_GET_SIZE(repunicode);
! 		    requiredsize = respos+repsize+(endp-collend);
! 		    if (requiredsize > ressize) {
! 			if (requiredsize<2*ressize)
! 			    requiredsize = 2*ressize;
! 			if (_PyString_Resize(&res, requiredsize)) {
! 			    Py_DECREF(repunicode);
! 			    goto onError;
! 			}
! 			str = PyString_AS_STRING(res) + respos;
! 			ressize = requiredsize;
! 		    }
! 		    /* check if there is anything unencodable in the replacement
! 		       and copy it to the output */
! 		    for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
! 			c = *uni2;
! 			if (c >= limit) {
! 			    raise_encode_exception(&exc, encoding, startp, size,
! 				unicodepos, unicodepos+1, reason);
! 			    Py_DECREF(repunicode);
! 			    goto onError;
! 			}
! 			*str = (char)c;
! 		    }
! 		    p = startp + newpos;
! 		    Py_DECREF(repunicode);
! 	    }
  	}
      }
!     /* Resize if we allocated too much */
!     respos = str-PyString_AS_STRING(res);
!     if (respos<ressize)
!        /* If this fails, res will be NULL */
! 	_PyString_Resize(&res, respos);
!     Py_XDECREF(errorHandler);
!     Py_XDECREF(exc);
!     return res;
  
!     onError:
!     Py_XDECREF(res);
!     Py_XDECREF(errorHandler);
!     Py_XDECREF(exc);
      return NULL;
  }
  
+ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+ 				 int size,
+ 				 const char *errors)
+ {
+     return unicode_encode_ucs1(p, size, errors, 256);
+ }
+ 
  PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
  {
      if (!PyUnicode_Check(unicode)) {
  	PyErr_BadArgument();
  	return NULL;
***************
*** 2096,2141 ****
  				  NULL);
  }
  
  /* --- 7-bit ASCII Codec -------------------------------------------------- */
  
- static
- int ascii_decoding_error(const char **source,
- 			 Py_UNICODE **dest,
- 			 const char *errors,
- 			 const char *details) 
- {
-     if ((errors == NULL) ||
- 	(strcmp(errors,"strict") == 0)) {
- 	PyErr_Format(PyExc_UnicodeError,
- 		     "ASCII decoding error: %.400s",
- 		     details);
- 	return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
- 	return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
- 	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
- 	(*dest)++;
- 	return 0;
-     }
-     else {
- 	PyErr_Format(PyExc_ValueError,
- 		     "ASCII decoding error; "
- 		     "unknown error handling code: %.400s",
- 		     errors);
- 	return -1;
-     }
- }
- 
  PyObject *PyUnicode_DecodeASCII(const char *s,
  				int size,
  				const char *errors)
  {
      PyUnicodeObject *v;
      Py_UNICODE *p;
      
      /* ASCII is equivalent to the first 128 ordinals in Unicode. */
      if (size == 1 && *(unsigned char*)s < 128) {
  	Py_UNICODE r = *(unsigned char*)s;
  	return PyUnicode_FromUnicode(&r, 1);
--- 2410,2432 ----
  				  NULL);
  }
  
  /* --- 7-bit ASCII Codec -------------------------------------------------- */
  
  PyObject *PyUnicode_DecodeASCII(const char *s,
  				int size,
  				const char *errors)
  {
+     const char *starts = s;
      PyUnicodeObject *v;
      Py_UNICODE *p;
+     int startinpos;
+     int endinpos;
+     int outpos;
+     const char *e;
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
      
      /* ASCII is equivalent to the first 128 ordinals in Unicode. */
      if (size == 1 && *(unsigned char*)s < 128) {
  	Py_UNICODE r = *(unsigned char*)s;
  	return PyUnicode_FromUnicode(&r, 1);
***************
*** 2145,2237 ****
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
      p = PyUnicode_AS_UNICODE(v);
!     while (size-- > 0) {
! 	register unsigned char c;
! 
! 	c = (unsigned char)*s++;
! 	if (c < 128)
  	    *p++ = c;
! 	else if (ascii_decoding_error(&s, &p, errors, 
! 				      "ordinal not in range(128)"))
  		goto onError;
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
  	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	    goto onError;
      return (PyObject *)v;
      
   onError:
      Py_XDECREF(v);
      return NULL;
  }
  
- static
- int ascii_encoding_error(const Py_UNICODE **source,
- 			 char **dest,
- 			 const char *errors,
- 			 const char *details) 
- {
-     if ((errors == NULL) ||
- 	(strcmp(errors,"strict") == 0)) {
- 	PyErr_Format(PyExc_UnicodeError,
- 		     "ASCII encoding error: %.400s",
- 		     details);
- 	return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
- 	return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
- 	**dest = '?';
- 	(*dest)++;
- 	return 0;
-     }
-     else {
- 	PyErr_Format(PyExc_ValueError,
- 		     "ASCII encoding error; "
- 		     "unknown error handling code: %.400s",
- 		     errors);
- 	return -1;
-     }
- }
- 
  PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
  				int size,
  				const char *errors)
  {
!     PyObject *repr;
!     char *s, *start;
! 
!     repr = PyString_FromStringAndSize(NULL, size);
!     if (repr == NULL)
!         return NULL;
!     if (size == 0)
! 	return repr;
! 
!     s = PyString_AS_STRING(repr);
!     start = s;
!     while (size-- > 0) {
!         Py_UNICODE ch = *p++;
! 	if (ch >= 128) {
! 	    if (ascii_encoding_error(&p, &s, errors, 
! 				      "ordinal not in range(128)"))
! 		goto onError;
! 	}
! 	else
!             *s++ = (char)ch;
!     }
!     /* Resize if error handling skipped some characters */
!     if (s - start < PyString_GET_SIZE(repr))
! 	_PyString_Resize(&repr, s - start);
!     return repr;
! 
!  onError:
!     Py_DECREF(repr);
!     return NULL;
  }
  
  PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
  {
      if (!PyUnicode_Check(unicode)) {
--- 2436,2483 ----
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
      p = PyUnicode_AS_UNICODE(v);
!     e = s + size;
!     while (s < e) {
! 	register unsigned char c = (unsigned char)*s;
! 	if (c < 128) {
  	    *p++ = c;
! 	    ++s;
! 	}
! 	else {
! 	    startinpos = s-starts;
! 	    endinpos = startinpos + 1;
! 	    outpos = p-PyUnicode_AS_UNICODE(v);
! 	    if (unicode_decode_call_errorhandler(
! 		 errors, &errorHandler,
! 		 "ascii", "ordinal not in range(128)",
! 		 starts, size, &startinpos, &endinpos, &exc, &s,
! 		 (PyObject **)&v, &outpos, &p))
  		goto onError;
+ 	}
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
  	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	    goto onError;
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)v;
      
   onError:
      Py_XDECREF(v);
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return NULL;
  }
  
  PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
  				int size,
  				const char *errors)
  {
!     return unicode_encode_ucs1(p, size, errors, 128);
  }
  
  PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
  {
      if (!PyUnicode_Check(unicode)) {
***************
*** 2307,2354 ****
  
  #endif /* MS_WINDOWS */
  
  /* --- Character Mapping Codec -------------------------------------------- */
  
- static
- int charmap_decoding_error(const char **source,
- 			 Py_UNICODE **dest,
- 			 const char *errors,
- 			 const char *details) 
- {
-     if ((errors == NULL) ||
- 	(strcmp(errors,"strict") == 0)) {
- 	PyErr_Format(PyExc_UnicodeError,
- 		     "charmap decoding error: %.400s",
- 		     details);
- 	return -1;
-     }
-     else if (strcmp(errors,"ignore") == 0) {
- 	return 0;
-     }
-     else if (strcmp(errors,"replace") == 0) {
- 	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
- 	(*dest)++;
- 	return 0;
-     }
-     else {
- 	PyErr_Format(PyExc_ValueError,
- 		     "charmap decoding error; "
- 		     "unknown error handling code: %.400s",
- 		     errors);
- 	return -1;
-     }
- }
- 
  PyObject *PyUnicode_DecodeCharmap(const char *s,
  				  int size,
  				  PyObject *mapping,
  				  const char *errors)
  {
      PyUnicodeObject *v;
      Py_UNICODE *p;
      int extrachars = 0;
      
      /* Default to Latin-1 */
      if (mapping == NULL)
  	return PyUnicode_DecodeLatin1(s, size, errors);
  
--- 2553,2577 ----
  
  #endif /* MS_WINDOWS */
  
  /* --- Character Mapping Codec -------------------------------------------- */
  
  PyObject *PyUnicode_DecodeCharmap(const char *s,
  				  int size,
  				  PyObject *mapping,
  				  const char *errors)
  {
+     const char *starts = s;
+     int startinpos;
+     int endinpos;
+     int outpos;
+     const char *e;
      PyUnicodeObject *v;
      Py_UNICODE *p;
      int extrachars = 0;
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
      
      /* Default to Latin-1 */
      if (mapping == NULL)
  	return PyUnicode_DecodeLatin1(s, size, errors);
  
***************
*** 2356,2367 ****
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
      p = PyUnicode_AS_UNICODE(v);
!     while (size-- > 0) {
! 	unsigned char ch = *s++;
  	PyObject *w, *x;
  
  	/* Get mapping (char ordinal -> integer, Unicode char or None) */
  	w = PyInt_FromLong((long)ch);
  	if (w == NULL)
--- 2579,2591 ----
      if (v == NULL)
  	goto onError;
      if (size == 0)
  	return (PyObject *)v;
      p = PyUnicode_AS_UNICODE(v);
!     e = s + size;
!     while (s < e) {
! 	unsigned char ch = *s;
  	PyObject *w, *x;
  
  	/* Get mapping (char ordinal -> integer, Unicode char or None) */
  	w = PyInt_FromLong((long)ch);
  	if (w == NULL)
***************
*** 2389,2403 ****
  	    }
  	    *p++ = (Py_UNICODE)value;
  	}
  	else if (x == Py_None) {
  	    /* undefined mapping */
! 	    if (charmap_decoding_error(&s, &p, errors, 
! 				       "character maps to <undefined>")) {
  		Py_DECREF(x);
  		goto onError;
  	    }
  	}
  	else if (PyUnicode_Check(x)) {
  	    int targetsize = PyUnicode_GET_SIZE(x);
  
  	    if (targetsize == 1)
--- 2613,2634 ----
  	    }
  	    *p++ = (Py_UNICODE)value;
  	}
  	else if (x == Py_None) {
  	    /* undefined mapping */
! 	    outpos = p-PyUnicode_AS_UNICODE(v);
! 	    startinpos = s-starts;
! 	    endinpos = startinpos+1;
! 	    if (unicode_decode_call_errorhandler(
! 		 errors, &errorHandler,
! 		 "charmap", "character maps to <undefined>",
! 		 starts, size, &startinpos, &endinpos, &exc, &s,
! 		 (PyObject **)&v, &outpos, &p)) {
  		Py_DECREF(x);
  		goto onError;
  	    }
+ 	    continue;
  	}
  	else if (PyUnicode_Check(x)) {
  	    int targetsize = PyUnicode_GET_SIZE(x);
  
  	    if (targetsize == 1)
***************
*** 2433,2583 ****
  		  "character mapping must return integer, None or unicode");
  	    Py_DECREF(x);
  	    goto onError;
  	}
  	Py_DECREF(x);
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
  	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	    goto onError;
      return (PyObject *)v;
      
   onError:
      Py_XDECREF(v);
      return NULL;
  }
  
! static
! int charmap_encoding_error(const Py_UNICODE **source,
! 			   char **dest,
! 			   const char *errors,
! 			   const char *details) 
! {
!     if ((errors == NULL) ||
! 	(strcmp(errors,"strict") == 0)) {
! 	PyErr_Format(PyExc_UnicodeError,
! 		     "charmap encoding error: %.400s",
! 		     details);
! 	return -1;
      }
!     else if (strcmp(errors,"ignore") == 0) {
! 	return 0;
      }
!     else if (strcmp(errors,"replace") == 0) {
! 	**dest = '?';
! 	(*dest)++;
! 	return 0;
      }
      else {
! 	PyErr_Format(PyExc_ValueError,
! 		     "charmap encoding error; "
! 		     "unknown error handling code: %.400s",
! 		     errors);
! 	return -1;
      }
  }
  
  PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
  				  int size,
  				  PyObject *mapping,
  				  const char *errors)
  {
!     PyObject *v;
!     char *s;
!     int extrachars = 0;
  
      /* Default to Latin-1 */
      if (mapping == NULL)
  	return PyUnicode_EncodeLatin1(p, size, errors);
  
!     v = PyString_FromStringAndSize(NULL, size);
!     if (v == NULL)
!         return NULL;
      if (size == 0)
! 	return v;
!     s = PyString_AS_STRING(v);
!     while (size-- > 0) {
! 	Py_UNICODE ch = *p++;
! 	PyObject *w, *x;
  
! 	/* Get mapping (Unicode ordinal -> string char, integer or None) */
! 	w = PyInt_FromLong((long)ch);
! 	if (w == NULL)
  	    goto onError;
! 	x = PyObject_GetItem(mapping, w);
! 	Py_DECREF(w);
! 	if (x == NULL) {
! 	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
! 		/* No mapping found means: mapping is undefined. */
! 		PyErr_Clear();
! 		x = Py_None;
! 		Py_INCREF(x);
! 	    } else
! 		goto onError;
! 	}
! 
! 	/* Apply mapping */
! 	if (PyInt_Check(x)) {
! 	    long value = PyInt_AS_LONG(x);
! 	    if (value < 0 || value > 255) {
! 		PyErr_SetString(PyExc_TypeError,
! 				"character mapping must be in range(256)");
! 		Py_DECREF(x);
! 		goto onError;
! 	    }
! 	    *s++ = (char)value;
! 	}
! 	else if (x == Py_None) {
! 	    /* undefined mapping */
! 	    if (charmap_encoding_error(&p, &s, errors, 
! 				       "character maps to <undefined>")) {
! 		Py_DECREF(x);
  		goto onError;
- 	    }
  	}
! 	else if (PyString_Check(x)) {
! 	    int targetsize = PyString_GET_SIZE(x);
! 
! 	    if (targetsize == 1)
! 		/* 1-1 mapping */
! 		*s++ = *PyString_AS_STRING(x);
  
! 	    else if (targetsize > 1) {
! 		/* 1-n mapping */
! 		if (targetsize > extrachars) {
! 		    /* resize first */
! 		    int oldpos = (int)(s - PyString_AS_STRING(v));
! 		    int needed = (targetsize - extrachars) + \
! 			         (targetsize << 2);
! 		    extrachars += needed;
! 		    if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) {
! 			Py_DECREF(x);
! 			goto onError;
! 		    }
! 		    s = PyString_AS_STRING(v) + oldpos;
! 		}
! 		memcpy(s, PyString_AS_STRING(x), targetsize);
! 		s += targetsize;
! 		extrachars -= targetsize;
! 	    }
! 	    /* 1-0 mapping: skip the character */
! 	}
! 	else {
! 	    /* wrong return value */
! 	    PyErr_SetString(PyExc_TypeError,
! 		  "character mapping must return integer, None or unicode");
! 	    Py_DECREF(x);
  	    goto onError;
- 	}
- 	Py_DECREF(x);
      }
!     if (s - PyString_AS_STRING(v) < PyString_GET_SIZE(v))
! 	_PyString_Resize(&v, (int)(s - PyString_AS_STRING(v)));
!     return v;
  
!  onError:
!     Py_XDECREF(v);
      return NULL;
  }
  
  PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
  				    PyObject *mapping)
--- 2664,2963 ----
  		  "character mapping must return integer, None or unicode");
  	    Py_DECREF(x);
  	    goto onError;
  	}
  	Py_DECREF(x);
+ 	++s;
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
  	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	    goto onError;
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      return (PyObject *)v;
      
   onError:
+     Py_XDECREF(errorHandler);
+     Py_XDECREF(exc);
      Py_XDECREF(v);
      return NULL;
  }
  
! /* Lookup the character c in the mapping. If the character
!    can't be found, Py_None is returned (or NULL, if another
!    error occurred). */
! static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
! {
!     PyObject *w = PyInt_FromLong((long)c);
!     PyObject *x;
! 
!     if (w == NULL)
! 	 return NULL;
!     x = PyObject_GetItem(mapping, w);
!     Py_DECREF(w);
!     if (x == NULL) {
! 	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
! 	    /* No mapping found means: mapping is undefined. */
! 	    PyErr_Clear();
! 	    x = Py_None;
! 	    Py_INCREF(x);
! 	    return x;
! 	} else
! 	    return NULL;
      }
!     else if (PyInt_Check(x)) {
! 	long value = PyInt_AS_LONG(x);
! 	if (value < 0 || value > 255) {
! 	    PyErr_SetString(PyExc_TypeError,
! 			     "character mapping must be in range(256)");
! 	    Py_DECREF(x);
! 	    return NULL;
! 	}
! 	return x;
      }
!     else if (PyString_Check(x))
! 	return x;
!     else {
! 	/* wrong return value */
! 	PyErr_SetString(PyExc_TypeError,
! 	      "character mapping must return integer, None or str");
! 	Py_DECREF(x);
! 	return NULL;
      }
+ }
+ 
+ /* lookup the character, put the result in the output string and adjust
+    various state variables. Reallocate the output string if not enough
+    space is available. Return a new reference to the object that
+    was put in the output buffer, or Py_None, if the mapping was undefined
+    (in which case no character was written) or NULL, if a lookup or
+    reallocation error occurred. The caller must decref the result */
+ static
+ PyObject *charmapencode_output(Py_UNICODE c, PyObject *mapping,
+     PyObject **outobj, int *outpos)
+ {
+     PyObject *rep = charmapencode_lookup(c, mapping);
+ 
+     if (rep==NULL)
+ 	return NULL;
+     else if (rep==Py_None)
+ 	return rep;
      else {
! 	char *outstart = PyString_AS_STRING(*outobj);
! 	int outsize = PyString_GET_SIZE(*outobj);
! 	if (PyInt_Check(rep)) {
! 	    int requiredsize = *outpos+1;
! 	    if (outsize<requiredsize) {
! 		/* exponentially overallocate to minimize reallocations */
! 		if (requiredsize < 2*outsize)
! 		    requiredsize = 2*outsize;
! 		if (_PyString_Resize(outobj, requiredsize)) {
! 		    Py_DECREF(rep);
! 		    return NULL;
! 		}
! 		outstart = PyString_AS_STRING(*outobj);
! 	    }
! 	    outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep);
! 	}
! 	else {
! 	    const char *repchars = PyString_AS_STRING(rep);
! 	    int repsize = PyString_GET_SIZE(rep);
! 	    int requiredsize = *outpos+repsize;
! 	    if (outsize<requiredsize) {
! 		/* exponentially overallocate to minimize reallocations */
! 		if (requiredsize < 2*outsize)
! 		    requiredsize = 2*outsize;
! 		if (_PyString_Resize(outobj, requiredsize)) {
! 		    Py_DECREF(rep);
! 		    return NULL;
! 		}
! 		outstart = PyString_AS_STRING(*outobj);
! 	    }
! 	    memcpy(outstart + *outpos, repchars, repsize);
! 	    *outpos += repsize;
! 	}
      }
+     return rep;
+ }
+ 
+ /* handle an error in PyUnicode_EncodeCharmap; a looked-up callback is cached
+    in *errorHandler (caller decrefs it). Return 0 on success, -1 on error */
+ static
+ int charmap_encoding_error(
+     const Py_UNICODE *p, int size, int *inpos, PyObject *mapping,
+     PyObject **exceptionObject,
+     int *known_errorHandler, PyObject **errorHandler, const char *errors,
+     PyObject **res, int *respos)
+ {
+     PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+     int repsize;
+     int newpos;
+     Py_UNICODE *uni2;
+     /* startpos for collecting unencodable chars */
+     int collstartpos = *inpos;
+     int collendpos = *inpos+1;
+     int collpos;
+     char *encoding = "charmap";
+     char *reason = "character maps to <undefined>";
+ 
+     PyObject *x;
+     /* find all unencodable characters */
+     while (collendpos < size) {
+ 	x = charmapencode_lookup(p[collendpos], mapping);
+ 	if (x==NULL)
+ 	    return -1;
+ 	else if (x!=Py_None) {
+ 	    Py_DECREF(x);
+ 	    break;
+ 	}
+ 	Py_DECREF(x);
+ 	++collendpos;
+     }
+     /* cache callback name lookup
+      * (if not done yet, i.e. it's the first error) */
+     if (*known_errorHandler==-1) {
+ 	if ((errors==NULL) || (!strcmp(errors, "strict")))
+ 	    *known_errorHandler = 1;
+ 	else if (!strcmp(errors, "replace"))
+ 	    *known_errorHandler = 2;
+ 	else if (!strcmp(errors, "ignore"))
+ 	    *known_errorHandler = 3;
+ 	else if (!strcmp(errors, "xmlcharrefreplace"))
+ 	    *known_errorHandler = 4;
+ 	else
+ 	    *known_errorHandler = 0;
+     }
+     switch (*known_errorHandler) {
+ 	case 1: /* strict */
+ 	    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ 	    return -1;
+ 	case 2: /* replace */
+ 	    for (collpos = collstartpos; collpos<collendpos; ++collpos) {
+ 		x = charmapencode_output('?', mapping, res, respos);
+ 		if (x==NULL) {
+ 		    return -1;
+ 		}
+ 		else if (x==Py_None) {
+ 		    Py_DECREF(x);
+ 		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ 		    return -1;
+ 		}
+ 		Py_DECREF(x);
+ 	    }
+ 	    /* fall through */
+ 	case 3: /* ignore */
+ 	    *inpos = collendpos;
+ 	    break;
+ 	case 4: /* xmlcharrefreplace */
+ 	    /* generate replacement: one "&#N;" reference per collected char */
+ 	    for (collpos = collstartpos; collpos < collendpos; ++collpos) {
+ 		char buffer[2+29+1+1];
+ 		char *cp;
+ 		sprintf(buffer, "&#%d;", (int)p[collpos]);
+ 		for (cp = buffer; *cp; ++cp) {
+ 		    x = charmapencode_output(*cp, mapping, res, respos);
+ 		    if (x==NULL)
+ 			return -1;
+ 		    else if (x==Py_None) {
+ 			Py_DECREF(x);
+ 			raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ 			return -1;
+ 		    }
+ 		    Py_DECREF(x);
+ 		}
+ 	    }
+ 	    *inpos = collendpos;
+ 	    break;
+ 	default:
+ 	    repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
+ 		encoding, reason, p, size, exceptionObject,
+ 		collstartpos, collendpos, &newpos);
+ 	    if (repunicode == NULL)
+ 		return -1;
+ 	    /* generate replacement  */
+ 	    repsize = PyUnicode_GET_SIZE(repunicode);
+ 	    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+ 		x = charmapencode_output(*uni2, mapping, res, respos);
+ 		if (x==NULL) {
+ 		    Py_DECREF(repunicode);
+ 		    return -1;
+ 		}
+ 		else if (x==Py_None) {
+ 		    Py_DECREF(repunicode);
+ 		    Py_DECREF(x);
+ 		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ 		    return -1;
+ 		}
+ 		Py_DECREF(x);
+ 	    }
+ 	    *inpos = newpos;
+ 	    Py_DECREF(repunicode);
+     }
+     return 0;
  }
  
  PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
  				  int size,
  				  PyObject *mapping,
  				  const char *errors)
  {
!     /* output object */
!     PyObject *res = NULL;
!     /* current input position */
!     int inpos = 0;
!     /* current output position */
!     int respos = 0;
!     PyObject *errorHandler = NULL;
!     PyObject *exc = NULL;
!     /* the following variable is used for caching string comparisons
!      * -1=not initialized, 0=unknown, 1=strict, 2=replace,
!      * 3=ignore, 4=xmlcharrefreplace */
!     int known_errorHandler = -1;
  
      /* Default to Latin-1 */
      if (mapping == NULL)
  	return PyUnicode_EncodeLatin1(p, size, errors);
  
!     /* allocate enough for a simple encoding without
!        replacements, if we need more, we'll resize */
!     res = PyString_FromStringAndSize(NULL, size);
!     if (res == NULL)
!         goto onError;
      if (size == 0)
! 	return res;
  
!     while (inpos<size) {
! 	/* try to encode it */
! 	PyObject *x = charmapencode_output(p[inpos], mapping, &res, &respos);
! 	if (x==NULL) /* error */
  	    goto onError;
! 	if (x==Py_None) { /* unencodable character */
! 	    Py_DECREF(x); /* drop it first so the error exit cannot leak it */
! 	    if (charmap_encoding_error(p, size, &inpos, mapping, &exc,
! 		&known_errorHandler, &errorHandler, errors,
! 		&res, &respos))
  		goto onError;
  	}
! 	else {
! 	    ++inpos; /* done with this character => adjust input position */
! 	    Py_DECREF(x);
! 	}
!     }
  
!     /* Resize if we allocated too much */
!     if (respos<PyString_GET_SIZE(res)) {
! 	if (_PyString_Resize(&res, respos))
  	    goto onError;
      }
!     Py_XDECREF(exc);
!     Py_XDECREF(errorHandler);
!     return res;
  
!     onError:
!     Py_XDECREF(res);
!     Py_XDECREF(exc);
!     Py_XDECREF(errorHandler);
      return NULL;
  }
  
  PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
  				    PyObject *mapping)
***************
*** 2590,2708 ****
  				   PyUnicode_GET_SIZE(unicode),
  				   mapping,
  				   NULL);
  }
  
  static
! int translate_error(const Py_UNICODE **source,
! 		    Py_UNICODE **dest,
! 		    const char *errors,
! 		    const char *details) 
! {
!     if ((errors == NULL) ||
! 	(strcmp(errors,"strict") == 0)) {
! 	PyErr_Format(PyExc_UnicodeError,
! 		     "translate error: %.400s",
! 		     details);
! 	return -1;
      }
!     else if (strcmp(errors,"ignore") == 0) {
  	return 0;
      }
!     else if (strcmp(errors,"replace") == 0) {
! 	**dest = '?';
! 	(*dest)++;
  	return 0;
      }
      else {
! 	PyErr_Format(PyExc_ValueError,
! 		     "translate error; "
! 		     "unknown error handling code: %.400s",
! 		     errors);
  	return -1;
      }
  }
  
! PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s,
  				     int size,
  				     PyObject *mapping,
  				     const char *errors)
  {
!     PyUnicodeObject *v;
!     Py_UNICODE *p;
!     
      if (mapping == NULL) {
  	PyErr_BadArgument();
  	return NULL;
      }
-     
-     /* Output will never be longer than input */
-     v = _PyUnicode_New(size);
-     if (v == NULL)
- 	goto onError;
-     if (size == 0)
- 	goto done;
-     p = PyUnicode_AS_UNICODE(v);
-     while (size-- > 0) {
- 	Py_UNICODE ch = *s++;
- 	PyObject *w, *x;
  
! 	/* Get mapping */
! 	w = PyInt_FromLong(ch);
! 	if (w == NULL)
! 	    goto onError;
! 	x = PyObject_GetItem(mapping, w);
! 	Py_DECREF(w);
! 	if (x == NULL) {
! 	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
! 		/* No mapping found: default to 1-1 mapping */
! 		PyErr_Clear();
! 		*p++ = ch;
! 		continue;
! 	    }
  	    goto onError;
  	}
! 
! 	/* Apply mapping */
! 	if (PyInt_Check(x))
! 	    *p++ = (Py_UNICODE)PyInt_AS_LONG(x);
! 	else if (x == Py_None) {
! 	    /* undefined mapping */
! 	    if (translate_error(&s, &p, errors, 
! 				"character maps to <undefined>")) {
! 		Py_DECREF(x);
! 		goto onError;
! 	    }
! 	}
! 	else if (PyUnicode_Check(x)) {
! 	    if (PyUnicode_GET_SIZE(x) != 1) {
! 		/* 1-n mapping */
! 		PyErr_SetString(PyExc_NotImplementedError,
! 				"1-n mappings are currently not implemented");
! 		Py_DECREF(x);
! 		goto onError;
  	    }
- 	    *p++ = *PyUnicode_AS_UNICODE(x);
  	}
- 	else {
- 	    /* wrong return value */
- 	    PyErr_SetString(PyExc_TypeError,
- 		  "translate mapping must return integer, None or unicode");
- 	    Py_DECREF(x);
- 	    goto onError;
- 	}
- 	Py_DECREF(x);
      }
!     if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
! 	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
  	    goto onError;
  
!  done:
!     return (PyObject *)v;
!     
!  onError:
!     Py_XDECREF(v);
      return NULL;
  }
  
  PyObject *PyUnicode_Translate(PyObject *str,
  			      PyObject *mapping,
--- 2970,3317 ----
  				   PyUnicode_GET_SIZE(unicode),
  				   mapping,
  				   NULL);
  }
  
+ /* create or adjust a UnicodeTranslateError */
+ static void make_translate_exception(PyObject **exceptionObject,
+     const Py_UNICODE *unicode, int size,
+     int startpos, int endpos,
+     const char *reason)
+ {
+     if (*exceptionObject == NULL) {
+     	*exceptionObject = PyUnicodeTranslateError_Create(
+ 	    unicode, size, startpos, endpos, reason);
+     }
+     else {
+ 	if (!PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
+ 	    goto onError;
+ 	if (!PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
+ 	    goto onError;
+ 	if (!PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
+ 	    goto onError;
+ 	return;
+ 	onError:
+ 	Py_DECREF(*exceptionObject);
+ 	*exceptionObject = NULL;
+     }
+ }
+ 
+ /* raises a UnicodeTranslateError */
+ static void raise_translate_exception(PyObject **exceptionObject,
+     const Py_UNICODE *unicode, int size,
+     int startpos, int endpos,
+     const char *reason)
+ {
+     make_translate_exception(exceptionObject,
+ 	unicode, size, startpos, endpos, reason);
+     if (*exceptionObject != NULL)
+ 	PyCodec_StrictErrors(*exceptionObject);
+ }
+ 
+ /* error handling callback helper:
+    build arguments, call the callback and check its result,
+    put the new position into newpos and return the replacement string,
+    which has to be freed by the caller */
+ static PyObject *unicode_translate_call_errorhandler(const char *errors,
+     PyObject **errorHandler,
+     const char *reason,
+     const Py_UNICODE *unicode, int size, PyObject **exceptionObject,
+     int startpos, int endpos,
+     int *newpos)
+ {
+     static char *argparse = "O!i;translating error handler must return (unicode, int) tuple";
+ 
+     PyObject *restuple;
+     PyObject *resunicode;
+ 
+     if (*errorHandler == NULL) {
+ 	*errorHandler = PyCodec_LookupError(errors);
+         if (*errorHandler == NULL)
+ 	    return NULL;
+     }
+ 
+     make_translate_exception(exceptionObject,
+ 	unicode, size, startpos, endpos, reason);
+     if (*exceptionObject == NULL)
+ 	return NULL;
+ 
+     restuple = PyObject_CallFunctionObjArgs(
+ 	*errorHandler, *exceptionObject, NULL);
+     if (restuple == NULL)
+ 	return NULL;
+     if (!PyTuple_Check(restuple)) {
+ 	PyErr_Format(PyExc_TypeError, &argparse[4]);
+ 	Py_DECREF(restuple);
+ 	return NULL;
+     }
+     if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
+ 	&resunicode, newpos)) {
+ 	Py_DECREF(restuple);
+ 	return NULL;
+     }
+     if (*newpos<0)
+ 	*newpos = 0;
+     else if (*newpos>size)
+ 	*newpos = size;
+     Py_INCREF(resunicode);
+     Py_DECREF(restuple);
+     return resunicode;
+ }
+ 
+ /* Lookup the character c in the mapping and put the result in *result
+    (NULL if there is no mapping, i.e. 1:1); the caller must decref it.
+    Return 0 on success, -1 on error */
  static
! int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
! {
!     PyObject *w = PyInt_FromLong((long)c);
!     PyObject *x;
! 
!     if (w == NULL)
! 	 return -1;
!     x = PyObject_GetItem(mapping, w);
!     Py_DECREF(w);
!     if (x == NULL) {
! 	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
! 	    /* No mapping found means: use 1:1 mapping. */
! 	    PyErr_Clear();
! 	    *result = NULL;
! 	    return 0;
! 	} else
! 	    return -1;
      }
!     else if (x == Py_None) {
! 	*result = x;
  	return 0;
      }
!     else if (PyInt_Check(x)) {
! 	long value = PyInt_AS_LONG(x);
! 	long max = PyUnicode_GetMax();
! 	if (value < 0 || value > max) {
! 	    PyErr_Format(PyExc_TypeError,
! 			     "character mapping must be in range(0x%lx)", max+1);
! 	    Py_DECREF(x);
! 	    return -1;
! 	}
! 	*result = x;
! 	return 0;
!     }
!     else if (PyUnicode_Check(x)) {
! 	*result = x;
  	return 0;
      }
      else {
! 	Py_DECREF(x); /* wrong return value: drop the reference we own */
! 	PyErr_SetString(PyExc_TypeError,
! 	      "character mapping must return integer, None or unicode");
  	return -1;
      }
  }
+ /* ensure that *outobj is at least requiredsize characters long,
+ if not reallocate and adjust various state variables.
+ Return 0 on success, -1 on error */
+ static
+ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
+     int requiredsize)
+ {
+     if (requiredsize > *outsize) {
+ 	/* remember old output position */
+ 	int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
+ 	/* exponentially overallocate to minimize reallocations */
+ 	if (requiredsize < 2 * *outsize)
+ 	    requiredsize = 2 * *outsize;
+ 	if (_PyUnicode_Resize(outobj, requiredsize))
+ 	    return -1;
+ 	*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
+ 	*outsize = requiredsize;
+     }
+     return 0;
+ }
+ /* lookup the character, put the result in the output string and adjust
+    various state variables. Return a new reference to the object that
+    was put in the output buffer in *res, or Py_None, if the mapping was
+    undefined (in which case no character was written).
+    The caller must decref *res.
+    Return 0 on success, -1 on error. */
+ static
+ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
+     PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
+ {
+     if (charmaptranslate_lookup(c, mapping, res))
+ 	return -1;
+     if (*res==NULL) {
+ 	/* not found => default to 1:1 mapping */
+ 	*(*outp)++ = (Py_UNICODE)c;
+     }
+     else if (*res==Py_None)
+ 	;
+     else if (PyInt_Check(*res)) {
+ 	/* no overflow check, because we know that the space is enough */
+ 	*(*outp)++ = (Py_UNICODE)PyInt_AS_LONG(*res);
+     }
+     else if (PyUnicode_Check(*res)) {
+ 	int repsize = PyUnicode_GET_SIZE(*res);
+ 	if (repsize==1) {
+ 	    /* no overflow check, because we know that the space is enough */
+ 	    *(*outp)++ = *PyUnicode_AS_UNICODE(*res);
+ 	}
+ 	else if (repsize!=0) {
+ 	    /* more than one character */
+ 	    int requiredsize = *outsize + repsize - 1;
+ 	    if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize))
+ 		return -1;
+ 	    memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
+ 	    *outp += repsize;
+ 	}
+     }
+     else
+ 	return -1;
+     return 0;
+ }
  
! PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
  				     int size,
  				     PyObject *mapping,
  				     const char *errors)
  {
!     /* output object */
!     PyObject *res = NULL;
!     /* pointers to the beginning and end+1 of input */
!     const Py_UNICODE *startp = p;
!     const Py_UNICODE *endp = p + size;
!     /* pointer into the output */
!     Py_UNICODE *str;
!     /* current output position */
!     int respos = 0;
!     int ressize;
!     char *reason = "character maps to <undefined>";
!     PyObject *errorHandler = NULL;
!     PyObject *exc = NULL;
!     /* the following variable is used for caching string comparisons
!      * -1=not initialized, 0=unknown, 1=strict, 2=replace,
!      * 3=ignore, 4=xmlcharrefreplace */
!     int known_errorHandler = -1;
! 
      if (mapping == NULL) {
  	PyErr_BadArgument();
  	return NULL;
      }
  
!     /* allocate enough for a simple 1:1 translation without
!        replacements, if we need more, we'll resize */
!     res = PyUnicode_FromUnicode(NULL, size);
!     if (res == NULL)
!         goto onError;
!     if (size == 0)
! 	return res;
!     str = PyUnicode_AS_UNICODE(res);
!     ressize = size;
! 
!     while (p<endp) {
! 	/* try to encode it */
! 	PyObject *x = NULL;
! 	if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
! 	    Py_XDECREF(x);
  	    goto onError;
  	}
! 	if (x!=Py_None) /* it worked => adjust input pointer */
! 	    ++p;
! 	else { /* untranslatable character */
! 	    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
! 	    int repsize;
! 	    int newpos;
! 	    Py_UNICODE *uni2;
! 	    /* startpos for collecting untranslatable chars */
! 	    const Py_UNICODE *collstart = p;
! 	    const Py_UNICODE *collend = p+1;
! 	    const Py_UNICODE *coll;
! 
! 	    Py_XDECREF(x);
! 	    /* find all untranslatable characters */
! 	    while (collend < endp) {
! 	    	if (charmaptranslate_lookup(*collend, mapping, &x))
! 		    goto onError;
! 		Py_XDECREF(x);
! 		if (x!=Py_None)
! 		    break;
! 		++collend;
! 	    }
! 	    /* cache callback name lookup
! 	     * (if not done yet, i.e. it's the first error) */
! 	    if (known_errorHandler==-1) {
! 		if ((errors==NULL) || (!strcmp(errors, "strict")))
! 		    known_errorHandler = 1;
! 		else if (!strcmp(errors, "replace"))
! 		    known_errorHandler = 2;
! 		else if (!strcmp(errors, "ignore"))
! 		    known_errorHandler = 3;
! 		else if (!strcmp(errors, "xmlcharrefreplace"))
! 		    known_errorHandler = 4;
! 		else
! 		    known_errorHandler = 0;
! 	    }
! 	    switch (known_errorHandler) {
! 		case 1: /* strict */
! 		    raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
! 		    goto onError;
! 		case 2: /* replace */
! 		    /* No need to check for space, this is a 1:1 replacement */
! 		    for (coll = collstart; coll<collend; ++coll)
! 			*str++ = '?';
! 		    /* fall through */
! 		case 3: /* ignore */
! 		    p = collend;
! 		    break;
! 		case 4: /* xmlcharrefreplace */
! 		    /* generate replacement (temporarily (mis)uses p) */
! 		    for (p = collstart; p < collend; ++p) {
! 			char buffer[2+29+1+1];
! 			char *cp;
! 			sprintf(buffer, "&#%d;", (int)*p);
! 			if (charmaptranslate_makespace(&res, &str, &ressize,
! 			    (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
! 			    goto onError;
! 			for (cp = buffer; *cp; ++cp)
! 			    *str++ = *cp;
! 		    }
! 		    p = collend;
! 		    break;
! 		default:
! 		    repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
! 			reason, startp, size, &exc,
! 			collstart-startp, collend-startp, &newpos);
! 		    if (repunicode == NULL)
! 			goto onError;
! 		    /* generate replacement  */
! 		    repsize = PyUnicode_GET_SIZE(repunicode);
! 		    if (charmaptranslate_makespace(&res, &str, &ressize,
! 			(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
! 			Py_DECREF(repunicode);
! 			goto onError;
! 		    }
! 		    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
! 			*str++ = *uni2;
! 		    p = startp + newpos;
! 		    Py_DECREF(repunicode);
  	    }
  	}
      }
!     /* Resize if we allocated too much */
!     respos = str-PyUnicode_AS_UNICODE(res);
!     if (respos<ressize) {
! 	if (_PyUnicode_Resize(&res, respos))
  	    goto onError;
+     }
+     Py_XDECREF(exc);
+     Py_XDECREF(errorHandler);
+     return res;
  
!     onError:
!     Py_XDECREF(res);
!     Py_XDECREF(exc);
!     Py_XDECREF(errorHandler);
      return NULL;
  }
  
  PyObject *PyUnicode_Translate(PyObject *str,
  			      PyObject *mapping,
***************
*** 2731,2783 ****
  			    int length,
  			    char *output,
  			    const char *errors)
  {
      Py_UNICODE *p, *end;
  
      if (output == NULL) {
  	PyErr_BadArgument();
  	return -1;
      }
  
      p = s;
      end = s + length;
      while (p < end) {
! 	register Py_UNICODE ch = *p++;
  	int decimal;
  	
  	if (Py_UNICODE_ISSPACE(ch)) {
  	    *output++ = ' ';
  	    continue;
  	}
  	decimal = Py_UNICODE_TODECIMAL(ch);
  	if (decimal >= 0) {
  	    *output++ = '0' + decimal;
  	    continue;
  	}
  	if (0 < ch && ch < 256) {
  	    *output++ = (char)ch;
  	    continue;
  	}
! 	/* All other characters are considered invalid */
! 	if (errors == NULL || strcmp(errors, "strict") == 0) {
! 	    PyErr_SetString(PyExc_ValueError,
! 			    "invalid decimal Unicode string");
! 	    goto onError;
  	}
! 	else if (strcmp(errors, "ignore") == 0)
! 	    continue;
! 	else if (strcmp(errors, "replace") == 0) {
! 	    *output++ = '?';
! 	    continue;
  	}
      }
      /* 0-terminate the output string */
      *output++ = '\0';
      return 0;
  
   onError:
      return -1;
  }
  
  /* --- Helpers ------------------------------------------------------------ */
  
--- 3340,3469 ----
  			    int length,
  			    char *output,
  			    const char *errors)
  {
      Py_UNICODE *p, *end;
+     PyObject *errorHandler = NULL;
+     PyObject *exc = NULL;
+     const char *encoding = "decimal";
+     const char *reason = "invalid decimal Unicode string";
+     /* the following variable is used for caching string comparisons
+      * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
+     int known_errorHandler = -1;
  
      if (output == NULL) {
  	PyErr_BadArgument();
  	return -1;
      }
  
      p = s;
      end = s + length;
      while (p < end) {
! 	register Py_UNICODE ch = *p;
  	int decimal;
+ 	PyObject *repunicode;
+ 	int repsize;
+ 	int newpos;
+ 	Py_UNICODE *uni2;
+ 	Py_UNICODE *collstart;
+ 	Py_UNICODE *collend;
  	
  	if (Py_UNICODE_ISSPACE(ch)) {
  	    *output++ = ' ';
+ 	    ++p;
  	    continue;
  	}
  	decimal = Py_UNICODE_TODECIMAL(ch);
  	if (decimal >= 0) {
  	    *output++ = '0' + decimal;
+ 	    ++p;
  	    continue;
  	}
  	if (0 < ch && ch < 256) {
  	    *output++ = (char)ch;
+ 	    ++p;
  	    continue;
  	}
! 	/* All other characters are considered unencodable; collect the run */
! 	collstart = p;
! 	/* advance collend to the first character that is encodable again */
! 	for (collend = p+1; collend < end; ++collend) {
! 	    if ((0 < *collend && *collend < 256) ||
! 	        Py_UNICODE_ISSPACE(*collend) ||
! 	        Py_UNICODE_TODECIMAL(*collend) >= 0)
! 		break;
  	}
! 	/* cache callback name lookup
! 	 * (if not done yet, i.e. it's the first error) */
! 	if (known_errorHandler==-1) {
! 	    if ((errors==NULL) || (!strcmp(errors, "strict")))
! 		known_errorHandler = 1;
! 	    else if (!strcmp(errors, "replace"))
! 		known_errorHandler = 2;
! 	    else if (!strcmp(errors, "ignore"))
! 		known_errorHandler = 3;
! 	    else if (!strcmp(errors, "xmlcharrefreplace"))
! 		known_errorHandler = 4;
! 	    else
! 		known_errorHandler = 0;
! 	}
! 	switch (known_errorHandler) {
! 	    case 1: /* strict */
! 		raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
! 		goto onError;
! 	    case 2: /* replace */
! 		for (p = collstart; p < collend; ++p)
! 		    *output++ = '?';
! 		/* fall through */
! 	    case 3: /* ignore */
! 		p = collend;
! 		break;
! 	    case 4: /* xmlcharrefreplace */
! 		/* generate replacement (temporarily (mis)uses p) */
! 		for (p = collstart; p < collend; ++p)
! 		    output += sprintf(output, "&#%d;", (int)*p);
! 		p = collend;
! 		break;
! 	    default:
! 		repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
! 		    encoding, reason, s, length, &exc,
! 		    collstart-s, collend-s, &newpos);
! 		if (repunicode == NULL)
! 		    goto onError;
! 		/* generate replacement  */
! 		repsize = PyUnicode_GET_SIZE(repunicode);
! 		for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
! 		    Py_UNICODE ch = *uni2;
! 		    if (Py_UNICODE_ISSPACE(ch))
! 			*output++ = ' ';
! 		    else {
! 			decimal = Py_UNICODE_TODECIMAL(ch);
! 			if (decimal >= 0)
! 			    *output++ = '0' + decimal;
! 			else if (0 < ch && ch < 256)
! 			    *output++ = (char)ch;
! 			else {
! 			    Py_DECREF(repunicode);
! 			    raise_encode_exception(&exc, encoding,
! 				s, length, collstart-s, collend-s, reason);
! 			    goto onError;
! 			}
! 		    }
! 		}
! 		p = s + newpos;
! 		Py_DECREF(repunicode);
  	}
      }
      /* 0-terminate the output string */
      *output++ = '\0';
+     Py_XDECREF(exc);
+     Py_XDECREF(errorHandler);
      return 0;
  
   onError:
+     Py_XDECREF(exc);
+     Py_XDECREF(errorHandler);
      return -1;
  }
  
  /* --- Helpers ------------------------------------------------------------ */
  
***************
*** 3863,3873 ****
  "S.encode([encoding[,errors]]) -> string\n\
  \n\
  Return an encoded string version of S. Default encoding is the current\n\
  default string encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a ValueError. Other possible values are 'ignore' and 'replace'.");
  
  static PyObject *
  unicode_encode(PyUnicodeObject *self, PyObject *args)
  {
      char *encoding = NULL;
--- 4549,4561 ----
  "S.encode([encoding[,errors]]) -> string\n\
  \n\
  Return an encoded string version of S. Default encoding is the current\n\
  default string encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
! a UnicodeError. Other possible values are 'ignore', 'replace' and\n\
! 'xmlcharrefreplace' as well as any other name registered with\n\
! codecs.register_error.");
  
  static PyObject *
  unicode_encode(PyUnicodeObject *self, PyObject *args)
  {
      char *encoding = NULL;
Index: Python/codecs.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/codecs.c,v
retrieving revision 2.14
diff -u -c -5 -r2.14 codecs.c
*** Python/codecs.c	18 Jul 2002 23:06:17 -0000	2.14
--- Python/codecs.c	24 Jul 2002 18:52:24 -0000
***************
*** 420,435 ****
--- 420,832 ----
      Py_XDECREF(decoder);
      Py_XDECREF(result);
      return NULL;
  }
  
+ static PyObject *_PyCodec_ErrorRegistry;
+ 
+ /* Register the error handling callback function error under the name
+    name. This function will be called by the codec when it encounters
+    unencodable characters/undecodable bytes and doesn't know the
+    callback name, when name is specified as the error parameter
+    in the call to the encode/decode function.
+    Return 0 on success, -1 on error */
+ int PyCodec_RegisterError(const char *name, PyObject *error)
+ {
+     if (!PyCallable_Check(error)) {
+ 	PyErr_SetString(PyExc_TypeError, "handler must be callable");
+ 	return -1;
+     }
+     return PyDict_SetItemString( _PyCodec_ErrorRegistry, (char *)name, error);
+ }
+ 
+ /* Lookup the error handling callback function registered under the
+    name error. As a special case NULL can be passed, in which case
+    the error handling callback for strict encoding will be returned. */
+ PyObject *PyCodec_LookupError(const char *name)
+ {
+     PyObject *handler = NULL;
+ 
+     if (name==NULL)
+ 	name = "strict";
+     handler = PyDict_GetItemString(_PyCodec_ErrorRegistry, (char *)name);
+     if (!handler)
+ 	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
+     else
+ 	Py_INCREF(handler);
+     return handler;
+ }
+ 
+ /* Set a TypeError naming exc's class; sets nothing if the name is unavailable */
+ static void wrong_exception_type(PyObject *exc)
+ {
+     PyObject *type, *name = NULL, *string = NULL;
+     if ((type = PyObject_GetAttrString(exc, "__class__")) != NULL)
+ 	name = PyObject_GetAttrString(type, "__name__");
+     Py_XDECREF(type);
+     if (name != NULL)
+ 	string = PyObject_Str(name);	/* can fail; never format NULL */
+     Py_XDECREF(name);
+     if (string != NULL)
+ 	PyErr_Format(PyExc_TypeError, "don't know how to handle %.400s in error callback",
+ 	    PyString_AS_STRING(string));
+     Py_XDECREF(string);
+ }
+ 
+ PyObject *PyCodec_StrictErrors(PyObject *exc)
+ {
+     if (PyInstance_Check(exc))
+ 	PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
+ 	    exc);
+     else
+ 	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
+     return NULL;
+ }
+ 
+ 
+ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
+ {
+     int end;
+     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ 	if (!PyUnicodeEncodeError_GetEnd(exc, &end))
+ 	    return NULL;
+     }
+     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ 	if (!PyUnicodeDecodeError_GetEnd(exc, &end))
+ 	    return NULL;
+     }
+     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
+ 	if (!PyUnicodeTranslateError_GetEnd(exc, &end))
+ 	    return NULL;
+     }
+     else {
+ 	wrong_exception_type(exc);
+ 	return NULL;
+     }
+     /* ouch: passing NULL, 0, pos gives None instead of u'' */
+     return Py_BuildValue("(u#i)", &end, 0, end);
+ }
+ 
+ 
+ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
+ {
+     PyObject *restuple;
+     int start;
+     int end;
+     int i;
+ 
+     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ 	PyObject *res;
+ 	Py_UNICODE *p;
+ 	if (!PyUnicodeEncodeError_GetStart(exc, &start))
+ 	    return NULL;
+ 	if (!PyUnicodeEncodeError_GetEnd(exc, &end))
+ 	    return NULL;
+ 	res = PyUnicode_FromUnicode(NULL, end-start);
+ 	if (res == NULL)
+ 	    return NULL;
+ 	for (p = PyUnicode_AS_UNICODE(res), i = start;
+ 	    i<end; ++p, ++i)
+ 	    *p = '?';
+ 	restuple = Py_BuildValue("(Oi)", res, end);
+ 	Py_DECREF(res);
+ 	return restuple;
+     }
+     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ 	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
+ 	if (!PyUnicodeDecodeError_GetEnd(exc, &end))
+ 	    return NULL;
+ 	return Py_BuildValue("(u#i)", &res, 1, end);
+     }
+     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
+ 	PyObject *res;
+ 	Py_UNICODE *p;
+ 	if (!PyUnicodeTranslateError_GetStart(exc, &start))
+ 	    return NULL;
+ 	if (!PyUnicodeTranslateError_GetEnd(exc, &end))
+ 	    return NULL;
+ 	res = PyUnicode_FromUnicode(NULL, end-start);
+ 	if (res == NULL)
+ 	    return NULL;
+ 	for (p = PyUnicode_AS_UNICODE(res), i = start;
+ 	    i<end; ++p, ++i)
+ 	    *p = Py_UNICODE_REPLACEMENT_CHARACTER;
+ 	restuple = Py_BuildValue("(Oi)", res, end);
+ 	Py_DECREF(res);
+ 	return restuple;
+     }
+     else {
+ 	wrong_exception_type(exc);
+ 	return NULL;
+     }
+ }
+ 
+ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
+ {
+     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ 	PyObject *restuple;
+ 	PyObject *object;
+ 	int start;
+ 	int end;
+ 	PyObject *res;
+ 	Py_UNICODE *p;
+ 	Py_UNICODE *startp;
+ 	Py_UNICODE *outp;
+ 	int ressize;
+ 	if (!PyUnicodeEncodeError_GetStart(exc, &start))
+ 	    return NULL;
+ 	if (!PyUnicodeEncodeError_GetEnd(exc, &end))
+ 	    return NULL;
+ 	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+ 	    return NULL;
+ 	startp = PyUnicode_AS_UNICODE(object);
+ 	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
+ 	    if (*p<10)
+ 		ressize += 2+1+1;
+ 	    else if (*p<100)
+ 		ressize += 2+2+1;
+ 	    else if (*p<1000)
+ 		ressize += 2+3+1;
+ 	    else if (*p<10000)
+ 		ressize += 2+4+1;
+ 	    else if (*p<100000)
+ 		ressize += 2+5+1;
+ 	    else if (*p<1000000)
+ 		ressize += 2+6+1;
+ 	    else
+ 		ressize += 2+7+1;
+ 	}
+ 	/* allocate replacement */
+ 	res = PyUnicode_FromUnicode(NULL, ressize);
+ 	if (res == NULL) {
+ 	    Py_DECREF(object);
+ 	    return NULL;
+ 	}
+ 	/* generate replacement */
+ 	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
+ 	    p < startp+end; ++p) {
+ 	    Py_UNICODE c = *p;
+ 	    int digits;
+ 	    int base;
+ 	    *outp++ = '&';
+ 	    *outp++ = '#';
+ 	    if (*p<10) {
+ 		digits = 1;
+ 		base = 1;
+ 	    }
+ 	    else if (*p<100) {
+ 		digits = 2;
+ 		base = 10;
+ 	    }
+ 	    else if (*p<1000) {
+ 		digits = 3;
+ 		base = 100;
+ 	    }
+ 	    else if (*p<10000) {
+ 		digits = 4;
+ 		base = 1000;
+ 	    }
+ 	    else if (*p<100000) {
+ 		digits = 5;
+ 		base = 10000;
+ 	    }
+ 	    else if (*p<1000000) {
+ 		digits = 6;
+ 		base = 100000;
+ 	    }
+ 	    else {
+ 		digits = 7;
+ 		base = 1000000;
+ 	    }
+ 	    while (digits-->0) {
+ 		*outp++ = '0' + c/base;
+ 		c %= base;
+ 		base /= 10;
+ 	    }
+ 	    *outp++ = ';';
+ 	}
+ 	restuple = Py_BuildValue("(Oi)", res, end);
+ 	Py_DECREF(res);
+ 	Py_DECREF(object);
+ 	return restuple;
+     }
+     else {
+ 	wrong_exception_type(exc);
+ 	return NULL;
+     }
+ }
+ 
+ static Py_UNICODE hexdigits[] = {
+     '0', '1', '2', '3', '4', '5', '6', '7',
+     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+ };
+ 
+ /* Error callback: replace the unencodable range of a UnicodeEncodeError
+    with backslash escapes (\xNN, \uNNNN or \UNNNNNNNN per character).
+    Returns a new (replacement, end) tuple, or NULL on failure. */
+ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
+ {
+     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ 	PyObject *restuple;
+ 	PyObject *object;
+ 	PyObject *res;
+ 	Py_UNICODE *p, *startp, *outp;
+ 	int start, end, ressize;
+ 	if (!PyUnicodeEncodeError_GetStart(exc, &start))
+ 	    return NULL;
+ 	if (!PyUnicodeEncodeError_GetEnd(exc, &end))
+ 	    return NULL;
+ 	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+ 	    return NULL;
+ 	startp = PyUnicode_AS_UNICODE(object);
+ 	/* first pass: size of the result (backslash + letter + hex digits) */
+ 	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
+ 	    if (*p >= 0x00010000)
+ 		ressize += 1+1+8;
+ 	    else if (*p >= 0x100)
+ 		ressize += 1+1+4;
+ 	    else
+ 		ressize += 1+1+2;
+ 	}
+ 	res = PyUnicode_FromUnicode(NULL, ressize);
+ 	if (res==NULL) {
+ 	    Py_DECREF(object);	/* owned reference: release it here too */
+ 	    return NULL;
+ 	}
+ 	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
+ 	    p < startp+end; ++p) {
+ 	    Py_UNICODE c = *p;
+ 	    *outp++ = '\\';
+ 	    if (c >= 0x00010000) {
+ 		*outp++ = 'U';
+ 		*outp++ = hexdigits[(c>>28)&0xf];
+ 		*outp++ = hexdigits[(c>>24)&0xf];
+ 		*outp++ = hexdigits[(c>>20)&0xf];
+ 		*outp++ = hexdigits[(c>>16)&0xf];
+ 		*outp++ = hexdigits[(c>>12)&0xf];
+ 		*outp++ = hexdigits[(c>>8)&0xf];
+ 	    }
+ 	    else if (c >= 0x100) {
+ 		*outp++ = 'u';
+ 		*outp++ = hexdigits[(c>>12)&0xf];
+ 		*outp++ = hexdigits[(c>>8)&0xf];
+ 	    }
+ 	    else
+ 		*outp++ = 'x';
+ 	    *outp++ = hexdigits[(c>>4)&0xf];
+ 	    *outp++ = hexdigits[c&0xf];
+ 	}
+ 	restuple = Py_BuildValue("(Oi)", res, end);
+ 	Py_DECREF(res);
+ 	Py_DECREF(object);
+ 	return restuple;
+     }
+     else {
+ 	wrong_exception_type(exc);
+ 	return NULL;
+     }
+ }
+ 
+ static PyObject *strict_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_StrictErrors(exc);
+ }
+ 
+ 
+ static PyObject *ignore_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_IgnoreErrors(exc);
+ }
+ 
+ 
+ static PyObject *replace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_ReplaceErrors(exc);
+ }
+ 
+ 
+ static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_XMLCharRefReplaceErrors(exc);
+ }
+ 
+ 
+ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
+ {
+     return PyCodec_BackslashReplaceErrors(exc);
+ }
+ 
+ 
  void _PyCodecRegistry_Init(void)
  {
+     static struct {
+ 	char *name;
+ 	PyMethodDef def;
+     } methods[] =
+     {
+ 	{
+ 	    "strict",
+ 	    {
+ 		"strict_errors",
+ 		strict_errors,
+ 		METH_O
+ 	    }
+ 	},
+ 	{
+ 	    "ignore",
+ 	    {
+ 		"ignore_errors",
+ 		ignore_errors,
+ 		METH_O
+ 	    }
+ 	},
+ 	{
+ 	    "replace",
+ 	    {
+ 		"replace_errors",
+ 		replace_errors,
+ 		METH_O
+ 	    }
+ 	},
+ 	{
+ 	    "xmlcharrefreplace",
+ 	    {
+ 		"xmlcharrefreplace_errors",
+ 		xmlcharrefreplace_errors,
+ 		METH_O
+ 	    }
+ 	},
+ 	{
+ 	    "backslashreplace",
+ 	    {
+ 		"backslashreplace_errors",
+ 		backslashreplace_errors,
+ 		METH_O
+ 	    }
+ 	}
+     };
      if (_PyCodec_SearchPath == NULL)
  	_PyCodec_SearchPath = PyList_New(0);
      if (_PyCodec_SearchCache == NULL)
  	_PyCodec_SearchCache = PyDict_New();
+     if (_PyCodec_ErrorRegistry == NULL) {
+ 	int i;
+ 	_PyCodec_ErrorRegistry = PyDict_New();
+ 	/* without the registry no error name can be resolved: give up now */
+ 	if (_PyCodec_ErrorRegistry == NULL)
+ 	    Py_FatalError("can't initialize codec error registry");
+ 	for (i = 0; i < (int)(sizeof(methods)/sizeof(methods[0])); ++i) {
+ 	    PyObject *func = PyCFunction_New(&methods[i].def, NULL);
+ 	    int res;
+ 	    if (!func)
+ 		Py_FatalError("can't initialize codec error registry");
+ 	    res = PyCodec_RegisterError(methods[i].name, func);
+ 	    Py_DECREF(func);
+ 	    if (res)
+ 		Py_FatalError("can't initialize codec error registry");
+ 	}
+     }
      if (_PyCodec_SearchPath == NULL || 
  	_PyCodec_SearchCache == NULL)
  	Py_FatalError("can't initialize codec registry");
  }
  
***************
*** 437,442 ****
--- 834,841 ----
  {
      Py_XDECREF(_PyCodec_SearchPath);
      _PyCodec_SearchPath = NULL;
      Py_XDECREF(_PyCodec_SearchCache);
      _PyCodec_SearchCache = NULL;
+     Py_XDECREF(_PyCodec_ErrorRegistry);
+     _PyCodec_ErrorRegistry = NULL;
  }
Index: Python/exceptions.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/exceptions.c,v
retrieving revision 1.32
diff -u -c -5 -r1.32 exceptions.c
*** Python/exceptions.c	13 Jun 2002 20:33:02 -0000	1.32
--- Python/exceptions.c	24 Jul 2002 18:52:25 -0000
***************
*** 98,107 ****
--- 98,111 ----
   |    |    +-- FloatingPointError\n\
   |    |\n\
   |    +-- ValueError\n\
   |    |    |\n\
   |    |    +-- UnicodeError\n\
+  |    |        |\n\
+  |    |        +-- UnicodeEncodeError\n\
+  |    |        +-- UnicodeDecodeError\n\
+  |    |        +-- UnicodeTranslateError\n\
   |    |\n\
   |    +-- ReferenceError\n\
   |    +-- SystemError\n\
   |    +-- MemoryError\n\
   |\n\
***************
*** 837,846 ****
--- 841,1434 ----
      {"__str__",  SyntaxError__str__, METH_VARARGS},
      {NULL, NULL}
  };
  
  
+ static
+ int get_int(PyObject *exc, const char *name, int *value)
+ {
+     PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+ 
+     if (!attr)
+ 	return 0;
+     if (!PyInt_Check(attr)) {
+ 	PyErr_Format(PyExc_TypeError, "%s attribute must be int", name);
+ 	Py_DECREF(attr);
+ 	return 0;
+     }
+     *value = PyInt_AS_LONG(attr);
+     Py_DECREF(attr);
+     return -1;
+ }
+ 
+ 
+ static
+ int set_int(PyObject *exc, const char *name, int value)
+ {
+     PyObject *obj = PyInt_FromLong(value);
+     int result;
+ 
+     if (!obj)
+ 	return 0;
+     result = PyObject_SetAttrString(exc, (char *)name, obj);
+     Py_DECREF(obj);
+     return result ? 0 : -1;
+ }
+ 
+ 
+ static
+ PyObject *get_string(PyObject *exc, const char *name)
+ {
+     PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+ 
+     if (!attr)
+ 	return NULL;
+     if (!PyString_Check(attr)) {
+ 	PyErr_Format(PyExc_TypeError, "%s attribute must be str", name);
+ 	Py_DECREF(attr);
+ 	return NULL;
+     }
+     return attr;
+ }
+ 
+ 
+ static
+ int set_string(PyObject *exc, const char *name, const char *value)
+ {
+     PyObject *obj = PyString_FromString(value);
+     int result;
+ 
+     if (!obj)
+ 	return 0;
+     result = PyObject_SetAttrString(exc, (char *)name, obj);
+     Py_DECREF(obj);
+     return result ? 0 : -1;
+ }
+ 
+ 
+ static
+ PyObject *get_unicode(PyObject *exc, const char *name)
+ {
+     PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+ 
+     if (!attr)
+ 	return NULL;
+     if (!PyUnicode_Check(attr)) {
+ 	PyErr_Format(PyExc_TypeError, "%s attribute must be unicode", name);
+ 	Py_DECREF(attr);
+ 	return NULL;
+     }
+     return attr;
+ }
+ 
+ PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc)
+ {
+     return get_string(exc, "encoding");
+ }
+ 
+ PyObject * PyUnicodeDecodeError_GetEncoding(PyObject *exc)
+ {
+     return get_string(exc, "encoding");
+ }
+ 
+ PyObject * PyUnicodeTranslateError_GetEncoding(PyObject *exc)
+ {
+     return get_string(exc, "encoding");
+ }
+ 
+ PyObject *PyUnicodeEncodeError_GetObject(PyObject *exc)
+ {
+     return get_unicode(exc, "object");
+ }
+ 
+ PyObject *PyUnicodeDecodeError_GetObject(PyObject *exc)
+ {
+     return get_string(exc, "object");
+ }
+ 
+ PyObject *PyUnicodeTranslateError_GetObject(PyObject *exc)
+ {
+     return get_unicode(exc, "object");
+ }
+ 
+ int PyUnicodeEncodeError_GetStart(PyObject *exc, int *start)
+ {
+     if (get_int(exc, "start", start)) {
+ 	PyObject *object = PyUnicodeEncodeError_GetObject(exc);
+ 	int size;
+ 	if (!object)
+ 	    return 0;
+ 	size = PyUnicode_GET_SIZE(object);
+ 	if (*start<0)
+ 	    *start = 0;
+ 	if (*start>=size)
+ 	    *start = size-1;
+ 	Py_DECREF(object);
+ 	return -1;
+     }
+     return 0;
+ }
+ 
+ 
+ int PyUnicodeDecodeError_GetStart(PyObject *exc, int *start)
+ {
+     if (get_int(exc, "start", start)) {
+ 	PyObject *object = PyUnicodeDecodeError_GetObject(exc);
+ 	int size;
+ 	if (!object)
+ 	    return 0;
+ 	size = PyString_GET_SIZE(object);
+ 	if (*start<0)
+ 	    *start = 0;
+ 	if (*start>=size)
+ 	    *start = size-1;
+ 	Py_DECREF(object);
+ 	return -1;
+     }
+     return 0;
+ }
+ 
+ 
+ int PyUnicodeTranslateError_GetStart(PyObject *exc, int *start)
+ {
+     return PyUnicodeEncodeError_GetStart(exc, start);
+ }
+ 
+ 
+ int PyUnicodeEncodeError_SetStart(PyObject *exc, int start)
+ {
+     return set_int(exc, "start", start);
+ }
+ 
+ 
+ int PyUnicodeDecodeError_SetStart(PyObject *exc, int start)
+ {
+     return set_int(exc, "start", start);
+ }
+ 
+ 
+ int PyUnicodeTranslateError_SetStart(PyObject *exc, int start)
+ {
+     return set_int(exc, "start", start);
+ }
+ 
+ 
+ int PyUnicodeEncodeError_GetEnd(PyObject *exc, int *end)
+ {
+     if (get_int(exc, "end", end)) {
+ 	PyObject *object = PyUnicodeEncodeError_GetObject(exc);
+ 	int size;
+ 	if (!object)
+ 	    return 0;
+ 	size = PyUnicode_GET_SIZE(object);
+ 	if (*end<1)
+ 	    *end = 1;
+ 	if (*end>size)
+ 	    *end = size;
+ 	Py_DECREF(object);
+ 	return -1;
+     }
+     return 0;
+ }
+ 
+ 
+ int PyUnicodeDecodeError_GetEnd(PyObject *exc, int *end)
+ {
+     if (get_int(exc, "end", end)) {
+ 	PyObject *object = PyUnicodeDecodeError_GetObject(exc);
+ 	int size;
+ 	if (!object)
+ 	    return 0;
+ 	size = PyString_GET_SIZE(object);
+ 	if (*end<1)
+ 	    *end = 1;
+ 	if (*end>size)
+ 	    *end = size;
+ 	Py_DECREF(object);
+ 	return -1;
+     }
+     return 0;
+ }
+ 
+ 
+ int PyUnicodeTranslateError_GetEnd(PyObject *exc, int *end)
+ {   /* delegates to the encode-error getter, which reads and clamps "end" */
+     return PyUnicodeEncodeError_GetEnd(exc, end);
+ }
+ 
+ 
+ int PyUnicodeEncodeError_SetEnd(PyObject *exc, int end)
+ {
+     return set_int(exc, "end", end);
+ }
+ 
+ 
+ int PyUnicodeDecodeError_SetEnd(PyObject *exc, int end)
+ {
+     return set_int(exc, "end", end);
+ }
+ 
+ 
+ int PyUnicodeTranslateError_SetEnd(PyObject *exc, int end)
+ {
+     return set_int(exc, "end", end);
+ }
+ 
+ 
+ PyObject *PyUnicodeEncodeError_GetReason(PyObject *exc)
+ {
+     return get_string(exc, "reason");
+ }
+ 
+ 
+ PyObject *PyUnicodeDecodeError_GetReason(PyObject *exc)
+ {
+     return get_string(exc, "reason");
+ }
+ 
+ 
+ PyObject *PyUnicodeTranslateError_GetReason(PyObject *exc)
+ {
+     return get_string(exc, "reason");
+ }
+ 
+ 
+ int PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
+ {
+     return set_string(exc, "reason", reason);
+ }
+ 
+ 
+ int PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
+ {
+     return set_string(exc, "reason", reason);
+ }
+ 
+ 
+ int PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
+ {
+     return set_string(exc, "reason", reason);
+ }
+ 
+ 
+ /* Shared __init__ for UnicodeEncodeError and UnicodeDecodeError: checks
+    (encoding, object, start, end, reason), requiring object to be an
+    instance of objecttype, and stores them plus args as attributes.
+    Returns None, or NULL on failure. */
+ static PyObject *
+ UnicodeError__init__(PyObject *self, PyObject *args, PyTypeObject *objecttype)
+ {
+     PyObject *rtnval = NULL;
+     PyObject *encoding, *object, *start, *end, *reason;
+ 
+     if (!(self = get_self(args)))
+ 	return NULL;
+ 
+     if (!(args = PySequence_GetSlice(args, 1, PySequence_Size(args))))
+ 	return NULL;
+ 
+     if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
+ 	&PyString_Type, &encoding,
+ 	objecttype, &object,
+ 	&PyInt_Type, &start,
+ 	&PyInt_Type, &end,
+ 	&PyString_Type, &reason))
+ 	goto finally;	/* args is the slice we own: release it, don't leak it */
+ 
+     if (PyObject_SetAttrString(self, "args", args))
+ 	goto finally;
+ 
+     if (PyObject_SetAttrString(self, "encoding", encoding))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "object", object))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "start", start))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "end", end))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "reason", reason))
+ 	goto finally;
+ 
+     Py_INCREF(Py_None);
+     rtnval = Py_None;
+ 
+   finally:
+     Py_DECREF(args);
+     return rtnval;
+ }
+ 
+ 
+ static PyObject *
+ UnicodeEncodeError__init__(PyObject *self, PyObject *args)
+ {
+     return UnicodeError__init__(self, args, &PyUnicode_Type);
+ }
+ 
+ static PyObject *
+ UnicodeEncodeError__str__(PyObject *self, PyObject *arg)
+ {
+     PyObject *encodingObj = NULL;
+     PyObject *objectObj = NULL;
+     int length;
+     int start;
+     int end;
+     PyObject *reasonObj = NULL;
+     char buffer[1000];
+     PyObject *result = NULL;
+ 
+     self = arg;
+ 
+     if (!(encodingObj = PyUnicodeEncodeError_GetEncoding(self)))
+ 	goto error;
+ 
+     if (!(objectObj = PyUnicodeEncodeError_GetObject(self)))
+ 	goto error;
+ 
+     length = PyUnicode_GET_SIZE(objectObj);
+ 
+     if (!PyUnicodeEncodeError_GetStart(self, &start))
+ 	goto error;
+ 
+     if (!PyUnicodeEncodeError_GetEnd(self, &end))
+ 	goto error;
+ 
+     if (!(reasonObj = PyUnicodeEncodeError_GetReason(self)))
+ 	goto error;
+ 
+     if (end==start+1) {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "'%.400s' codec can't encode character '\\u%x' in position %d: %.400s",
+ 	    PyString_AS_STRING(encodingObj),
+ 	    (int)PyUnicode_AS_UNICODE(objectObj)[start],
+ 	    start,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     else {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "'%.400s' codec can't encode characters in position %d-%d: %.400s",
+ 	    PyString_AS_STRING(encodingObj),
+ 	    start,
+ 	    end-1,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     result = PyString_FromString(buffer);
+ 
+ error:
+     Py_XDECREF(reasonObj);
+     Py_XDECREF(objectObj);
+     Py_XDECREF(encodingObj);
+     return result;
+ }
+ 
+ static PyMethodDef UnicodeEncodeError_methods[] = {
+     {"__init__", UnicodeEncodeError__init__, METH_VARARGS},
+     {"__str__",  UnicodeEncodeError__str__, METH_O},
+     {NULL, NULL}
+ };
+ 
+ 
+ PyObject * PyUnicodeEncodeError_Create(
+ 	const char *encoding, const Py_UNICODE *object, int length,
+ 	int start, int end, const char *reason)
+ {
+     return PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#iis",
+ 	encoding, object, length, start, end, reason);
+ }
+ 
+ 
+ static PyObject *
+ UnicodeDecodeError__init__(PyObject *self, PyObject *args)
+ {
+     return UnicodeError__init__(self, args, &PyString_Type);
+ }
+ 
+ static PyObject *
+ UnicodeDecodeError__str__(PyObject *self, PyObject *arg)
+ {
+     PyObject *encodingObj = NULL;
+     PyObject *objectObj = NULL;
+     int length;
+     int start;
+     int end;
+     PyObject *reasonObj = NULL;
+     char buffer[1000];
+     PyObject *result = NULL;
+ 
+     self = arg;
+ 
+     if (!(encodingObj = PyUnicodeDecodeError_GetEncoding(self)))
+ 	goto error;
+ 
+     if (!(objectObj = PyUnicodeDecodeError_GetObject(self)))
+ 	goto error;
+ 
+     length = PyString_GET_SIZE(objectObj);
+ 
+     if (!PyUnicodeDecodeError_GetStart(self, &start))
+ 	goto error;
+ 
+     if (!PyUnicodeDecodeError_GetEnd(self, &end))
+ 	goto error;
+ 
+     if (!(reasonObj = PyUnicodeDecodeError_GetReason(self)))
+ 	goto error;
+ 
+     if (end==start+1) {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
+ 	    PyString_AS_STRING(encodingObj),
+ 	    ((int)PyString_AS_STRING(objectObj)[start])&0xff,
+ 	    start,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     else {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "'%.400s' codec can't decode bytes in position %d-%d: %.400s",
+ 	    PyString_AS_STRING(encodingObj),
+ 	    start,
+ 	    end-1,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     result = PyString_FromString(buffer);
+ 
+ error:
+     Py_XDECREF(reasonObj);
+     Py_XDECREF(objectObj);
+     Py_XDECREF(encodingObj);
+     return result;
+ }
+ 
+ static PyMethodDef UnicodeDecodeError_methods[] = {
+     {"__init__", UnicodeDecodeError__init__, METH_VARARGS},
+     {"__str__",  UnicodeDecodeError__str__, METH_O},
+     {NULL, NULL}
+ };
+ 
+ 
+ PyObject * PyUnicodeDecodeError_Create(
+ 	const char *encoding, const char *object, int length,
+ 	int start, int end, const char *reason)
+ {
+     return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#iis",
+ 	encoding, object, length, start, end, reason);
+ }
+ 
+ 
+ static PyObject *
+ UnicodeTranslateError__init__(PyObject *self, PyObject *args)
+ {
+     PyObject *rtnval = NULL;
+     PyObject *object;
+     PyObject *start;
+     PyObject *end;
+     PyObject *reason;
+ 
+     if (!(self = get_self(args)))
+ 	return NULL;
+ 
+     if (!(args = PySequence_GetSlice(args, 1, PySequence_Size(args))))
+ 	return NULL;
+ 
+     if (!PyArg_ParseTuple(args, "O!O!O!O!",
+ 	&PyUnicode_Type, &object,
+ 	&PyInt_Type, &start,
+ 	&PyInt_Type, &end,
+ 	&PyString_Type, &reason))
+ 	goto finally;
+ 
+     if (PyObject_SetAttrString(self, "args", args))
+ 	goto finally;
+ 
+     if (PyObject_SetAttrString(self, "object", object))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "start", start))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "end", end))
+ 	goto finally;
+     if (PyObject_SetAttrString(self, "reason", reason))
+ 	goto finally;
+ 
+     Py_INCREF(Py_None);
+     rtnval = Py_None;
+ 
+   finally:
+     Py_DECREF(args);
+     return rtnval;
+ }
+ 
+ 
+ static PyObject *
+ UnicodeTranslateError__str__(PyObject *self, PyObject *arg)
+ {
+     PyObject *objectObj = NULL;
+     int length;
+     int start;
+     int end;
+     PyObject *reasonObj = NULL;
+     char buffer[1000];
+     PyObject *result = NULL;
+ 
+     self = arg;
+ 
+     if (!(objectObj = PyUnicodeTranslateError_GetObject(self)))
+ 	goto error;
+ 
+     length = PyUnicode_GET_SIZE(objectObj);
+ 
+     if (!PyUnicodeTranslateError_GetStart(self, &start))
+ 	goto error;
+ 
+     if (!PyUnicodeTranslateError_GetEnd(self, &end))
+ 	goto error;
+ 
+     if (!(reasonObj = PyUnicodeTranslateError_GetReason(self)))
+ 	goto error;
+ 
+     if (end==start+1) {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "can't translate character '\\u%x' in position %d: %.400s",
+ 	    (int)PyUnicode_AS_UNICODE(objectObj)[start],
+ 	    start,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     else {
+ 	PyOS_snprintf(buffer, sizeof(buffer),
+ 	    "can't translate characters in position %d-%d: %.400s",
+ 	    start,
+ 	    end-1,
+ 	    PyString_AS_STRING(reasonObj)
+ 	);
+     }
+     result = PyString_FromString(buffer);
+ 
+ error:
+     Py_XDECREF(reasonObj);
+     Py_XDECREF(objectObj);
+     return result;
+ }
+ 
+ static PyMethodDef UnicodeTranslateError_methods[] = {
+     {"__init__", UnicodeTranslateError__init__, METH_VARARGS},
+     {"__str__",  UnicodeTranslateError__str__, METH_O},
+     {NULL, NULL}
+ };
+ 
+ 
+ PyObject * PyUnicodeTranslateError_Create(
+ 	const Py_UNICODE *object, int length,
+ 	int start, int end, const char *reason)
+ {
+     return PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#iis",
+ 	object, length, start, end, reason);
+ }
+ 
+ 
  
  /* Exception doc strings */
  
  PyDoc_STRVAR(AssertionError__doc__, "Assertion failed.");
  
***************
*** 862,871 ****
--- 1450,1465 ----
  PyDoc_STRVAR(ValueError__doc__,
  "Inappropriate argument value (of correct type).");
  
  PyDoc_STRVAR(UnicodeError__doc__, "Unicode related error.");
  
+ PyDoc_STRVAR(UnicodeEncodeError__doc__, "Unicode encoding error.");
+ 
+ PyDoc_STRVAR(UnicodeDecodeError__doc__, "Unicode decoding error.");
+ 
+ PyDoc_STRVAR(UnicodeTranslateError__doc__, "Unicode translation error.");
+ 
  PyDoc_STRVAR(SystemError__doc__,
  "Internal error in the Python interpreter.\n\
  \n\
  Please report this to the Python maintainer, along with the traceback,\n\
  the Python version, and the hardware/OS platform and version.");
***************
*** 942,951 ****
--- 1536,1548 ----
  PyObject *PyExc_ReferenceError;
  PyObject *PyExc_SystemError;
  PyObject *PyExc_SystemExit;
  PyObject *PyExc_UnboundLocalError;
  PyObject *PyExc_UnicodeError;
+ PyObject *PyExc_UnicodeEncodeError;
+ PyObject *PyExc_UnicodeDecodeError;
+ PyObject *PyExc_UnicodeTranslateError;
  PyObject *PyExc_TypeError;
  PyObject *PyExc_ValueError;
  PyObject *PyExc_ZeroDivisionError;
  #ifdef MS_WINDOWS
  PyObject *PyExc_WindowsError;
***************
*** 1027,1036 ****
--- 1624,1639 ----
    ZeroDivisionError__doc__},
   {"FloatingPointError", &PyExc_FloatingPointError, &PyExc_ArithmeticError,
    FloatingPointError__doc__},
   {"ValueError",   &PyExc_ValueError,  0, ValueError__doc__},
   {"UnicodeError", &PyExc_UnicodeError, &PyExc_ValueError, UnicodeError__doc__},
+  {"UnicodeEncodeError", &PyExc_UnicodeEncodeError, &PyExc_UnicodeError,
+   UnicodeEncodeError__doc__, UnicodeEncodeError_methods},
+  {"UnicodeDecodeError", &PyExc_UnicodeDecodeError, &PyExc_UnicodeError,
+   UnicodeDecodeError__doc__, UnicodeDecodeError_methods},
+  {"UnicodeTranslateError", &PyExc_UnicodeTranslateError, &PyExc_UnicodeError,
+   UnicodeTranslateError__doc__, UnicodeTranslateError_methods},
   {"ReferenceError",  &PyExc_ReferenceError, 0, ReferenceError__doc__},
   {"SystemError",  &PyExc_SystemError, 0, SystemError__doc__},
   {"MemoryError",  &PyExc_MemoryError, 0, MemoryError__doc__},
   /* Warning categories */
   {"Warning", &PyExc_Warning, &PyExc_Exception, Warning__doc__},