diff -r 90d4153728f6 Include/codecs.h
--- a/Include/codecs.h	Thu Nov 21 23:01:59 2013 -0800
+++ b/Include/codecs.h	Fri Nov 22 21:15:49 2013 +1000
@@ -94,6 +94,31 @@
        const char *errors
        );
 
+#ifndef PY_LIMITED_API
+/* Text codec specific encoding and decoding API.
+
+   Looks up the codec and fails with LookupError if it is flagged as
+   not being a text encoding (a false _is_text_encoding attribute),
+   before attempting the operation.
+
+   Private for now, we'll likely do something public for 3.5
+ */
+
+PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
+       PyObject *object,
+       const char *encoding,
+       const char *errors
+       );
+
+PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
+       PyObject *object,
+       const char *encoding,
+       const char *errors
+       );
+#endif
+
+
+
 /* --- Codec Lookup APIs -------------------------------------------------- 
 
    All APIs return a codec object with incremented refcount and are
diff -r 90d4153728f6 Lib/codecs.py
--- a/Lib/codecs.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/codecs.py	Fri Nov 22 21:15:49 2013 +1000
@@ -86,6 +86,16 @@
         self.streamreader = streamreader
         return self
 
+    _is_text_encoding = True # Assume codecs are text encodings by default
+
+    @classmethod
+    def _declare_transform(cls, *args, **kwds):
+        # Private helper: build an instance, then flag it as a non-text
+        # transform (binary or text); see http://bugs.python.org/issue19619
+        self = cls(*args, **kwds)
+        self._is_text_encoding = False
+        return self
+
     def __repr__(self):
         return "<%s.%s object for encoding %s at 0x%x>" % \
                 (self.__class__.__module__, self.__class__.__name__,
diff -r 90d4153728f6 Lib/encodings/base64_codec.py
--- a/Lib/encodings/base64_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/base64_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -44,7 +44,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='base64',
         encode=base64_encode,
         decode=base64_decode,
diff -r 90d4153728f6 Lib/encodings/bz2_codec.py
--- a/Lib/encodings/bz2_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/bz2_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -66,7 +66,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name="bz2",
         encode=bz2_encode,
         decode=bz2_decode,
diff -r 90d4153728f6 Lib/encodings/hex_codec.py
--- a/Lib/encodings/hex_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/hex_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -44,7 +44,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='hex',
         encode=hex_encode,
         decode=hex_decode,
diff -r 90d4153728f6 Lib/encodings/quopri_codec.py
--- a/Lib/encodings/quopri_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/quopri_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -45,7 +45,7 @@
 # encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='quopri',
         encode=quopri_encode,
         decode=quopri_decode,
diff -r 90d4153728f6 Lib/encodings/rot_13.py
--- a/Lib/encodings/rot_13.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/rot_13.py	Fri Nov 22 21:15:49 2013 +1000
@@ -35,7 +35,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='rot-13',
         encode=Codec().encode,
         decode=Codec().decode,
diff -r 90d4153728f6 Lib/encodings/uu_codec.py
--- a/Lib/encodings/uu_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/uu_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -88,7 +88,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='uu',
         encode=uu_encode,
         decode=uu_decode,
diff -r 90d4153728f6 Lib/encodings/zlib_codec.py
--- a/Lib/encodings/zlib_codec.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/encodings/zlib_codec.py	Fri Nov 22 21:15:49 2013 +1000
@@ -66,7 +66,7 @@
 ### encodings module API
 
 def getregentry():
-    return codecs.CodecInfo(
+    return codecs.CodecInfo._declare_transform(
         name='zlib',
         encode=zlib_encode,
         decode=zlib_decode,
diff -r 90d4153728f6 Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Thu Nov 21 23:01:59 2013 -0800
+++ b/Lib/test/test_codecs.py	Fri Nov 22 21:15:49 2013 +1000
@@ -6,6 +6,7 @@
 import sys
 import unittest
 import warnings
+import encodings
 
 from test import support
 
@@ -2381,67 +2382,68 @@
                 view_decoded = codecs.decode(view, encoding)
                 self.assertEqual(view_decoded, data)
 
-    def test_type_error_for_text_input(self):
+    def test_text_to_binary_blacklists_binary_transforms(self):
         # Check binary -> binary codecs give a good error for str input
         bad_input = "bad input type"
         for encoding in bytes_transform_encodings:
             with self.subTest(encoding=encoding):
-                msg = "^encoding with '{}' codec failed".format(encoding)
-                with self.assertRaisesRegex(TypeError, msg) as failure:
+                fmt = (r"{!r} is not a text encoding; "
+                       r"use codecs.encode\(\) to handle arbitrary codecs")
+                msg = fmt.format(encoding)
+                with self.assertRaisesRegex(LookupError, msg) as failure:
                     bad_input.encode(encoding)
-                self.assertTrue(isinstance(failure.exception.__cause__,
-                                           TypeError))
+                self.assertIsNone(failure.exception.__cause__)
 
-    def test_type_error_for_binary_input(self):
-        # Check str -> str codec gives a good error for binary input
-        for bad_input in (b"immutable", bytearray(b"mutable")):
-            with self.subTest(bad_input=bad_input):
-                msg = "^decoding with 'rot_13' codec failed"
-                with self.assertRaisesRegex(AttributeError, msg) as failure:
-                    bad_input.decode("rot_13")
-                self.assertTrue(isinstance(failure.exception.__cause__,
-                                           AttributeError))
+    def test_text_to_binary_blacklists_text_transforms(self):
+        # Check str.encode gives a good error message for str -> str codecs
+        msg = (r"^'rot_13' is not a text encoding; "
+               r"use codecs.encode\(\) to handle arbitrary codecs")
+        with self.assertRaisesRegex(LookupError, msg):
+            "just an example message".encode("rot_13")
 
-    def test_custom_zlib_error_is_wrapped(self):
-        # Check zlib codec gives a good error for malformed input
-        msg = "^decoding with 'zlib_codec' codec failed"
-        with self.assertRaisesRegex(Exception, msg) as failure:
-            b"hello".decode("zlib_codec")
-        self.assertTrue(isinstance(failure.exception.__cause__,
-                                   type(failure.exception)))
-
-    def test_custom_hex_error_is_wrapped(self):
-        # Check hex codec gives a good error for malformed input
-        msg = "^decoding with 'hex_codec' codec failed"
-        with self.assertRaisesRegex(Exception, msg) as failure:
-            b"hello".decode("hex_codec")
-        self.assertTrue(isinstance(failure.exception.__cause__,
-                                   type(failure.exception)))
-
-    # Unfortunately, the bz2 module throws OSError, which the codec
-    # machinery currently can't wrap :(
-
-    def test_bad_decoding_output_type(self):
+    def test_binary_to_text_blacklists_binary_transforms(self):
         # Check bytes.decode and bytearray.decode give a good error
         # message for binary -> binary codecs
         data = b"encode first to ensure we meet any format restrictions"
         for encoding in bytes_transform_encodings:
             with self.subTest(encoding=encoding):
                 encoded_data = codecs.encode(data, encoding)
-                fmt = ("'{}' decoder returned 'bytes' instead of 'str'; "
-                       "use codecs.decode\(\) to decode to arbitrary types")
+                fmt = (r"{!r} is not a text encoding; "
+                       r"use codecs.decode\(\) to handle arbitrary codecs")
                 msg = fmt.format(encoding)
-                with self.assertRaisesRegex(TypeError, msg):
+                with self.assertRaisesRegex(LookupError, msg):
                     encoded_data.decode(encoding)
-                with self.assertRaisesRegex(TypeError, msg):
+                with self.assertRaisesRegex(LookupError, msg):
                     bytearray(encoded_data).decode(encoding)
 
-    def test_bad_encoding_output_type(self):
-        # Check str.encode gives a good error message for str -> str codecs
-        msg = ("'rot_13' encoder returned 'str' instead of 'bytes'; "
-               "use codecs.encode\(\) to encode to arbitrary types")
-        with self.assertRaisesRegex(TypeError, msg):
-            "just an example message".encode("rot_13")
+    def test_binary_to_text_blacklists_text_transforms(self):
+        # Check str -> str codec gives a good error for binary input
+        for bad_input in (b"immutable", bytearray(b"mutable")):
+            with self.subTest(bad_input=bad_input):
+                msg = (r"^'rot_13' is not a text encoding; "
+                       r"use codecs.decode\(\) to handle arbitrary codecs")
+                with self.assertRaisesRegex(LookupError, msg) as failure:
+                    bad_input.decode("rot_13")
+                self.assertIsNone(failure.exception.__cause__)
+
+    def test_custom_zlib_error_is_wrapped(self):
+        # Check codecs.decode() wraps (and chains) the zlib error for bad input
+        msg = "^decoding with 'zlib_codec' codec failed"
+        with self.assertRaisesRegex(Exception, msg) as failure:
+            codecs.decode(b"hello", "zlib_codec")
+        self.assertIsInstance(failure.exception.__cause__,
+                                                type(failure.exception))
+
+    def test_custom_hex_error_is_wrapped(self):
+        # Check codecs.decode() wraps (and chains) the hex error for bad input
+        msg = "^decoding with 'hex_codec' codec failed"
+        with self.assertRaisesRegex(Exception, msg) as failure:
+            codecs.decode(b"hello", "hex_codec")
+        self.assertIsInstance(failure.exception.__cause__,
+                                                type(failure.exception))
+
+    # Unfortunately, the bz2 module throws OSError, which the codec
+    # machinery currently can't wrap :(
 
 
 # The codec system tries to wrap exceptions in order to ensure the error
@@ -2466,27 +2468,27 @@
         # case finishes by using the test case repr as the codec name
         # The codecs module normalizes codec names, although this doesn't
         # appear to be formally documented...
-        self.codec_name = repr(self).lower().replace(" ", "-")
+        self.codec_name = encodings.normalize_encoding(repr(self)).lower()
 
     def tearDown(self):
         _TEST_CODECS.pop(self.codec_name, None)
 
-    def set_codec(self, obj_to_raise):
-        def raise_obj(*args, **kwds):
-            raise obj_to_raise
-        codec_info = codecs.CodecInfo(raise_obj, raise_obj,
+    def set_codec(self, encode, decode):
+        codec_info = codecs.CodecInfo(encode, decode,
                                       name=self.codec_name)
         _TEST_CODECS[self.codec_name] = codec_info
 
     @contextlib.contextmanager
     def assertWrapped(self, operation, exc_type, msg):
-        full_msg = "{} with '{}' codec failed \({}: {}\)".format(
+        full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
                   operation, self.codec_name, exc_type.__name__, msg)
         with self.assertRaisesRegex(exc_type, full_msg) as caught:
             yield caught
 
     def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
-        self.set_codec(obj_to_raise)
+        def raise_obj(*args, **kwds):
+            raise obj_to_raise
+        self.set_codec(raise_obj, raise_obj)
         with self.assertWrapped("encoding", exc_type, msg):
             "str_input".encode(self.codec_name)
         with self.assertWrapped("encoding", exc_type, msg):
@@ -2515,23 +2517,17 @@
             pass
         self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
 
-    @contextlib.contextmanager
-    def assertNotWrapped(self, operation, exc_type, msg_re, msg=None):
-        if msg is None:
-            msg = msg_re
-        with self.assertRaisesRegex(exc_type, msg) as caught:
-            yield caught
-        self.assertEqual(str(caught.exception), msg)
-
-    def check_not_wrapped(self, obj_to_raise, msg_re, msg=None):
-        self.set_codec(obj_to_raise)
-        with self.assertNotWrapped("encoding", RuntimeError, msg_re, msg):
+    def check_not_wrapped(self, obj_to_raise, msg):
+        def raise_obj(*args, **kwds):
+            raise obj_to_raise
+        self.set_codec(raise_obj, raise_obj)
+        with self.assertRaisesRegex(RuntimeError, msg):
             "str input".encode(self.codec_name)
-        with self.assertNotWrapped("encoding", RuntimeError, msg_re, msg):
+        with self.assertRaisesRegex(RuntimeError, msg):
             codecs.encode("str input", self.codec_name)
-        with self.assertNotWrapped("decoding", RuntimeError, msg_re, msg):
+        with self.assertRaisesRegex(RuntimeError, msg):
             b"bytes input".decode(self.codec_name)
-        with self.assertNotWrapped("decoding", RuntimeError, msg_re, msg):
+        with self.assertRaisesRegex(RuntimeError, msg):
             codecs.decode(b"bytes input", self.codec_name)
 
     def test_init_override_is_not_wrapped(self):
@@ -2550,29 +2546,56 @@
         msg = "This should NOT be wrapped"
         exc = RuntimeError(msg)
         exc.attr = 1
-        self.check_not_wrapped(exc, msg)
+        self.check_not_wrapped(exc, "^{}$".format(msg))
 
     def test_non_str_arg_is_not_wrapped(self):
         self.check_not_wrapped(RuntimeError(1), "1")
 
     def test_multiple_args_is_not_wrapped(self):
-        msg_re = "\('a', 'b', 'c'\)"
-        msg = "('a', 'b', 'c')"
-        self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re, msg)
+        msg_re = r"^\('a', 'b', 'c'\)$"
+        self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
 
     # http://bugs.python.org/issue19609
     def test_codec_lookup_failure_not_wrapped(self):
-        msg = "unknown encoding: %s" % self.codec_name
+        msg = "^unknown encoding: {}$".format(self.codec_name)
         # The initial codec lookup should not be wrapped
-        with self.assertNotWrapped("encoding", LookupError, msg):
+        with self.assertRaisesRegex(LookupError, msg):
             "str input".encode(self.codec_name)
-        with self.assertNotWrapped("encoding", LookupError, msg):
+        with self.assertRaisesRegex(LookupError, msg):
             codecs.encode("str input", self.codec_name)
-        with self.assertNotWrapped("decoding", LookupError, msg):
+        with self.assertRaisesRegex(LookupError, msg):
             b"bytes input".decode(self.codec_name)
-        with self.assertNotWrapped("decoding", LookupError, msg):
+        with self.assertRaisesRegex(LookupError, msg):
             codecs.decode(b"bytes input", self.codec_name)
 
+    def test_unflagged_non_text_codec_handling(self):
+        # The stdlib non-text codecs are now marked so they're
+        # pre-emptively skipped by the text model related methods.
+        # However, third party codecs won't be flagged, so we still make
+        # sure the case where an inappropriate output type is produced is
+        # handled appropriately.
+        def encode_to_str(*args, **kwds):
+            return "not bytes!", 0
+        def decode_to_bytes(*args, **kwds):
+            return b"not str!", 0
+        self.set_codec(encode_to_str, decode_to_bytes)
+        # No input or output type checks on the codecs module functions
+        encoded = codecs.encode(None, self.codec_name)
+        self.assertEqual(encoded, "not bytes!")
+        decoded = codecs.decode(None, self.codec_name)
+        self.assertEqual(decoded, b"not str!")
+        # Text model methods should complain about the wrong output type
+        fmt = (r"^{!r} encoder returned 'str' instead of 'bytes'; "
+               r"use codecs.encode\(\) to encode to arbitrary types$")
+        msg = fmt.format(self.codec_name)
+        with self.assertRaisesRegex(TypeError, msg):
+            "str_input".encode(self.codec_name)
+        fmt = (r"^{!r} decoder returned 'bytes' instead of 'str'; "
+               r"use codecs.decode\(\) to decode to arbitrary types$")
+        msg = fmt.format(self.codec_name)
+        with self.assertRaisesRegex(TypeError, msg):
+            b"bytes input".decode(self.codec_name)
+
 
 
 @unittest.skipUnless(sys.platform == 'win32',
diff -r 90d4153728f6 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Thu Nov 21 23:01:59 2013 -0800
+++ b/Objects/unicodeobject.c	Fri Nov 22 21:15:49 2013 +1000
@@ -3044,7 +3044,7 @@
     buffer = PyMemoryView_FromBuffer(&info);
     if (buffer == NULL)
         goto onError;
-    unicode = PyCodec_Decode(buffer, encoding, errors);
+    unicode = _PyCodec_DecodeText(buffer, encoding, errors);
     if (unicode == NULL)
         goto onError;
     if (!PyUnicode_Check(unicode)) {
@@ -3410,7 +3410,7 @@
     }
 
     /* Encode via the codec registry */
-    v = PyCodec_Encode(unicode, encoding, errors);
+    v = _PyCodec_EncodeText(unicode, encoding, errors);
     if (v == NULL)
         return NULL;
 
diff -r 90d4153728f6 Python/codecs.c
--- a/Python/codecs.c	Thu Nov 21 23:01:59 2013 -0800
+++ b/Python/codecs.c	Fri Nov 22 21:15:49 2013 +1000
@@ -353,18 +353,15 @@
 
    errors is passed to the encoder factory as argument if non-NULL. */
 
-PyObject *PyCodec_Encode(PyObject *object,
-                         const char *encoding,
-                         const char *errors)
+static PyObject *
+_PyCodec_EncodeInternal(PyObject *object,
+                        PyObject *encoder,
+                        const char *encoding,
+                        const char *errors)
 {
-    PyObject *encoder = NULL;
     PyObject *args = NULL, *result = NULL;
     PyObject *v = NULL;
 
-    encoder = PyCodec_Encoder(encoding);
-    if (encoder == NULL)
-        goto onError;
-
     args = args_tuple(object, errors);
     if (args == NULL)
         goto onError;
@@ -402,18 +399,15 @@
 
    errors is passed to the decoder factory as argument if non-NULL. */
 
-PyObject *PyCodec_Decode(PyObject *object,
-                         const char *encoding,
-                         const char *errors)
+static PyObject *
+_PyCodec_DecodeInternal(PyObject *object,
+                        PyObject *decoder,
+                        const char *encoding,
+                        const char *errors)
 {
-    PyObject *decoder = NULL;
     PyObject *args = NULL, *result = NULL;
     PyObject *v;
 
-    decoder = PyCodec_Decoder(encoding);
-    if (decoder == NULL)
-        goto onError;
-
     args = args_tuple(object, errors);
     if (args == NULL)
         goto onError;
@@ -445,6 +439,118 @@
     return NULL;
 }
 
+/* Generic encoding/decoding API */
+/* Generic variant: no text-encoding check is made on the encoder. */
+PyObject *
+PyCodec_Encode(PyObject *object, const char *encoding, const char *errors)
+{
+    PyObject *codec_encoder = PyCodec_Encoder(encoding);
+
+    if (codec_encoder != NULL) {
+        return _PyCodec_EncodeInternal(object, codec_encoder,
+                                       encoding, errors);
+    }
+    return NULL;
+}
+
+/* Generic variant: no text-encoding check is made on the decoder. */
+PyObject *
+PyCodec_Decode(PyObject *object, const char *encoding, const char *errors)
+{
+    PyObject *codec_decoder = PyCodec_Decoder(encoding);
+
+    if (codec_decoder != NULL) {
+        return _PyCodec_DecodeInternal(object, codec_decoder,
+                                       encoding, errors);
+    }
+    return NULL;
+}
+
+/* Text encoding/decoding API */
+/* Look up `encoding` and return a new reference to item `index` of its
+ * codec tuple; fail with LookupError (naming codecs.<operation_name>()
+ * as the alternative) if the codec is flagged as not a text encoding. */
+static PyObject *
+codec_getitem_checked(const char *encoding, const char *operation_name,
+                      int index)
+{
+    _Py_IDENTIFIER(_is_text_encoding);
+    PyObject *codec, *attr, *v;
+    int is_text_codec;
+
+    codec = _PyCodec_Lookup(encoding);
+    if (codec == NULL)
+        return NULL;
+
+    /* Backwards compatibility: assume any raw tuple describes a text
+     * encoding, and the same for anything lacking the private attribute. */
+    if (!PyTuple_CheckExact(codec)) {
+        attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
+        if (attr == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+                PyErr_Clear();
+            } else {
+                Py_DECREF(codec);
+                return NULL;
+            }
+        } else {
+            is_text_codec = PyObject_IsTrue(attr);
+            Py_DECREF(attr);
+            /* -1 means PyObject_IsTrue() itself failed: propagate it */
+            if (is_text_codec <= 0) {
+                Py_DECREF(codec);
+                if (!is_text_codec)
+                    PyErr_Format(PyExc_LookupError,
+                                 "'%.400s' is not a text encoding; "
+                                 "use codecs.%s() to handle arbitrary codecs",
+                                 encoding, operation_name);
+                return NULL;
+            }
+        }
+    }
+
+    v = PyTuple_GET_ITEM(codec, index);
+    Py_DECREF(codec);
+    Py_INCREF(v);
+    return v;
+}
+
+static PyObject * _PyCodec_TextEncoder(const char *encoding)
+{
+    return codec_getitem_checked(encoding, "encode", 0); /* item 0: encode */
+}
+
+static PyObject * _PyCodec_TextDecoder(const char *encoding)
+{
+    return codec_getitem_checked(encoding, "decode", 1); /* item 1: decode */
+}
+
+/* Text-only variant: the encoder lookup rejects non-text codecs. */
+PyObject *
+_PyCodec_EncodeText(PyObject *object, const char *encoding,
+                    const char *errors)
+{
+    PyObject *text_encoder = _PyCodec_TextEncoder(encoding);
+
+    if (text_encoder == NULL)
+        return NULL;
+
+    return _PyCodec_EncodeInternal(object, text_encoder, encoding, errors);
+}
+
+/* Text-only variant: the decoder lookup rejects non-text codecs. */
+PyObject *
+_PyCodec_DecodeText(PyObject *object, const char *encoding,
+                    const char *errors)
+{
+    PyObject *text_decoder = _PyCodec_TextDecoder(encoding);
+
+    if (text_decoder == NULL)
+        return NULL;
+
+    return _PyCodec_DecodeInternal(object, text_decoder, encoding, errors);
+}
+
 /* Register the error handling callback function error under the name
    name. This function will be called by the codec when it encounters
    an unencodable characters/undecodable bytes and doesn't know the