Index: Python/bltinmodule.c =================================================================== --- Python/bltinmodule.c (revision 61453) +++ Python/bltinmodule.c (working copy) @@ -5,6 +5,7 @@ #include "node.h" #include "code.h" #include "eval.h" +#include "abstract.h" #include <ctype.h> @@ -1182,17 +1183,18 @@ static PyObject * builtin_intern(PyObject *self, PyObject *args) { - PyObject *s; - if (!PyArg_ParseTuple(args, "S:intern", &s)) + PyObject *intern; + if (Py_Py3kWarningFlag && + PyErr_Warn(PyExc_DeprecationWarning, + "intern has been moved to the sys module.") < 0) return NULL; - if (!PyString_CheckExact(s)) { - PyErr_SetString(PyExc_TypeError, - "can't intern subclass of string"); + /* PySys_GetObject() returns a borrowed reference and sets no exception on failure. */ + intern = PySys_GetObject("intern"); + if (intern == NULL) { + PyErr_SetString(PyExc_RuntimeError, "lost sys.intern"); return NULL; } - Py_INCREF(s); - PyString_InternInPlace(&s); - return s; + return PyObject_Call(intern, args, NULL); } PyDoc_STRVAR(intern_doc, Index: Python/sysmodule.c =================================================================== --- Python/sysmodule.c (revision 61453) +++ Python/sysmodule.c (working copy) @@ -19,6 +19,7 @@ #include "code.h" #include "frameobject.h" #include "eval.h" +#include "string.h" #include "osdefs.h" @@ -269,6 +270,30 @@ #endif +static PyObject * +sys_intern(PyObject *self, PyObject *args) +{ + PyObject *s; + if (!PyArg_ParseTuple(args, "S:intern", &s)) + return NULL; + if (!PyString_CheckExact(s)) { + PyErr_SetString(PyExc_TypeError, + "can't intern subclass of string"); + return NULL; + } + Py_INCREF(s); + PyString_InternInPlace(&s); + return s; +} + +PyDoc_STRVAR(intern_doc, +"intern(string) -> string\n\ +\n\ +``Intern'' the given string. This enters the string in the (global)\n\ +table of interned strings whose purpose is to speed up dictionary lookups.\n\ +Return the string itself or the previously interned string object with the\n\ +same value."); + /* * Cached interned string objects used for calling the profile and * trace functions. Initialized by trace_init(). 
@@ -849,6 +874,7 @@ {"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS, getwindowsversion_doc}, #endif /* MS_WINDOWS */ + {"intern", sys_intern, METH_VARARGS, intern_doc}, #ifdef USE_MALLOPT {"mdebug", sys_mdebug, METH_VARARGS}, #endif Index: Doc/library/sys.rst =================================================================== --- Doc/library/sys.rst (revision 61453) +++ Doc/library/sys.rst (working copy) @@ -495,8 +495,23 @@ same information. .. versionadded:: 1.5.2 + +.. function:: intern(string) + Enter *string* in the table of "interned" strings and return the interned string + -- which is *string* itself or a copy. Interning strings is useful to gain a + little performance on dictionary lookup -- if the keys in a dictionary are + interned, and the lookup key is interned, the key comparisons (after hashing) + can be done by a pointer compare instead of a string compare. Normally, the + names used in Python programs are automatically interned, and the dictionaries + used to hold module, class or instance attributes have interned keys. + Interned strings are not immortal; you must keep a reference to the return + value of :func:`intern` around to benefit from it. + + .. versionadded:: 2.6 + + .. data:: last_type last_value last_traceback Index: Lib/test/test_builtin.py =================================================================== --- Lib/test/test_builtin.py (revision 61453) +++ Lib/test/test_builtin.py (working copy) @@ -1061,30 +1061,6 @@ self.fail("Failed to raise TypeError with %s" % ((base, trunc_result_base),)) - def test_intern(self): - self.assertRaises(TypeError, intern) - s = "never interned before" - self.assert_(intern(s) is s) - s2 = s.swapcase().swapcase() - self.assert_(intern(s2) is s) - - # Subclasses of string can't be interned, because they - # provide too much opportunity for insane things to happen. 
- # We don't want them in the interned dict and if they aren't - # actually interned, we don't want to create the appearance - # that they are by allowing intern() to succeeed. - class S(str): - def __hash__(self): - return 123 - - self.assertRaises(TypeError, intern, S("abc")) - - # It's still safe to pass these strings to routines that - # call intern internally, e.g. PyObject_SetAttr(). - s = S("abc") - setattr(s, s, s) - self.assertEqual(getattr(s, s), s) - def test_iter(self): self.assertRaises(TypeError, iter) self.assertRaises(TypeError, iter, 42, 42) Index: Lib/test/test_sys.py =================================================================== --- Lib/test/test_sys.py (revision 61453) +++ Lib/test/test_sys.py (working copy) @@ -346,7 +346,31 @@ self.assert_(isinstance(vi[2], int)) self.assert_(vi[3] in ("alpha", "beta", "candidate", "final")) self.assert_(isinstance(vi[4], int)) + + def test_intern(self): + self.assertRaises(TypeError, sys.intern) + s = "never interned before" + self.assert_(sys.intern(s) is s) + s2 = s.swapcase().swapcase() + self.assert_(sys.intern(s2) is s) + # Subclasses of string can't be interned, because they + # provide too much opportunity for insane things to happen. + # We don't want them in the interned dict and if they aren't + # actually interned, we don't want to create the appearance + # that they are by allowing sys.intern() to succeed. + class S(str): + def __hash__(self): + return 123 + + self.assertRaises(TypeError, sys.intern, S("abc")) + + # It's still safe to pass these strings to routines that + # call intern internally, e.g. PyObject_SetAttr(). + s = S("abc") + setattr(s, s, s) + self.assertEqual(getattr(s, s), s) + def test_43581(self): # Can't use sys.stdout, as this is a cStringIO object when # the test runs under regrtest.