Index: Python/sysmodule.c =================================================================== --- Python/sysmodule.c (Revision 60436) +++ Python/sysmodule.c (Arbeitskopie) @@ -754,17 +754,6 @@ 10. Number of stack pops performed by call_function()" ); -static PyObject * -sys_cleartypecache(PyObject* self, PyObject* args) -{ - PyType_ClearCache(); - Py_RETURN_NONE; -} - -PyDoc_STRVAR(cleartypecache_doc, -"_cleartypecache() -> None\n\ -Clear the internal type lookup cache."); - #ifdef __cplusplus extern "C" { #endif @@ -783,12 +772,44 @@ } #endif +static PyObject * +sys_clear_type_cache(PyObject* self, PyObject* args) +{ + PyType_ClearCache(); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(sys_clear_type_cache__doc__, +"clear_type_cache() -> None\n\ +Clear the internal type lookup cache."); + + +static PyObject * +sys_compact_freelists(PyObject* self, PyObject* args) +{ + size_t isum, ibc, ibf; /* ints: unfreed objects, blocks before, blocks freed */ + size_t fsum, fbc, fbf; /* floats: same three counters */ + + PyInt_CompactFreeList(&ibc, &ibf, &isum); + PyFloat_CompactFreeList(&fbc, &fbf, &fsum); + /* "k" reads an unsigned long; size_t may differ in width, so cast. */ + return Py_BuildValue("(kkk)(kkk)", + (unsigned long)isum, (unsigned long)ibc, (unsigned long)ibf, + (unsigned long)fsum, (unsigned long)fbc, (unsigned long)fbf); +} + +PyDoc_STRVAR(sys_compact_freelists__doc__, +"compact_freelists() -> ((remaining_objects, total_blocks, freed_blocks), ...)\n\ +Compact the free lists of ints and floats."); + static PyMethodDef sys_methods[] = { /* Might as well keep this in alphabetic order */ {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS, callstats_doc}, - {"_cleartypecache", sys_cleartypecache, METH_NOARGS, - cleartypecache_doc}, + {"clear_type_cache", sys_clear_type_cache, METH_NOARGS, + sys_clear_type_cache__doc__}, + {"compact_freelists", sys_compact_freelists, METH_NOARGS, + sys_compact_freelists__doc__}, {"_current_frames", sys_current_frames, METH_NOARGS, current_frames_doc}, {"displayhook", sys_displayhook, METH_O, displayhook_doc}, Index: Include/unicodeobject.h =================================================================== --- Include/unicodeobject.h (Revision 60436) +++ Include/unicodeobject.h 
(Arbeitskopie) @@ -348,8 +348,16 @@ #else -#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch) +extern const unsigned char _Py_ascii_whitespace[]; +extern const unsigned char _Py_ascii_linebreak[]; +/* Since splitting on whitespace is an important use case, and whitespace + in most situations is solely ASCII whitespace, we optimize for the common + case by using a quick look-up table with an inlined check. + */ +#define Py_UNICODE_ISSPACE(ch) \ + ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) + #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) Index: Include/intobject.h =================================================================== --- Include/intobject.h (Revision 60436) +++ Include/intobject.h (Arbeitskopie) @@ -59,6 +59,10 @@ PyAPI_FUNC(unsigned long) PyOS_strtoul(char *, char **, int); PyAPI_FUNC(long) PyOS_strtol(char *, char **, int); +/* free list api */ +PyAPI_FUNC(int) PyInt_CompactFreeList(size_t *, size_t *, size_t *); +PyAPI_FUNC(size_t) PyInt_FreeListSize(void); + #ifdef __cplusplus } #endif Index: Include/floatobject.h =================================================================== --- Include/floatobject.h (Revision 60436) +++ Include/floatobject.h (Arbeitskopie) @@ -101,6 +101,9 @@ PyAPI_FUNC(double) _PyFloat_Unpack4(const unsigned char *p, int le); PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le); +/* free list api */ +PyAPI_FUNC(int) PyFloat_CompactFreeList(size_t *, size_t *, size_t *); +PyAPI_FUNC(size_t) PyFloat_FreeListSize(void); #ifdef __cplusplus } Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (Revision 60436) +++ Objects/unicodeobject.c (Arbeitskopie) @@ -112,6 +112,64 @@ */ static char unicode_default_encoding[100]; +/* Fast detection of the most frequent whitespace characters 
*/ +const unsigned char _Py_ascii_whitespace[] = { + 0, 0, 0, 0, 0, 0, 0, 0, +/* case 0x0009: HORIZONTAL TABULATION */ +/* case 0x000A: LINE FEED */ +/* case 0x000B: VERTICAL TABULATION */ +/* case 0x000C: FORM FEED */ +/* case 0x000D: CARRIAGE RETURN */ + 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* case 0x001C: FILE SEPARATOR */ +/* case 0x001D: GROUP SEPARATOR */ +/* case 0x001E: RECORD SEPARATOR */ +/* case 0x001F: UNIT SEPARATOR */ + 0, 0, 0, 0, 1, 1, 1, 1, +/* case 0x0020: SPACE */ + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Same for linebreaks */ +const unsigned char _Py_ascii_linebreak[] = { + 0, 0, 0, 0, 0, 0, 0, 0, +/* 0x000A: LINE FEED */ +/* 0x000D: CARRIAGE RETURN */ + 0, 0, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* 0x001C: FILE SEPARATOR */ +/* 0x001D: GROUP SEPARATOR */ +/* 0x001E: RECORD SEPARATOR */ + 0, 0, 0, 0, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + + Py_UNICODE PyUnicode_GetMax(void) { @@ -138,8 +196,9 @@ #define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F)))) -#define BLOOM_LINEBREAK(ch)\ - (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch))) +#define BLOOM_LINEBREAK(ch) \ + ((ch) < 128U ? 
_Py_ascii_linebreak[(ch)] : \ + (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) { @@ -5505,25 +5564,26 @@ register Py_ssize_t j; Py_ssize_t len = self->length; PyObject *str; + register const Py_UNICODE *buf = self->str; for (i = j = 0; i < len; ) { /* find a token */ - while (i < len && Py_UNICODE_ISSPACE(self->str[i])) + while (i < len && Py_UNICODE_ISSPACE(buf[i])) i++; j = i; - while (i < len && !Py_UNICODE_ISSPACE(self->str[i])) + while (i < len && !Py_UNICODE_ISSPACE(buf[i])) i++; if (j < i) { if (maxcount-- <= 0) break; - SPLIT_APPEND(self->str, j, i); - while (i < len && Py_UNICODE_ISSPACE(self->str[i])) + SPLIT_APPEND(buf, j, i); + while (i < len && Py_UNICODE_ISSPACE(buf[i])) i++; j = i; } } if (j < len) { - SPLIT_APPEND(self->str, j, len); + SPLIT_APPEND(buf, j, len); } return list; @@ -5596,18 +5656,19 @@ register Py_ssize_t j; Py_ssize_t len = self->length; PyObject *str; + register const Py_UNICODE *buf = self->str; for (i = j = 0; i < len; ) { - if (self->str[i] == ch) { + if (buf[i] == ch) { if (maxcount-- <= 0) break; - SPLIT_APPEND(self->str, j, i); + SPLIT_APPEND(buf, j, i); i = j = i + 1; } else i++; } if (j <= len) { - SPLIT_APPEND(self->str, j, len); + SPLIT_APPEND(buf, j, len); } return list; @@ -5656,25 +5717,26 @@ register Py_ssize_t j; Py_ssize_t len = self->length; PyObject *str; + register const Py_UNICODE *buf = self->str; for (i = j = len - 1; i >= 0; ) { /* find a token */ - while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i])) + while (i >= 0 && Py_UNICODE_ISSPACE(buf[i])) i--; j = i; - while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i])) + while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i])) i--; if (j > i) { if (maxcount-- <= 0) break; - SPLIT_APPEND(self->str, i + 1, j + 1); - while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i])) + SPLIT_APPEND(buf, i + 1, j + 1); + while (i >= 0 && Py_UNICODE_ISSPACE(buf[i])) i--; j = i; } } if (j >= 0) { - 
SPLIT_APPEND(self->str, 0, j + 1); + SPLIT_APPEND(buf, 0, j + 1); } if (PyList_Reverse(list) < 0) goto onError; @@ -5695,18 +5757,19 @@ register Py_ssize_t j; Py_ssize_t len = self->length; PyObject *str; + register const Py_UNICODE *buf = self->str; for (i = j = len - 1; i >= 0; ) { - if (self->str[i] == ch) { + if (buf[i] == ch) { if (maxcount-- <= 0) break; - SPLIT_APPEND(self->str, i + 1, j + 1); + SPLIT_APPEND(buf, i + 1, j + 1); j = i = i - 1; } else i--; } if (j >= -1) { - SPLIT_APPEND(self->str, 0, j + 1); + SPLIT_APPEND(buf, 0, j + 1); } if (PyList_Reverse(list) < 0) goto onError; Index: Objects/intobject.c =================================================================== --- Objects/intobject.c (Revision 60436) +++ Objects/intobject.c (Arbeitskopie) @@ -42,6 +42,7 @@ static PyIntBlock *block_list = NULL; static PyIntObject *free_list = NULL; +static size_t block_list_length = 0; static PyIntObject * fill_free_list(void) @@ -53,6 +54,7 @@ return (PyIntObject *) PyErr_NoMemory(); ((PyIntBlock *)p)->next = block_list; block_list = (PyIntBlock *)p; + block_list_length++; /* Link the int objects together, from rear to front, then return the address of the last int object in the block. 
*/ p = &((PyIntBlock *)p)->objects[0]; @@ -1201,34 +1203,22 @@ return 1; } -void -PyInt_Fini(void) +int +PyInt_CompactFreeList(size_t *pbc, size_t *pbf, size_t *bsum) { PyIntObject *p; PyIntBlock *list, *next; - int i; unsigned int ctr; - int bc, bf; /* block count, number of freed blocks */ - int irem, isum; /* remaining unfreed ints per block, total */ + size_t bc; /* block count before run */ + size_t isum; /* total unfreed ints */ + int irem; /* remaining unfreed ints per block */ -#if NSMALLNEGINTS + NSMALLPOSINTS > 0 - PyIntObject **q; - - i = NSMALLNEGINTS + NSMALLPOSINTS; - q = small_ints; - while (--i >= 0) { - Py_XDECREF(*q); - *q++ = NULL; - } -#endif - bc = 0; - bf = 0; + bc = block_list_length; isum = 0; list = block_list; block_list = NULL; free_list = NULL; while (list != NULL) { - bc++; irem = 0; for (ctr = 0, p = &list->objects[0]; ctr < N_INTOBJECTS; @@ -1263,11 +1253,45 @@ } else { PyMem_FREE(list); - bf++; + block_list_length--; } isum += irem; list = next; } + + *pbc = bc; + *pbf = bc - block_list_length; + *bsum = isum; + return 0; +} + +size_t +PyInt_FreeListSize(void) +{ + return block_list_length; +} + +void +PyInt_Fini(void) +{ + PyIntObject *p; + PyIntBlock *list; + unsigned int ctr; + size_t bc, bf; /* block count, number of freed blocks */ + size_t isum; /* total unfreed ints per block */ + +#if NSMALLNEGINTS + NSMALLPOSINTS > 0 + int i; + PyIntObject **q; + + i = NSMALLNEGINTS + NSMALLPOSINTS; + q = small_ints; + while (--i >= 0) { + Py_XDECREF(*q); + *q++ = NULL; + } +#endif + PyInt_CompactFreeList(&bc, &bf, &isum); if (!Py_VerboseFlag) return; fprintf(stderr, "# cleanup ints"); @@ -1276,9 +1300,9 @@ } else { fprintf(stderr, - ": %d unfreed int%s in %d out of %d block%s\n", - isum, isum == 1 ? "" : "s", - bc - bf, bc, bc == 1 ? "" : "s"); + ": %ld unfreed int%s in %ld out of %ld block%s\n", + (long)isum, isum == 1 ? "" : "s", + (long)(bc - bf), (long)bc, bc == 1 ? 
"" : "s"); } if (Py_VerboseFlag > 1) { list = block_list; Index: Objects/floatobject.c =================================================================== --- Objects/floatobject.c (Revision 60436) +++ Objects/floatobject.c (Arbeitskopie) @@ -35,6 +35,7 @@ static PyFloatBlock *block_list = NULL; static PyFloatObject *free_list = NULL; +static size_t block_list_length = 0; static PyFloatObject * fill_free_list(void) @@ -46,6 +47,7 @@ return (PyFloatObject *) PyErr_NoMemory(); ((PyFloatBlock *)p)->next = block_list; block_list = (PyFloatBlock *)p; + block_list_length++; p = &((PyFloatBlock *)p)->objects[0]; q = p + N_FLOATOBJECTS; while (--q > p) @@ -1672,23 +1674,22 @@ #endif } -void -PyFloat_Fini(void) +int +PyFloat_CompactFreeList(size_t *pbc, size_t *pbf, size_t *bsum) { PyFloatObject *p; PyFloatBlock *list, *next; unsigned i; - int bc, bf; /* block count, number of freed blocks */ - int frem, fsum; /* remaining unfreed floats per block, total */ + size_t bc; /* block count before run */ + size_t fsum; /* total unfreed ints */ + int frem; /* remaining unfreed ints per block */ - bc = 0; - bf = 0; + bc = block_list_length; fsum = 0; list = block_list; block_list = NULL; free_list = NULL; while (list != NULL) { - bc++; frem = 0; for (i = 0, p = &list->objects[0]; i < N_FLOATOBJECTS; @@ -1713,11 +1714,34 @@ } else { PyMem_FREE(list); /* XXX PyObject_FREE ??? 
*/ - bf++; + block_list_length--; } fsum += frem; list = next; } + *pbc = bc; + *pbf = bc - block_list_length; + *bsum = fsum; + return 0; +} + +size_t +PyFloat_FreeListSize(void) +{ + return block_list_length; +} + +void +PyFloat_Fini(void) +{ + PyFloatObject *p; + PyFloatBlock *list; + unsigned i; + size_t bc, bf; /* block count, number of freed blocks */ + size_t fsum; /* total unfreed floats */ + + PyFloat_CompactFreeList(&bc, &bf, &fsum); + if (!Py_VerboseFlag) return; fprintf(stderr, "# cleanup floats"); @@ -1726,9 +1750,9 @@ } else { fprintf(stderr, - ": %d unfreed float%s in %d out of %d block%s\n", - fsum, fsum == 1 ? "" : "s", - bc - bf, bc, bc == 1 ? "" : "s"); + ": %ld unfreed float%s in %ld out of %ld block%s\n", + (long)fsum, fsum == 1 ? "" : "s", + (long)(bc - bf), (long)bc, bc == 1 ? "" : "s"); } if (Py_VerboseFlag > 1) { list = block_list; Index: Misc/NEWS =================================================================== --- Misc/NEWS (Revision 60436) +++ Misc/NEWS (Arbeitskopie) @@ -12,6 +12,9 @@ Core and builtins ----------------- +- Patch #1970 by Antoine Pitrou: Speed up unicode whitespace and linebreak + detection + - Added ``PyType_ClearCache()`` and ``sys._cleartypecache`` to clear the internal lookup cache for ref leak tests. Index: Doc/c-api/float.rst =================================================================== --- Doc/c-api/float.rst (Revision 60436) +++ Doc/c-api/float.rst (Arbeitskopie) @@ -84,3 +84,21 @@ Return the minimum normalized positive float *DBL_MIN* as C :ctype:`double`. .. versionadded:: 2.6 + + +.. cfunction:: int PyFloat_CompactFreeList(size_t *bc, size_t *bf, size_t *sum) + + Compact the float free list. *bc* is the number of allocated blocks before + blocks are freed, *bf* is the number of freed blocks and *sum* is the number + of remaining objects in the blocks. + + .. versionadded:: 2.6 + + +.. cfunction:: size_t PyFloat_FreeListSize(void) + + Return the number of allocated blocks. 
Each block has roughly the size of + 1kb. + + .. versionadded:: 2.6 + Index: Doc/c-api/int.rst =================================================================== --- Doc/c-api/int.rst (Revision 60436) +++ Doc/c-api/int.rst (Arbeitskopie) @@ -120,3 +120,21 @@ Return the system's idea of the largest integer it can handle (:const:`LONG_MAX`, as defined in the system header files). + + +.. cfunction:: int PyInt_CompactFreeList(size_t *bc, size_t *bf, size_t *sum) + + Compact the integer free list. *bc* is the number of allocated blocks before + blocks are freed, *bf* is the number of freed blocks and *sum* is the number + of remaining objects in the blocks. + + .. versionadded:: 2.6 + + +.. cfunction:: size_t PyInt_FreeListSize(void) + + Return the number of allocated blocks. Each block has roughly the size of + 1kb. + + .. versionadded:: 2.6 + Index: Doc/library/sys.rst =================================================================== --- Doc/library/sys.rst (Revision 60436) +++ Doc/library/sys.rst (Arbeitskopie) @@ -58,13 +58,22 @@ A string containing the copyright pertaining to the Python interpreter. -.. function:: _cleartypecache() +.. function:: compact_freelists() - Clear the internal type lookup cache. + Compact the free lists of integers and floats by deallocating unused blocks. .. versionadded:: 2.6 +.. function:: clear_type_cache() + + Clear the internal type cache. The type cache is used to speed up attribute + and method lookups. Use the function *only* to drop unnecessary references + during reference leak debugging. + + .. versionadded:: 2.6 + + .. 
function:: _current_frames() Return a dictionary mapping each thread's identifier to the topmost stack frame Index: Lib/test/regrtest.py =================================================================== --- Lib/test/regrtest.py (Revision 60436) +++ Lib/test/regrtest.py (Arbeitskopie) @@ -710,7 +710,7 @@ sys.path_importer_cache.update(pic) # clear type cache - sys._cleartypecache() + sys.clear_type_cache() # Clear ABC registries, restoring previously saved ABC registries. for abc in [getattr(_abcoll, a) for a in _abcoll.__all__]: Index: Lib/test/test_sys.py =================================================================== --- Lib/test/test_sys.py (Revision 60436) +++ Lib/test/test_sys.py (Arbeitskopie) @@ -363,7 +363,25 @@ self.assertEqual(type(getattr(sys.flags, attr)), int, attr) self.assert_(repr(sys.flags)) + def test_clear_type_cache(self): + sys.clear_type_cache() + def test_compact_freelists(self): + sys.compact_freelists() + r = sys.compact_freelists() + # freed blocks shouldn't change + self.assertEqual(r[0][2], 0) + self.assertEqual(r[1][2], 0) + # fill freelists + ints = list(range(12000)) + floats = [float(i) for i in ints] + del ints + del floats + # should free more than 200 blocks each + r = sys.compact_freelists() + self.assert_(r[0][2] > 200, r[0][2]) + self.assert_(r[1][2] > 200, r[1][2]) + def test_main(): test.test_support.run_unittest(SysModuleTest)