Message402201
> OTOH on my Mac I still find that 3.10 with PGO is still
> more than twice as slow than 2.7.
> Thinking about it that's a bit odd, since (presumably)
> the majority of the work in sum() involves a long int result
> (even though the values returned by range() all fit in 30 bits,
> the sum quickly exceeds that).
The actual accumulation of a long int result is still as fast as it ever was.
The main difference from Py2.7 isn't the addition itself; it is that detecting and extracting a small int addend has become expensive.
-- Python 2 fastpath --------------------------------------
if (PyInt_CheckExact(item)) { // Very cheap
long b = PyInt_AS_LONG(item); // Very cheap
long x = i_result + b; // Very cheap
if ((x^i_result) >= 0 || (x^b) >= 0) { // Semi cheap
i_result = x; // Zero cost
Py_DECREF(item); // Most expensive step, but still cheap
continue;
}
}
-- Python 3 fastpath --------------------------------------
if (PyLong_CheckExact(item) || PyBool_Check(item)) { // Cheap
long b = PyLong_AsLongAndOverflow(item, &overflow); // Super Expensive
if (overflow == 0 && // Branch predictable test
(i_result >= 0 ? (b <= LONG_MAX - i_result) // Slower but better test
: (b >= LONG_MIN - i_result)))
{
i_result += b; // Very cheap
Py_DECREF(item);
continue;
}
}
-- Supporting function ------------------------------------
/* Convert the object vv to a C long, reporting range overflow via *overflow.
 *
 * vv:       object to convert; if it does not pass PyLong_Check() it is
 *           first converted with _PyNumber_Index().
 * overflow: out-parameter; set to 0 on entry, then to +1 or -1 (the sign
 *           of the value) when the value does not fit in a long.
 *
 * Returns the converted value. Returns -1 when vv is NULL (after
 * PyErr_BadInternalCall()), when _PyNumber_Index() returns NULL, or when
 * overflow is reported (in which case *overflow is nonzero).
 */
long
PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) // OMG, this does a lot of work
{
/* This version by Tim Peters */
PyLongObject *v;
unsigned long x, prev;
long res;
Py_ssize_t i;
int sign;
int do_decref = 0; /* if PyNumber_Index was called */
*overflow = 0;
if (vv == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (PyLong_Check(vv)) {
v = (PyLongObject *)vv;
}
else {
/* Not an int: obtain one via the index protocol; the result is
 * released at exit because do_decref is set. */
v = (PyLongObject *)_PyNumber_Index(vv);
if (v == NULL)
return -1;
do_decref = 1;
}
/* Default result, returned unchanged on every overflow path below. */
res = -1;
/* The sign of the size carries the sign of the value; its magnitude is
 * the number of digits visited by the loop in the default case. */
i = Py_SIZE(v);
switch (i) {
/* Single-digit and zero values are handled without looping. */
case -1:
res = -(sdigit)v->ob_digit[0];
break;
case 0:
res = 0;
break;
case 1:
res = v->ob_digit[0];
break;
default:
/* Multi-digit value: accumulate the magnitude in an unsigned long,
 * walking from the highest-index digit down to digit 0. */
sign = 1;
x = 0;
if (i < 0) {
sign = -1;
i = -(i);
}
while (--i >= 0) {
prev = x;
x = (x << PyLong_SHIFT) | v->ob_digit[i];
/* If shifting back does not recover the previous value, high bits
 * were lost in the left shift: the magnitude exceeds unsigned long. */
if ((x >> PyLong_SHIFT) != prev) {
*overflow = sign;
goto exit;
}
}
/* Haven't lost any bits, but casting to long requires extra
 * care (see comment above).
 */
if (x <= (unsigned long)LONG_MAX) {
res = (long)x * sign;
}
/* NOTE(review): PY_ABS_LONG_MIN is presumably the magnitude of LONG_MIN,
 * making this the one negative value whose magnitude exceeds LONG_MAX
 * yet still fits in a long -- confirm against its definition. */
else if (sign < 0 && x == PY_ABS_LONG_MIN) {
res = LONG_MIN;
}
else {
*overflow = sign;
/* res is already set to -1 */
}
}
exit:
/* Release the temporary created by _PyNumber_Index(), if any. */
if (do_decref) {
Py_DECREF(v);
}
return res;
} |
|
Date |
User |
Action |
Args |
2021-09-20 07:22:26 | rhettinger | set | recipients:
+ rhettinger, gvanrossum, mark.dickinson, scoder, steven.daprano, lukasz.langa, serhiy.storchaka |
2021-09-20 07:22:26 | rhettinger | set | messageid: <1632122546.84.0.68698495652.issue24076@roundup.psfhosted.org> |
2021-09-20 07:22:26 | rhettinger | link | issue24076 messages |
2021-09-20 07:22:26 | rhettinger | create | |
|