Message338491
Hi what do you think of a patch to this effect that would speed up operations without changing the current semantics?
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index cff75a48d6..fe5d5b2dca 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -561,6 +561,7 @@ class Counter(dict):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
super(Counter, self).__init__()
+ self._nonpositives = []
self.update(*args, **kwds)
def __missing__(self, key):
@@ -652,6 +653,12 @@ class Counter(dict):
self[elem] = count + self_get(elem, 0)
else:
super(Counter, self).update(iterable) # fast path when counter is empty
+ for elem, count in self.items():
+ try:
+ if not count > 0:
+ self._nonpositives.append(elem)
+ except TypeError:
+ pass
else:
_count_elements(self, iterable)
if kwds:
@@ -818,9 +825,10 @@ class Counter(dict):
def _keep_positive(self):
'''Internal method to strip elements with a negative or zero count'''
- nonpositive = [elem for elem, count in self.items() if not count > 0]
- for elem in nonpositive:
- del self[elem]
+ for elem in self._nonpositives:
+ if not self[elem] > 0:
+ del self[elem]
+ self._nonpositives.clear()
return self
def __iadd__(self, other):
@@ -833,7 +841,10 @@ class Counter(dict):
'''
for elem, count in other.items():
- self[elem] += count
+ count = self[elem] + count
+ self[elem] = count
+ if not count > 0:
+ self._nonpositives.append(elem)
return self._keep_positive()
def __isub__(self, other):
@@ -846,7 +857,10 @@ class Counter(dict):
'''
for elem, count in other.items():
- self[elem] -= count
+ count = self[elem] - count
+ self[elem] = count
+ if not count > 0:
+ del self[elem]
return self._keep_positive()
def __ior__(self, other):
@@ -858,10 +872,11 @@ class Counter(dict):
Counter({'b': 3, 'c': 2, 'a': 1})
'''
- for elem, other_count in other.items():
- count = self[elem]
- if other_count > count:
- self[elem] = other_count
+ for elem, count in other.items():
+ count = max(count, self[elem])
+ self[elem] = count
+ if not count > 0:
+ self._nonpositives.append(elem)
return self._keep_positive()
def __iand__(self, other):
@@ -874,9 +889,10 @@ class Counter(dict):
'''
for elem, count in self.items():
- other_count = other[elem]
- if other_count < count:
- self[elem] = other_count
+ count = min(other[elem], count)
+ self[elem] = count
+ if not count > 0:
+ self._nonpositives.append(elem)
return self._keep_positive()
It seems to give great improvements for the case Nikita Smetanin gave:
➜ cpython git:(speed-up-Counter) ✗ ./python.exe tests.py
Setup took: 9.101021380999999
Computation took: 1.634299999864197e-05
➜ cpython git:(speed-up-Counter) ✗ python3 tests.py
Setup took: 11.615437155
Computation took: 0.34183154800000004
Is there a performance test suite for Counter I could use to check this does not add regressions in some cases? |
|
Date |
User |
Action |
Args |
2019-03-20 16:10:33 | remi.lapeyre | set | recipients:
+ remi.lapeyre, rhettinger, josh.r, xtreak, Nikita Smetanin |
2019-03-20 16:10:33 | remi.lapeyre | set | messageid: <1553098233.94.0.899430066779.issue36380@roundup.psfhosted.org> |
2019-03-20 16:10:33 | remi.lapeyre | link | issue36380 messages |
2019-03-20 16:10:33 | remi.lapeyre | create | |
|