Message 338491 - Python tracker

➜

This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in the Python's Developer Guide.

Author	remi.lapeyre
Recipients	Nikita Smetanin, josh.r, remi.lapeyre, rhettinger, xtreak
Date	2019-03-20.16:10:33
SpamBayes Score	-1.0
Marked as misclassified	Yes
Message-id	<1553098233.94.0.899430066779.issue36380@roundup.psfhosted.org>
In-reply-to

Content
Hi what do you think of a patch to this effect that would speed up operations without changing the current semantics? diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index cff75a48d6..fe5d5b2dca 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -561,6 +561,7 @@ class Counter(dict): if len(args) > 1: raise TypeError('expected at most 1 arguments, got %d' % len(args)) super(Counter, self).__init__() + self._nonpositives = [] self.update(args, *kwds) def __missing__(self, key): @@ -652,6 +653,12 @@ class Counter(dict): self[elem] = count + self_get(elem, 0) else: super(Counter, self).update(iterable) # fast path when counter is empty + for elem, count in self.items(): + try: + if not count > 0: + self._nonpositives.append(elem) + except TypeError: + pass else: _count_elements(self, iterable) if kwds: @@ -818,9 +825,10 @@ class Counter(dict): def _keep_positive(self): '''Internal method to strip elements with a negative or zero count''' - nonpositive = [elem for elem, count in self.items() if not count > 0] - for elem in nonpositive: - del self[elem] + for elem in self._nonpositives: + if not self[elem] > 0: + del self[elem] + self._nonpositives.clear() return self def __iadd__(self, other): @@ -833,7 +841,10 @@ class Counter(dict): ''' for elem, count in other.items(): - self[elem] += count + count = self[elem] + count + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() def __isub__(self, other): @@ -846,7 +857,10 @@ class Counter(dict): ''' for elem, count in other.items(): - self[elem] -= count + count = self[elem] - count + self[elem] = count + if not count > 0: + del self[elem] return self._keep_positive() def __ior__(self, other): @@ -858,10 +872,11 @@ class Counter(dict): Counter({'b': 3, 'c': 2, 'a': 1}) ''' - for elem, other_count in other.items(): - count = self[elem] - if other_count > count: - self[elem] = other_count + for elem, count in other.items(): + count = max(count, self[elem]) + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() def __iand__(self, other): @@ -874,9 +889,10 @@ class Counter(dict): ''' for elem, count in self.items(): - other_count = other[elem] - if other_count < count: - self[elem] = other_count + count = min(other[elem], count) + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() It seems to give great improvements for the case Nikita Smetanin gave: ➜ cpython git:(speed-up-Counter) ✗ ./python.exe tests.py Setup took: 9.101021380999999 Computation took: 1.634299999864197e-05 ➜ cpython git:(speed-up-Counter) ✗ python3 tests.py Setup took: 11.615437155 Computation took: 0.34183154800000004 Is there a performance test suite for Counter I could use to check this does not add regressions in some cases?

Hi what do you think of a patch to this effect that would speed up operations without changing the current semantics?


diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index cff75a48d6..fe5d5b2dca 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -561,6 +561,7 @@ class Counter(dict):
         if len(args) > 1:
             raise TypeError('expected at most 1 arguments, got %d' % len(args))
         super(Counter, self).__init__()
+        self._nonpositives = []
         self.update(*args, **kwds)
 
     def __missing__(self, key):
@@ -652,6 +653,12 @@ class Counter(dict):
                         self[elem] = count + self_get(elem, 0)
                 else:
                     super(Counter, self).update(iterable) # fast path when counter is empty
+                for elem, count in self.items():
+                    try:
+                        if not count > 0:
+                            self._nonpositives.append(elem)
+                    except TypeError:
+                        pass
             else:
                 _count_elements(self, iterable)
         if kwds:
@@ -818,9 +825,10 @@ class Counter(dict):
 
     def _keep_positive(self):
         '''Internal method to strip elements with a negative or zero count'''
-        nonpositive = [elem for elem, count in self.items() if not count > 0]
-        for elem in nonpositive:
-            del self[elem]
+        for elem in self._nonpositives:
+            if not self[elem] > 0:
+                del self[elem]
+        self._nonpositives.clear()
         return self
 
     def __iadd__(self, other):
@@ -833,7 +841,10 @@ class Counter(dict):
 
         '''
         for elem, count in other.items():
-            self[elem] += count
+            count = self[elem] + count
+            self[elem] = count
+            if not count > 0:
+                self._nonpositives.append(elem)
         return self._keep_positive()
 
     def __isub__(self, other):
@@ -846,7 +857,10 @@ class Counter(dict):
 
         '''
         for elem, count in other.items():
-            self[elem] -= count
+            count = self[elem] - count
+            self[elem] = count
+            if not count > 0:
+                del self[elem]
         return self._keep_positive()
 
     def __ior__(self, other):
@@ -858,10 +872,11 @@ class Counter(dict):
         Counter({'b': 3, 'c': 2, 'a': 1})
 
         '''
-        for elem, other_count in other.items():
-            count = self[elem]
-            if other_count > count:
-                self[elem] = other_count
+        for elem, count in other.items():
+            count = max(count, self[elem])
+            self[elem] = count
+            if not count > 0:
+                self._nonpositives.append(elem)
         return self._keep_positive()
 
     def __iand__(self, other):
@@ -874,9 +889,10 @@ class Counter(dict):
 
         '''
         for elem, count in self.items():
-            other_count = other[elem]
-            if other_count < count:
-                self[elem] = other_count
+            count = min(other[elem], count)
+            self[elem] = count
+            if not count > 0:
+                self._nonpositives.append(elem)
         return self._keep_positive()
 
 



It seems to give great improvements for the case Nikita Smetanin gave:

➜  cpython git:(speed-up-Counter) ✗ ./python.exe tests.py
Setup took:  9.101021380999999
Computation took:  1.634299999864197e-05
➜  cpython git:(speed-up-Counter) ✗ python3 tests.py
Setup took:  11.615437155
Computation took:  0.34183154800000004


Is there a performance test suite for Counter I could use to check this does not add regressions in some cases?

History
Date	User	Action	Args
2019-03-20 16:10:33	remi.lapeyre	set	recipients: + remi.lapeyre, rhettinger, josh.r, xtreak, Nikita Smetanin
2019-03-20 16:10:33	remi.lapeyre	set	messageid: <1553098233.94.0.899430066779.issue36380@roundup.psfhosted.org>
2019-03-20 16:10:33	remi.lapeyre	link	issue36380 messages
2019-03-20 16:10:33	remi.lapeyre	create