This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in the Python Developer's Guide.

Author rhettinger
Recipients r.david.murray, rhettinger, serhiy.storchaka, vstinner
Date 2015-07-22.16:47:33
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1437583654.13.0.190471091461.issue24681@psf.upfronthosting.co.za>
In-reply-to
Content
FWIW, my approach is to look at the most important code
paths to see if there is any work being done that isn't
essential for the result being computed.

Next, I look at the generated assembly to estimate speed
by counting memory accesses (and whether each one is a
fresh, likely-cached access or a stale, random access),
and I look at the branches (and whether they are
predictable or not).

The table=so->table assignment was being done for all code
paths but was only needed around the rich compare.  Here
is the before and after for the most important path
(the first lookup).  Note that the change saves one memory
spill and one reload.

BEFORE
------
_set_add_entry:
pushq   %r15
pushq   %r14
movq    %rdx, %r14
pushq   %r13
pushq   %r12
movq    %rdi, %r12
pushq   %rbp
movq    %rsi, %rbp
pushq   %rbx
subq    $56, %rsp
movq    40(%rdi), %rax
addq    $1, (%rsi)
movq    %rax, 16(%rsp)        <-- spill
movq    32(%r12), %rdx        
movq    %rdx, %r15
andq    %r14, %r15
movq    %r15, %rbx
salq    $4, %rbx
addq    16(%rsp), %rbx        <-- reload
movq    (%rbx), %rcx
testq   %rcx, %rcx
je  L430


AFTER
-----
_set_add_entry:
	pushq	%r15
	movq	%rdx, %r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	movq	%rdi, %r12
	pushq	%rbp
	movq	%rsi, %rbp
	pushq	%rbx
	subq	$56, %rsp
	movq	40(%rdi), %rdx
	addq	$1, (%rsi)        <-- no spill
	movq	%rdx, %r11
L428:
	movq	32(%r12), %rcx
	movq	%rcx, %r13
	andq	%r15, %r13
	movq	%r13, %rbx
	salq	$4, %rbx
	addq	%r11, %rbx         <-- from register
	movq	(%rbx), %r14
	testq	%r14, %r14
	je	L429


The code around the rich compare used to do memory
loads that weren't necessary for the most likely case
(since the 64-bit hash values match, it is very likely
that the comparison will report a match).

BEFORE
------

call    _PyObject_RichCompareBool
movq    24(%rsp), %rcx
movq    (%rcx), %rdi
leaq    -1(%rdi), %rdx
testq   %rdx, %rdx
movq    %rdx, (%rcx)
je  L489
testl   %eax, %eax            
js  L437                      <--- predictable error branch
movq    40(%r12), %rdx        <--- memory load 
cmpq    16(%rsp), %rdx        <--- memory load
jne L460                 
cmpq    (%rbx), %rcx          <--- memory load  
jne L429                      <--- predictable restart branch
testl   %eax, %eax            <--- predictable found_active branch            
jne L432                      <--- most common exit point
movq    32(%r12), %rdx


AFTER
-----

	call	_PyObject_RichCompareBool
	movq	16(%rsp), %rcx
	movq	(%rcx), %rdi
	leaq	-1(%rdi), %rdx
	testq	%rdx, %rdx
	movq	%rdx, (%rcx)
	je	L485
	cmpl	$0, %eax
	jg	L431                  <-- common exit before the memory loads!
L490:
	jne	L434
	movq	40(%r12), %rdx    <--- memory load 
	cmpq	%rdx, 24(%rsp)    <--- memory load 
	movq	%rdx, %r11
	jne	L428
	cmpq	(%rbx), %rcx      <--- memory load 
	jne	L428
History
Date User Action Args
2015-07-22 16:47:34 rhettinger set recipients: + rhettinger, vstinner, r.david.murray, serhiy.storchaka
2015-07-22 16:47:34 rhettinger set messageid: <1437583654.13.0.190471091461.issue24681@psf.upfronthosting.co.za>
2015-07-22 16:47:34 rhettinger link issue24681 messages
2015-07-22 16:47:33 rhettinger create