Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(257213)

Side by Side Diff: Modules/_blake2/impl/blake2s-load-sse41.h

Issue 26798: add BLAKE2 to hashlib
Patch Set: Created 2 years, 11 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Modules/_blake2/impl/blake2s-load-sse2.h ('k') | Modules/_blake2/impl/blake2s-load-xop.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 BLAKE2 reference source code package - optimized C implementations
3
4 Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5 terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6 your option. The terms of these licenses can be found at:
7
8 - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9 - OpenSSL license : https://www.openssl.org/source/license.html
10 - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
12 More information about the BLAKE2 hash function can be found at
13 https://blake2.net.
14 */
/* Include guard for this header.
   NOTE(review): `#pragma once` and the #ifndef/#define guard do the same job,
   so one of them is redundant. The guard name also begins with a double
   underscore, which C reserves for the implementation - consider a name such
   as BLAKE2S_LOAD_SSE41_H if this header is ever revised. */
15 #pragma once
16 #ifndef __BLAKE2S_LOAD_SSE41_H__
17 #define __BLAKE2S_LOAD_SSE41_H__
18
/* LOAD_MSG_0_1(buf): one _mm_shuffle_ps over m0 and m1 (both must be in scope
   where the macro is expanded), taking lanes 0 and 2 of each. With mX[k] =
   k-th 32-bit lane of mX, lane 0 lowest: buf = { m0[0], m0[2], m1[0], m1[2] }.
   NOTE(review): TOF/TOI are defined elsewhere; this assumes they are
   bit-preserving casts to/from the float vector type, and that m0..m3 hold
   message words 0..15 in order (giving words 0, 2, 4, 6) - confirm. */
19 #define LOAD_MSG_0_1(buf) \
20 buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
21
/* LOAD_MSG_0_2(buf): one _mm_shuffle_ps over m0 and m1 (both must be in scope
   where the macro is expanded), taking lanes 1 and 3 of each. With mX[k] =
   k-th 32-bit lane of mX, lane 0 lowest: buf = { m0[1], m0[3], m1[1], m1[3] }.
   NOTE(review): TOF/TOI are defined elsewhere; this assumes they are
   bit-preserving casts to/from the float vector type, and that m0..m3 hold
   message words 0..15 in order (giving words 1, 3, 5, 7) - confirm. */
22 #define LOAD_MSG_0_2(buf) \
23 buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
24
/* LOAD_MSG_0_3(buf): one _mm_shuffle_ps over m2 and m3 (both must be in scope
   where the macro is expanded), taking lanes 0 and 2 of each. With mX[k] =
   k-th 32-bit lane of mX, lane 0 lowest: buf = { m2[0], m2[2], m3[0], m3[2] }.
   NOTE(review): TOF/TOI are defined elsewhere; this assumes they are
   bit-preserving casts to/from the float vector type, and that m0..m3 hold
   message words 0..15 in order (giving words 8, 10, 12, 14) - confirm. */
25 #define LOAD_MSG_0_3(buf) \
26 buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
27
/* LOAD_MSG_0_4(buf): one _mm_shuffle_ps over m2 and m3 (both must be in scope
   where the macro is expanded), taking lanes 1 and 3 of each. With mX[k] =
   k-th 32-bit lane of mX, lane 0 lowest: buf = { m2[1], m2[3], m3[1], m3[3] }.
   NOTE(review): TOF/TOI are defined elsewhere; this assumes they are
   bit-preserving casts to/from the float vector type, and that m0..m3 hold
   message words 0..15 in order (giving words 9, 11, 13, 15) - confirm. */
28 #define LOAD_MSG_0_4(buf) \
29 buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
30
/* LOAD_MSG_1_1(buf): builds buf from 32-bit lanes of m1, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[2], m1[0], m2[1], m3[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 14, 4, 9, 13 - confirm against the caller. */
31 #define LOAD_MSG_1_1(buf) \
32 t0 = _mm_blend_epi16(m1, m2, 0x0C); \
33 t1 = _mm_slli_si128(m3, 4); \
34 t2 = _mm_blend_epi16(t0, t1, 0xF0); \
35 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
36
/* LOAD_MSG_1_2(buf): builds buf from 32-bit lanes of m1, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[2], m2[0], m3[3], m1[2] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 10, 8, 15, 6 - confirm against the caller. */
37 #define LOAD_MSG_1_2(buf) \
38 t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
39 t1 = _mm_blend_epi16(m1,m3,0xC0); \
40 t2 = _mm_blend_epi16(t0, t1, 0xF0); \
41 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
42
/* LOAD_MSG_1_3(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[1], m0[0], m2[3], m1[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 1, 0, 11, 5 - confirm against the caller. */
43 #define LOAD_MSG_1_3(buf) \
44 t0 = _mm_slli_si128(m1, 4); \
45 t1 = _mm_blend_epi16(m2, t0, 0x30); \
46 t2 = _mm_blend_epi16(m0, t1, 0xF0); \
47 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
48
/* LOAD_MSG_1_4(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[0], m0[2], m1[3], m0[3] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 12, 2, 7, 3 - confirm against the caller. */
49 #define LOAD_MSG_1_4(buf) \
50 t0 = _mm_unpackhi_epi32(m0,m1); \
51 t1 = _mm_slli_si128(m3, 4); \
52 t2 = _mm_blend_epi16(t0, t1, 0x0C); \
53 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
54
/* LOAD_MSG_2_1(buf): builds buf from 32-bit lanes of m1, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[3], m3[0], m1[1], m3[3] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 11, 12, 5, 15 - confirm against the caller. */
55 #define LOAD_MSG_2_1(buf) \
56 t0 = _mm_unpackhi_epi32(m2,m3); \
57 t1 = _mm_blend_epi16(m3,m1,0x0C); \
58 t2 = _mm_blend_epi16(t0, t1, 0x0F); \
59 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
60
/* LOAD_MSG_2_2(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[0], m0[0], m0[2], m3[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 8, 0, 2, 13 - confirm against the caller. */
61 #define LOAD_MSG_2_2(buf) \
62 t0 = _mm_unpacklo_epi32(m2,m0); \
63 t1 = _mm_blend_epi16(t0, m0, 0xF0); \
64 t2 = _mm_slli_si128(m3, 8); \
65 buf = _mm_blend_epi16(t1, t2, 0xC0);
66
/* LOAD_MSG_2_3(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[2], m0[3], m1[3], m2[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 10, 3, 7, 9 - confirm against the caller. */
67 #define LOAD_MSG_2_3(buf) \
68 t0 = _mm_blend_epi16(m0, m2, 0x3C); \
69 t1 = _mm_srli_si128(m1, 12); \
70 t2 = _mm_blend_epi16(t0,t1,0x03); \
71 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
72
/* LOAD_MSG_2_4(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[2], m1[2], m0[1], m1[0] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 14, 6, 1, 4 - confirm against the caller. */
73 #define LOAD_MSG_2_4(buf) \
74 t0 = _mm_slli_si128(m3, 4); \
75 t1 = _mm_blend_epi16(m0, m1, 0x33); \
76 t2 = _mm_blend_epi16(t1, t0, 0xC0); \
77 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
78
/* LOAD_MSG_3_1(buf): builds buf from 32-bit lanes of m0, m1, m2 and m3,
   overwriting the scratch variables t0, t1 and t2 (none are declared here; all
   must be in scope where the macro is expanded). With mX[k] = k-th 32-bit lane
   of mX, lane 0 lowest, the result is buf = { m1[3], m0[3], m3[1], m2[3] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 7, 3, 13, 11 - confirm against the caller. */
79 #define LOAD_MSG_3_1(buf) \
80 t0 = _mm_unpackhi_epi32(m0,m1); \
81 t1 = _mm_unpackhi_epi32(t0, m2); \
82 t2 = _mm_blend_epi16(t1, m3, 0x0C); \
83 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
84
/* LOAD_MSG_3_2(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[1], m0[1], m3[0], m3[2] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 9, 1, 12, 14 - confirm against the caller. */
85 #define LOAD_MSG_3_2(buf) \
86 t0 = _mm_slli_si128(m2, 8); \
87 t1 = _mm_blend_epi16(m3,m0,0x0C); \
88 t2 = _mm_blend_epi16(t1, t0, 0xC0); \
89 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
90
/* LOAD_MSG_3_3(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[2], m1[1], m1[0], m3[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 2, 5, 4, 15 - confirm against the caller. */
91 #define LOAD_MSG_3_3(buf) \
92 t0 = _mm_blend_epi16(m0,m1,0x0F); \
93 t1 = _mm_blend_epi16(t0, m3, 0xC0); \
94 buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
95
/* LOAD_MSG_3_4(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m1[2], m2[2], m0[0], m2[0] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 6, 10, 0, 8 - confirm against the caller. */
96 #define LOAD_MSG_3_4(buf) \
97 t0 = _mm_unpacklo_epi32(m0,m2); \
98 t1 = _mm_unpackhi_epi32(m1,m2); \
99 buf = _mm_unpacklo_epi64(t1,t0);
100
/* LOAD_MSG_4_1(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[1], m1[1], m0[2], m2[2] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 9, 5, 2, 10 - confirm against the caller. */
101 #define LOAD_MSG_4_1(buf) \
102 t0 = _mm_unpacklo_epi64(m1,m2); \
103 t1 = _mm_unpackhi_epi64(m0,m2); \
104 t2 = _mm_blend_epi16(t0,t1,0x33); \
105 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
106
/* LOAD_MSG_4_2(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[0], m1[3], m1[0], m3[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 0, 7, 4, 15 - confirm against the caller. */
107 #define LOAD_MSG_4_2(buf) \
108 t0 = _mm_unpackhi_epi64(m1,m3); \
109 t1 = _mm_unpacklo_epi64(m0,m1); \
110 buf = _mm_blend_epi16(t0,t1,0x33);
111
/* LOAD_MSG_4_3(buf): builds buf from 32-bit lanes of m0, m1, m2 and m3,
   overwriting the scratch variables t0 and t1 (none are declared here; all
   must be in scope where the macro is expanded). With mX[k] = k-th 32-bit lane
   of mX, lane 0 lowest, the result is buf = { m3[2], m2[3], m1[2], m0[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 14, 11, 6, 3 - confirm against the caller. */
112 #define LOAD_MSG_4_3(buf) \
113 t0 = _mm_unpackhi_epi64(m3,m1); \
114 t1 = _mm_unpackhi_epi64(m2,m0); \
115 buf = _mm_blend_epi16(t1,t0,0x33);
116
/* LOAD_MSG_4_4(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[1], m3[0], m2[0], m3[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 1, 12, 8, 13 - confirm against the caller. */
117 #define LOAD_MSG_4_4(buf) \
118 t0 = _mm_blend_epi16(m0,m2,0x03); \
119 t1 = _mm_slli_si128(t0, 8); \
120 t2 = _mm_blend_epi16(t1,m3,0x0F); \
121 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
122
/* LOAD_MSG_5_1(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[2], m1[2], m0[0], m2[0] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 2, 6, 0, 8 - confirm against the caller. */
123 #define LOAD_MSG_5_1(buf) \
124 t0 = _mm_unpackhi_epi32(m0,m1); \
125 t1 = _mm_unpacklo_epi32(m0,m2); \
126 buf = _mm_unpacklo_epi64(t0,t1);
127
/* LOAD_MSG_5_2(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[0], m2[2], m2[3], m0[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 12, 10, 11, 3 - confirm against the caller. */
128 #define LOAD_MSG_5_2(buf) \
129 t0 = _mm_srli_si128(m2, 4); \
130 t1 = _mm_blend_epi16(m0,m3,0x03); \
131 buf = _mm_blend_epi16(t1,t0,0x3C);
132
/* LOAD_MSG_5_3(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m1[0], m1[3], m3[3], m0[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 4, 7, 15, 1 - confirm against the caller. */
133 #define LOAD_MSG_5_3(buf) \
134 t0 = _mm_blend_epi16(m1,m0,0x0C); \
135 t1 = _mm_srli_si128(m3, 4); \
136 t2 = _mm_blend_epi16(t0,t1,0x30); \
137 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
138
/* LOAD_MSG_5_4(buf): builds buf from 32-bit lanes of m1, m2 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[1], m1[1], m3[2], m2[1] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 13, 5, 14, 9 - confirm against the caller. */
139 #define LOAD_MSG_5_4(buf) \
140 t0 = _mm_unpacklo_epi64(m1,m2); \
141 t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
142 buf = _mm_blend_epi16(t0,t1,0x33);
143
/* LOAD_MSG_6_1(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[0], m0[1], m3[2], m1[0] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 12, 1, 14, 4 - confirm against the caller. */
144 #define LOAD_MSG_6_1(buf) \
145 t0 = _mm_slli_si128(m1, 12); \
146 t1 = _mm_blend_epi16(m0,m3,0x33); \
147 buf = _mm_blend_epi16(t1,t0,0xC0);
148
/* LOAD_MSG_6_2(buf): builds buf from 32-bit lanes of m1, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m1[1], m3[3], m3[1], m2[2] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 5, 15, 13, 10 - confirm against the caller. */
149 #define LOAD_MSG_6_2(buf) \
150 t0 = _mm_blend_epi16(m3,m2,0x30); \
151 t1 = _mm_srli_si128(m1, 4); \
152 t2 = _mm_blend_epi16(t0,t1,0x03); \
153 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));
154
/* LOAD_MSG_6_3(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[0], m1[2], m2[1], m2[0] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 0, 6, 9, 8 - confirm against the caller. */
155 #define LOAD_MSG_6_3(buf) \
156 t0 = _mm_unpacklo_epi64(m0,m2); \
157 t1 = _mm_srli_si128(m1, 4); \
158 buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
159
/* LOAD_MSG_6_4(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m1[3], m0[3], m0[2], m2[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 7, 3, 2, 11 - confirm against the caller. */
160 #define LOAD_MSG_6_4(buf) \
161 t0 = _mm_unpackhi_epi32(m1,m2); \
162 t1 = _mm_unpackhi_epi64(m0,t0); \
163 buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
164
/* LOAD_MSG_7_1(buf): builds buf from 32-bit lanes of m0, m1 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[1], m1[3], m3[0], m0[3] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 13, 7, 12, 3 - confirm against the caller. */
165 #define LOAD_MSG_7_1(buf) \
166 t0 = _mm_unpackhi_epi32(m0,m1); \
167 t1 = _mm_blend_epi16(t0,m3,0x0F); \
168 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
169
/* LOAD_MSG_7_2(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[3], m3[2], m0[1], m2[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 11, 14, 1, 9 - confirm against the caller. */
170 #define LOAD_MSG_7_2(buf) \
171 t0 = _mm_blend_epi16(m2,m3,0x30); \
172 t1 = _mm_srli_si128(m0,4); \
173 t2 = _mm_blend_epi16(t0,t1,0x03); \
174 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
175
/* LOAD_MSG_7_3(buf): builds buf from 32-bit lanes of m0, m1, m2 and m3,
   overwriting the scratch variables t0, t1 and t2 (none are declared here; all
   must be in scope where the macro is expanded). With mX[k] = k-th 32-bit lane
   of mX, lane 0 lowest, the result is buf = { m1[1], m3[3], m2[0], m0[2] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 5, 15, 8, 2 - confirm against the caller. */
176 #define LOAD_MSG_7_3(buf) \
177 t0 = _mm_unpackhi_epi64(m0,m3); \
178 t1 = _mm_unpacklo_epi64(m1,m2); \
179 t2 = _mm_blend_epi16(t0,t1,0x3C); \
180 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
181
/* LOAD_MSG_7_4(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[0], m1[0], m1[2], m2[2] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 0, 4, 6, 10 - confirm against the caller. */
182 #define LOAD_MSG_7_4(buf) \
183 t0 = _mm_unpacklo_epi32(m0,m1); \
184 t1 = _mm_unpackhi_epi32(m1,m2); \
185 buf = _mm_unpacklo_epi64(t0,t1);
186
/* LOAD_MSG_8_1(buf): builds buf from 32-bit lanes of m0, m1, m2 and m3,
   overwriting the scratch variables t0, t1 and t2 (none are declared here; all
   must be in scope where the macro is expanded). With mX[k] = k-th 32-bit lane
   of mX, lane 0 lowest, the result is buf = { m1[2], m3[2], m2[3], m0[0] }.
   The final _mm_shufflehi_epi16 swaps the two 32-bit lanes of the upper half
   and leaves the lower half untouched.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 6, 14, 11, 0 - confirm against the caller. */
187 #define LOAD_MSG_8_1(buf) \
188 t0 = _mm_unpackhi_epi32(m1,m3); \
189 t1 = _mm_unpacklo_epi64(t0,m0); \
190 t2 = _mm_blend_epi16(t1,m2,0xC0); \
191 buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
192
/* LOAD_MSG_8_2(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[3], m2[1], m0[3], m2[0] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 15, 9, 3, 8 - confirm against the caller. */
193 #define LOAD_MSG_8_2(buf) \
194 t0 = _mm_unpackhi_epi32(m0,m3); \
195 t1 = _mm_blend_epi16(m2,t0,0xF0); \
196 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
197
/* LOAD_MSG_8_3(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[0], m3[1], m0[1], m2[2] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 12, 13, 1, 10 - confirm against the caller. */
198 #define LOAD_MSG_8_3(buf) \
199 t0 = _mm_blend_epi16(m2,m0,0x0C); \
200 t1 = _mm_slli_si128(t0,4); \
201 buf = _mm_blend_epi16(t1,m3,0x0F);
202
/* LOAD_MSG_8_4(buf): builds buf from 32-bit lanes of m0 and m1, overwriting
   the scratch variable t0 (none are declared here; all must be in scope where
   the macro is expanded). With mX[k] = k-th 32-bit lane of mX, lane 0 lowest,
   the result is buf = { m0[2], m1[3], m1[0], m1[1] }.
   Expands to 2 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 2, 7, 4, 5 - confirm against the caller. */
203 #define LOAD_MSG_8_4(buf) \
204 t0 = _mm_blend_epi16(m1,m0,0x30); \
205 buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
206
/* LOAD_MSG_9_1(buf): builds buf from 32-bit lanes of m0, m1 and m2, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[2], m2[0], m1[3], m0[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 10, 8, 7, 1 - confirm against the caller. */
207 #define LOAD_MSG_9_1(buf) \
208 t0 = _mm_blend_epi16(m0,m2,0x03); \
209 t1 = _mm_blend_epi16(m1,m2,0x30); \
210 t2 = _mm_blend_epi16(t1,t0,0x0F); \
211 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
212
/* LOAD_MSG_9_2(buf): builds buf from 32-bit lanes of m0 and m1, overwriting
   the scratch variables t0 and t1 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m0[2], m1[0], m1[2], m1[1] }.
   Expands to 3 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 2, 4, 6, 5 - confirm against the caller. */
213 #define LOAD_MSG_9_2(buf) \
214 t0 = _mm_slli_si128(m0,4); \
215 t1 = _mm_blend_epi16(m1,t0,0xC0); \
216 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
217
/* LOAD_MSG_9_3(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m3[3], m2[1], m0[3], m3[1] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 15, 9, 3, 13 - confirm against the caller. */
218 #define LOAD_MSG_9_3(buf) \
219 t0 = _mm_unpackhi_epi32(m0,m3); \
220 t1 = _mm_unpacklo_epi32(m2,m3); \
221 t2 = _mm_unpackhi_epi64(t0,t1); \
222 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
223
/* LOAD_MSG_9_4(buf): builds buf from 32-bit lanes of m0, m2 and m3, overwriting
   the scratch variables t0, t1 and t2 (none are declared here; all must be in
   scope where the macro is expanded). With mX[k] = k-th 32-bit lane of mX,
   lane 0 lowest, the result is buf = { m2[3], m3[2], m3[0], m0[0] }.
   Expands to 4 statements with no do/while wrapper, so it is not safe as the
   unbraced body of an if/for.
   NOTE(review): presumably m0..m3 hold message words 0..15 in order, making
   this words 11, 14, 12, 0 - confirm against the caller. */
224 #define LOAD_MSG_9_4(buf) \
225 t0 = _mm_blend_epi16(m3,m2,0xC0); \
226 t1 = _mm_unpacklo_epi32(m0,m3); \
227 t2 = _mm_blend_epi16(t0,t1,0x0F); \
228 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
229
230 #endif
231
OLDNEW
« no previous file with comments | « Modules/_blake2/impl/blake2s-load-sse2.h ('k') | Modules/_blake2/impl/blake2s-load-xop.h » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+