MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ripemd.cpp
1 /*
2  Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; see the file COPYING. If not, write to the
15  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
16  MA 02110-1301 USA.
17 */
18 
19 
20 /* based on Wei Dai's ripemd.cpp from CryptoPP */
21 
22 #include "runtime.hpp"
23 #include "ripemd.hpp"
24 #ifdef USE_SYS_STL
25  #include <algorithm>
26 #else
27  #include "algorithm.hpp"
28 #endif
29 
30 
31 namespace STL = STL_NAMESPACE;
32 
33 
34 
35 namespace TaoCrypt {
36 
37 void RIPEMD160::Init()
38 {
39  digest_[0] = 0x67452301L;
40  digest_[1] = 0xefcdab89L;
41  digest_[2] = 0x98badcfeL;
42  digest_[3] = 0x10325476L;
43  digest_[4] = 0xc3d2e1f0L;
44 
45  buffLen_ = 0;
46  loLen_ = 0;
47  hiLen_ = 0;
48 }
49 
50 
51 RIPEMD160::RIPEMD160(const RIPEMD160& that)
52  : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
53 {
54  buffLen_ = that.buffLen_;
55  loLen_ = that.loLen_;
56  hiLen_ = that.hiLen_;
57 
58  memcpy(digest_, that.digest_, DIGEST_SIZE);
59  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
60 }
61 
62 
63 RIPEMD160& RIPEMD160::operator= (const RIPEMD160& that)
64 {
65  RIPEMD160 tmp(that);
66  Swap(tmp);
67 
68  return *this;
69 }
70 
71 
72 void RIPEMD160::Swap(RIPEMD160& other)
73 {
74  STL::swap(loLen_, other.loLen_);
75  STL::swap(hiLen_, other.hiLen_);
76  STL::swap(buffLen_, other.buffLen_);
77 
78  memcpy(digest_, other.digest_, DIGEST_SIZE);
79  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
80 }
81 
82 
83 #ifdef DO_RIPEMD_ASM
84 
85 // Update digest with data of size len
86 void RIPEMD160::Update(const byte* data, word32 len)
87 {
88  if (!isMMX) {
89  HASHwithTransform::Update(data, len);
90  return;
91  }
92 
93  byte* local = reinterpret_cast<byte*>(buffer_);
94 
95  // remove buffered data if possible
96  if (buffLen_) {
97  word32 add = min(len, BLOCK_SIZE - buffLen_);
98  memcpy(&local[buffLen_], data, add);
99 
100  buffLen_ += add;
101  data += add;
102  len -= add;
103 
104  if (buffLen_ == BLOCK_SIZE) {
105  Transform();
106  AddLength(BLOCK_SIZE);
107  buffLen_ = 0;
108  }
109  }
110 
111  // all at once for asm
112  if (buffLen_ == 0) {
113  word32 times = len / BLOCK_SIZE;
114  if (times) {
115  AsmTransform(data, times);
116  const word32 add = BLOCK_SIZE * times;
117  AddLength(add);
118  len -= add;
119  data += add;
120  }
121  }
122 
123  // cache any data left
124  if (len) {
125  memcpy(&local[buffLen_], data, len);
126  buffLen_ += len;
127  }
128 }
129 
130 #endif // DO_RIPEMD_ASM
131 
132 
133 // Bitwise round functions used by every subround below.
// G and I are bit selects: G yields y where x has a 1 bit and z elsewhere;
// I yields x where z has a 1 bit and y elsewhere.
// The arguments are not parenthesised inside the macros; the uses in this
// file pass plain identifiers only.
134 #define F(x, y, z) (x ^ y ^ z)
135 #define G(x, y, z) (z ^ (x & (y^z)))
136 #define H(x, y, z) (z ^ (x | ~y))
137 #define I(x, y, z) (y ^ (z & (x^y)))
138 #define J(x, y, z) (x ^ (y | ~z))
139 
// Additive constants, one per group of 16 subrounds.  Transform() uses
// k0..k4 with the a1..e1 variables and k5..k9 with the a2..e2 variables.
140 #define k0 0
141 #define k1 0x5a827999
142 #define k2 0x6ed9eba1
143 #define k3 0x8f1bbcdc
144 #define k4 0xa953fd4e
145 #define k5 0x50a28be6
146 #define k6 0x5c4dd124
147 #define k7 0x6d703ef3
148 #define k8 0x7a6d76e9
149 #define k9 0
150 
151 // for 160 and 320
// One subround: add f(b, c, d), the message word x and the constant k to a,
// rotate a left by s and add e, then rotate c left by 10.  Both a and c are
// modified.  The values are cast to word32 before each rotate.  The macro
// expands to several statements and the last one has no trailing semicolon,
// so the caller supplies it.
152 #define Subround(f, a, b, c, d, e, x, s, k) \
153  a += f(b, c, d) + x + k;\
154  a = rotlFixed((word32)a, s) + e;\
155  c = rotlFixed((word32)c, 10U)
156 
157 
// Process the 16 words currently held in buffer_[0..15] and fold the result
// into digest_[0..4].
//
// Two independent chains of 80 subrounds each are run from the same starting
// digest: a1..e1 uses F, G, H, I, J with constants k0..k4, while a2..e2 uses
// the functions in the opposite order (J, I, H, G, F) with k5..k9.  Each
// group of 16 subrounds reads the message words in its own order with its
// own rotate amounts.  The two chains are combined with the old digest at
// the end.
//
// The working variables are unsigned long; the Subround macro casts to
// word32 before every rotate, and the final stores go into digest_.
158 void RIPEMD160::Transform()
159 {
160  unsigned long a1, b1, c1, d1, e1, a2, b2, c2, d2, e2;
// both chains start from the current digest
161  a1 = a2 = digest_[0];
162  b1 = b2 = digest_[1];
163  c1 = c2 = digest_[2];
164  d1 = d2 = digest_[3];
165  e1 = e2 = digest_[4];
166 
// a1..e1 chain, group 1: F, k0
167  Subround(F, a1, b1, c1, d1, e1, buffer_[ 0], 11, k0);
168  Subround(F, e1, a1, b1, c1, d1, buffer_[ 1], 14, k0);
169  Subround(F, d1, e1, a1, b1, c1, buffer_[ 2], 15, k0);
170  Subround(F, c1, d1, e1, a1, b1, buffer_[ 3], 12, k0);
171  Subround(F, b1, c1, d1, e1, a1, buffer_[ 4], 5, k0);
172  Subround(F, a1, b1, c1, d1, e1, buffer_[ 5], 8, k0);
173  Subround(F, e1, a1, b1, c1, d1, buffer_[ 6], 7, k0);
174  Subround(F, d1, e1, a1, b1, c1, buffer_[ 7], 9, k0);
175  Subround(F, c1, d1, e1, a1, b1, buffer_[ 8], 11, k0);
176  Subround(F, b1, c1, d1, e1, a1, buffer_[ 9], 13, k0);
177  Subround(F, a1, b1, c1, d1, e1, buffer_[10], 14, k0);
178  Subround(F, e1, a1, b1, c1, d1, buffer_[11], 15, k0);
179  Subround(F, d1, e1, a1, b1, c1, buffer_[12], 6, k0);
180  Subround(F, c1, d1, e1, a1, b1, buffer_[13], 7, k0);
181  Subround(F, b1, c1, d1, e1, a1, buffer_[14], 9, k0);
182  Subround(F, a1, b1, c1, d1, e1, buffer_[15], 8, k0);
183 
// a1..e1 chain, group 2: G, k1
184  Subround(G, e1, a1, b1, c1, d1, buffer_[ 7], 7, k1);
185  Subround(G, d1, e1, a1, b1, c1, buffer_[ 4], 6, k1);
186  Subround(G, c1, d1, e1, a1, b1, buffer_[13], 8, k1);
187  Subround(G, b1, c1, d1, e1, a1, buffer_[ 1], 13, k1);
188  Subround(G, a1, b1, c1, d1, e1, buffer_[10], 11, k1);
189  Subround(G, e1, a1, b1, c1, d1, buffer_[ 6], 9, k1);
190  Subround(G, d1, e1, a1, b1, c1, buffer_[15], 7, k1);
191  Subround(G, c1, d1, e1, a1, b1, buffer_[ 3], 15, k1);
192  Subround(G, b1, c1, d1, e1, a1, buffer_[12], 7, k1);
193  Subround(G, a1, b1, c1, d1, e1, buffer_[ 0], 12, k1);
194  Subround(G, e1, a1, b1, c1, d1, buffer_[ 9], 15, k1);
195  Subround(G, d1, e1, a1, b1, c1, buffer_[ 5], 9, k1);
196  Subround(G, c1, d1, e1, a1, b1, buffer_[ 2], 11, k1);
197  Subround(G, b1, c1, d1, e1, a1, buffer_[14], 7, k1);
198  Subround(G, a1, b1, c1, d1, e1, buffer_[11], 13, k1);
199  Subround(G, e1, a1, b1, c1, d1, buffer_[ 8], 12, k1);
200 
// a1..e1 chain, group 3: H, k2
201  Subround(H, d1, e1, a1, b1, c1, buffer_[ 3], 11, k2);
202  Subround(H, c1, d1, e1, a1, b1, buffer_[10], 13, k2);
203  Subround(H, b1, c1, d1, e1, a1, buffer_[14], 6, k2);
204  Subround(H, a1, b1, c1, d1, e1, buffer_[ 4], 7, k2);
205  Subround(H, e1, a1, b1, c1, d1, buffer_[ 9], 14, k2);
206  Subround(H, d1, e1, a1, b1, c1, buffer_[15], 9, k2);
207  Subround(H, c1, d1, e1, a1, b1, buffer_[ 8], 13, k2);
208  Subround(H, b1, c1, d1, e1, a1, buffer_[ 1], 15, k2);
209  Subround(H, a1, b1, c1, d1, e1, buffer_[ 2], 14, k2);
210  Subround(H, e1, a1, b1, c1, d1, buffer_[ 7], 8, k2);
211  Subround(H, d1, e1, a1, b1, c1, buffer_[ 0], 13, k2);
212  Subround(H, c1, d1, e1, a1, b1, buffer_[ 6], 6, k2);
213  Subround(H, b1, c1, d1, e1, a1, buffer_[13], 5, k2);
214  Subround(H, a1, b1, c1, d1, e1, buffer_[11], 12, k2);
215  Subround(H, e1, a1, b1, c1, d1, buffer_[ 5], 7, k2);
216  Subround(H, d1, e1, a1, b1, c1, buffer_[12], 5, k2);
217 
// a1..e1 chain, group 4: I, k3
218  Subround(I, c1, d1, e1, a1, b1, buffer_[ 1], 11, k3);
219  Subround(I, b1, c1, d1, e1, a1, buffer_[ 9], 12, k3);
220  Subround(I, a1, b1, c1, d1, e1, buffer_[11], 14, k3);
221  Subround(I, e1, a1, b1, c1, d1, buffer_[10], 15, k3);
222  Subround(I, d1, e1, a1, b1, c1, buffer_[ 0], 14, k3);
223  Subround(I, c1, d1, e1, a1, b1, buffer_[ 8], 15, k3);
224  Subround(I, b1, c1, d1, e1, a1, buffer_[12], 9, k3);
225  Subround(I, a1, b1, c1, d1, e1, buffer_[ 4], 8, k3);
226  Subround(I, e1, a1, b1, c1, d1, buffer_[13], 9, k3);
227  Subround(I, d1, e1, a1, b1, c1, buffer_[ 3], 14, k3);
228  Subround(I, c1, d1, e1, a1, b1, buffer_[ 7], 5, k3);
229  Subround(I, b1, c1, d1, e1, a1, buffer_[15], 6, k3);
230  Subround(I, a1, b1, c1, d1, e1, buffer_[14], 8, k3);
231  Subround(I, e1, a1, b1, c1, d1, buffer_[ 5], 6, k3);
232  Subround(I, d1, e1, a1, b1, c1, buffer_[ 6], 5, k3);
233  Subround(I, c1, d1, e1, a1, b1, buffer_[ 2], 12, k3);
234 
// a1..e1 chain, group 5: J, k4
235  Subround(J, b1, c1, d1, e1, a1, buffer_[ 4], 9, k4);
236  Subround(J, a1, b1, c1, d1, e1, buffer_[ 0], 15, k4);
237  Subround(J, e1, a1, b1, c1, d1, buffer_[ 5], 5, k4);
238  Subround(J, d1, e1, a1, b1, c1, buffer_[ 9], 11, k4);
239  Subround(J, c1, d1, e1, a1, b1, buffer_[ 7], 6, k4);
240  Subround(J, b1, c1, d1, e1, a1, buffer_[12], 8, k4);
241  Subround(J, a1, b1, c1, d1, e1, buffer_[ 2], 13, k4);
242  Subround(J, e1, a1, b1, c1, d1, buffer_[10], 12, k4);
243  Subround(J, d1, e1, a1, b1, c1, buffer_[14], 5, k4);
244  Subround(J, c1, d1, e1, a1, b1, buffer_[ 1], 12, k4);
245  Subround(J, b1, c1, d1, e1, a1, buffer_[ 3], 13, k4);
246  Subround(J, a1, b1, c1, d1, e1, buffer_[ 8], 14, k4);
247  Subround(J, e1, a1, b1, c1, d1, buffer_[11], 11, k4);
248  Subround(J, d1, e1, a1, b1, c1, buffer_[ 6], 8, k4);
249  Subround(J, c1, d1, e1, a1, b1, buffer_[15], 5, k4);
250  Subround(J, b1, c1, d1, e1, a1, buffer_[13], 6, k4);
251 
// a2..e2 chain, group 1: J, k5
252  Subround(J, a2, b2, c2, d2, e2, buffer_[ 5], 8, k5);
253  Subround(J, e2, a2, b2, c2, d2, buffer_[14], 9, k5);
254  Subround(J, d2, e2, a2, b2, c2, buffer_[ 7], 9, k5);
255  Subround(J, c2, d2, e2, a2, b2, buffer_[ 0], 11, k5);
256  Subround(J, b2, c2, d2, e2, a2, buffer_[ 9], 13, k5);
257  Subround(J, a2, b2, c2, d2, e2, buffer_[ 2], 15, k5);
258  Subround(J, e2, a2, b2, c2, d2, buffer_[11], 15, k5);
259  Subround(J, d2, e2, a2, b2, c2, buffer_[ 4], 5, k5);
260  Subround(J, c2, d2, e2, a2, b2, buffer_[13], 7, k5);
261  Subround(J, b2, c2, d2, e2, a2, buffer_[ 6], 7, k5);
262  Subround(J, a2, b2, c2, d2, e2, buffer_[15], 8, k5);
263  Subround(J, e2, a2, b2, c2, d2, buffer_[ 8], 11, k5);
264  Subround(J, d2, e2, a2, b2, c2, buffer_[ 1], 14, k5);
265  Subround(J, c2, d2, e2, a2, b2, buffer_[10], 14, k5);
266  Subround(J, b2, c2, d2, e2, a2, buffer_[ 3], 12, k5);
267  Subround(J, a2, b2, c2, d2, e2, buffer_[12], 6, k5);
268 
// a2..e2 chain, group 2: I, k6
269  Subround(I, e2, a2, b2, c2, d2, buffer_[ 6], 9, k6);
270  Subround(I, d2, e2, a2, b2, c2, buffer_[11], 13, k6);
271  Subround(I, c2, d2, e2, a2, b2, buffer_[ 3], 15, k6);
272  Subround(I, b2, c2, d2, e2, a2, buffer_[ 7], 7, k6);
273  Subround(I, a2, b2, c2, d2, e2, buffer_[ 0], 12, k6);
274  Subround(I, e2, a2, b2, c2, d2, buffer_[13], 8, k6);
275  Subround(I, d2, e2, a2, b2, c2, buffer_[ 5], 9, k6);
276  Subround(I, c2, d2, e2, a2, b2, buffer_[10], 11, k6);
277  Subround(I, b2, c2, d2, e2, a2, buffer_[14], 7, k6);
278  Subround(I, a2, b2, c2, d2, e2, buffer_[15], 7, k6);
279  Subround(I, e2, a2, b2, c2, d2, buffer_[ 8], 12, k6);
280  Subround(I, d2, e2, a2, b2, c2, buffer_[12], 7, k6);
281  Subround(I, c2, d2, e2, a2, b2, buffer_[ 4], 6, k6);
282  Subround(I, b2, c2, d2, e2, a2, buffer_[ 9], 15, k6);
283  Subround(I, a2, b2, c2, d2, e2, buffer_[ 1], 13, k6);
284  Subround(I, e2, a2, b2, c2, d2, buffer_[ 2], 11, k6);
285 
// a2..e2 chain, group 3: H, k7
286  Subround(H, d2, e2, a2, b2, c2, buffer_[15], 9, k7);
287  Subround(H, c2, d2, e2, a2, b2, buffer_[ 5], 7, k7);
288  Subround(H, b2, c2, d2, e2, a2, buffer_[ 1], 15, k7);
289  Subround(H, a2, b2, c2, d2, e2, buffer_[ 3], 11, k7);
290  Subround(H, e2, a2, b2, c2, d2, buffer_[ 7], 8, k7);
291  Subround(H, d2, e2, a2, b2, c2, buffer_[14], 6, k7);
292  Subround(H, c2, d2, e2, a2, b2, buffer_[ 6], 6, k7);
293  Subround(H, b2, c2, d2, e2, a2, buffer_[ 9], 14, k7);
294  Subround(H, a2, b2, c2, d2, e2, buffer_[11], 12, k7);
295  Subround(H, e2, a2, b2, c2, d2, buffer_[ 8], 13, k7);
296  Subround(H, d2, e2, a2, b2, c2, buffer_[12], 5, k7);
297  Subround(H, c2, d2, e2, a2, b2, buffer_[ 2], 14, k7);
298  Subround(H, b2, c2, d2, e2, a2, buffer_[10], 13, k7);
299  Subround(H, a2, b2, c2, d2, e2, buffer_[ 0], 13, k7);
300  Subround(H, e2, a2, b2, c2, d2, buffer_[ 4], 7, k7);
301  Subround(H, d2, e2, a2, b2, c2, buffer_[13], 5, k7);
302 
// a2..e2 chain, group 4: G, k8
303  Subround(G, c2, d2, e2, a2, b2, buffer_[ 8], 15, k8);
304  Subround(G, b2, c2, d2, e2, a2, buffer_[ 6], 5, k8);
305  Subround(G, a2, b2, c2, d2, e2, buffer_[ 4], 8, k8);
306  Subround(G, e2, a2, b2, c2, d2, buffer_[ 1], 11, k8);
307  Subround(G, d2, e2, a2, b2, c2, buffer_[ 3], 14, k8);
308  Subround(G, c2, d2, e2, a2, b2, buffer_[11], 14, k8);
309  Subround(G, b2, c2, d2, e2, a2, buffer_[15], 6, k8);
310  Subround(G, a2, b2, c2, d2, e2, buffer_[ 0], 14, k8);
311  Subround(G, e2, a2, b2, c2, d2, buffer_[ 5], 6, k8);
312  Subround(G, d2, e2, a2, b2, c2, buffer_[12], 9, k8);
313  Subround(G, c2, d2, e2, a2, b2, buffer_[ 2], 12, k8);
314  Subround(G, b2, c2, d2, e2, a2, buffer_[13], 9, k8);
315  Subround(G, a2, b2, c2, d2, e2, buffer_[ 9], 12, k8);
316  Subround(G, e2, a2, b2, c2, d2, buffer_[ 7], 5, k8);
317  Subround(G, d2, e2, a2, b2, c2, buffer_[10], 15, k8);
318  Subround(G, c2, d2, e2, a2, b2, buffer_[14], 8, k8);
319 
// a2..e2 chain, group 5: F, k9
320  Subround(F, b2, c2, d2, e2, a2, buffer_[12], 8, k9);
321  Subround(F, a2, b2, c2, d2, e2, buffer_[15], 5, k9);
322  Subround(F, e2, a2, b2, c2, d2, buffer_[10], 12, k9);
323  Subround(F, d2, e2, a2, b2, c2, buffer_[ 4], 9, k9);
324  Subround(F, c2, d2, e2, a2, b2, buffer_[ 1], 12, k9);
325  Subround(F, b2, c2, d2, e2, a2, buffer_[ 5], 5, k9);
326  Subround(F, a2, b2, c2, d2, e2, buffer_[ 8], 14, k9);
327  Subround(F, e2, a2, b2, c2, d2, buffer_[ 7], 6, k9);
328  Subround(F, d2, e2, a2, b2, c2, buffer_[ 6], 8, k9);
329  Subround(F, c2, d2, e2, a2, b2, buffer_[ 2], 13, k9);
330  Subround(F, b2, c2, d2, e2, a2, buffer_[13], 6, k9);
331  Subround(F, a2, b2, c2, d2, e2, buffer_[14], 5, k9);
332  Subround(F, e2, a2, b2, c2, d2, buffer_[ 0], 15, k9);
333  Subround(F, d2, e2, a2, b2, c2, buffer_[ 3], 13, k9);
334  Subround(F, c2, d2, e2, a2, b2, buffer_[ 9], 11, k9);
335  Subround(F, b2, c2, d2, e2, a2, buffer_[11], 11, k9);
336 
// Combine both chains with the previous digest.  The new digest_[0] is parked
// in c1 and stored last, because the old digest_[0] is still needed for the
// digest_[4] computation; each other slot is read before it is overwritten.
337  c1 = digest_[1] + c1 + d2;
338  digest_[1] = digest_[2] + d1 + e2;
339  digest_[2] = digest_[3] + e1 + a2;
340  digest_[3] = digest_[4] + a1 + b2;
341  digest_[4] = digest_[0] + b1 + c2;
342  digest_[0] = c1;
343 }
344 
345 
346 #ifdef DO_RIPEMD_ASM
347 
348 /*
349  // F(x ^ y ^ z)
350  // place in esi
351 #define ASMF(x, y, z) \
352  AS2( mov esi, x ) \
353  AS2( xor esi, y ) \
354  AS2( xor esi, z )
355 
356 
357  // G(z ^ (x & (y^z)))
358  // place in esi
359 #define ASMG(x, y, z) \
360  AS2( mov esi, z ) \
361  AS2( xor esi, y ) \
362  AS2( and esi, x ) \
363  AS2( xor esi, z )
364 
365 
366  // H(z ^ (x | ~y))
367  // place in esi
368 #define ASMH(x, y, z) \
369  AS2( mov esi, y ) \
370  AS1( not esi ) \
371  AS2( or esi, x ) \
372  AS2( xor esi, z )
373 
374 
375  // I(y ^ (z & (x^y)))
376  // place in esi
377 #define ASMI(x, y, z) \
378  AS2( mov esi, y ) \
379  AS2( xor esi, x ) \
380  AS2( and esi, z ) \
381  AS2( xor esi, y )
382 
383 
384  // J(x ^ (y | ~z)))
385  // place in esi
386 #define ASMJ(x, y, z) \
387  AS2( mov esi, z ) \
388  AS1( not esi ) \
389  AS2( or esi, y ) \
390  AS2( xor esi, x )
391 
392 
393 // for 160 and 320
394 // #define ASMSubround(f, a, b, c, d, e, i, s, k)
395 // a += f(b, c, d) + data[i] + k;
396 // a = rotlFixed((word32)a, s) + e;
397 // c = rotlFixed((word32)c, 10U)
398 
399 #define ASMSubround(f, a, b, c, d, e, index, s, k) \
400  // a += f(b, c, d) + data[i] + k \
401  AS2( mov esp, [edi + index * 4] ) \
402  f(b, c, d) \
403  AS2( add esi, k ) \
404  AS2( add esi, esp ) \
405  AS2( add a, esi ) \
406  // a = rotlFixed((word32)a, s) + e \
407  AS2( rol a, s ) \
408  AS2( rol c, 10 ) \
409  // c = rotlFixed((word32)c, 10U) \
410  AS2( add a, e )
411 */
412 
413 
414 // ASMSubroundF: one subround with f = b ^ c ^ d and no additive constant.
415 // Entry: esi must hold c.  Exit: esi holds b, which is the c operand of the
416 // next subround because the call sites rotate (a, b, c, d, e) by one each time.
// The message word is read directly from [edi + index * 4]; esp is not used
// by this macro.
417 
418 #define ASMSubroundF(a, b, c, d, e, index, s) \
419  /* a += (b ^ c ^ d) + data[index]; no constant is added here */ \
420  AS2( xor esi, b ) \
421  AS2( add a, [edi + index * 4] ) \
422  AS2( xor esi, d ) \
423  AS2( add a, esi ) \
424  /* a = rotlFixed((word32)a, s) + e; esi is reloaded for the next subround */ \
425  AS2( mov esi, b ) \
426  AS2( rol a, s ) \
427  /* c = rotlFixed((word32)c, 10U) */ \
428  AS2( rol c, 10 ) \
429  AS2( add a, e )
430 
431 
432 // ASMSubroundG: one subround with f = d ^ (b & (c ^ d)) plus constant k.
433 // Entry: esi must hold c.  Exit: esi holds b (the next subround's c).
434 // The message word is read directly from [edi + index * 4]; esp is not used.
435 
436 #define ASMSubroundG(a, b, c, d, e, index, s, k) \
437  /* a += (d ^ (b & (c^d))) + data[index] + k */ \
438  AS2( xor esi, d ) \
439  AS2( and esi, b ) \
440  AS2( add a, [edi + index * 4] ) \
441  AS2( xor esi, d ) \
442  AS2( lea a, [esi + a + k] ) \
443  /* a = rotlFixed((word32)a, s) + e; esi is reloaded for the next subround */ \
444  AS2( mov esi, b ) \
445  AS2( rol a, s ) \
446  /* c = rotlFixed((word32)c, 10U) */ \
447  AS2( rol c, 10 ) \
448  AS2( add a, e )
449 
450 
451 // ASMSubroundH: one subround with f = d ^ (b | ~c) plus constant k.
452 // Entry: esi must hold c.  Exit: esi holds b (the next subround's c).
453 // The message word is read directly from [edi + index * 4]; esp is not used.
454 
455 #define ASMSubroundH(a, b, c, d, e, index, s, k) \
456  /* a += (d ^ (b | ~c)) + data[index] + k */ \
457  AS1( not esi ) \
458  AS2( or esi, b ) \
459  AS2( add a, [edi + index * 4] ) \
460  AS2( xor esi, d ) \
461  AS2( lea a, [esi + a + k] ) \
462  /* a = rotlFixed((word32)a, s) + e; esi is reloaded for the next subround */ \
463  AS2( mov esi, b ) \
464  AS2( rol a, s ) \
465  /* c = rotlFixed((word32)c, 10U) */ \
466  AS2( rol c, 10 ) \
467  AS2( add a, e )
468 
469 
470 // ASMSubroundI: one subround with f = c ^ (d & (b ^ c)) plus constant k.
471 // Entry: esi must hold c.  Exit: esi holds b (the next subround's c).
472 // The message word is read directly from [edi + index * 4]; esp is not used.
473 
474 #define ASMSubroundI(a, b, c, d, e, index, s, k) \
475  /* a += (c ^ (d & (b^c))) + data[index] + k */ \
476  AS2( xor esi, b ) \
477  AS2( and esi, d ) \
478  AS2( add a, [edi + index * 4] ) \
479  AS2( xor esi, c ) \
480  AS2( lea a, [esi + a + k] ) \
481  /* a = rotlFixed((word32)a, s) + e; esi is reloaded for the next subround */ \
482  AS2( mov esi, b ) \
483  AS2( rol a, s ) \
484  /* c = rotlFixed((word32)c, 10U) */ \
485  AS2( rol c, 10 ) \
486  AS2( add a, e )
487 
488 
489 // ASMSubroundJ: one subround with f = b ^ (c | ~d) plus constant k.
490 // Entry: esi must hold d (not c).  Exit: esi holds the rotated c, which is
491 // the d operand of the next subround.  Data is read from [edi + index * 4].
492 
493 #define ASMSubroundJ(a, b, c, d, e, index, s, k) \
494  /* a += (b ^ (c | ~d)) + data[index] + k */ \
495  AS1( not esi ) \
496  AS2( or esi, c ) \
497  /* the rotate of c by 10 is interleaved with the accumulation into a */ \
498  AS2( add a, [edi + index * 4] ) \
499  AS2( xor esi, b ) \
500  AS2( rol c, 10 ) \
501  AS2( lea a, [esi + a + k] ) \
502  /* a = rotlFixed((word32)a, s) + e; esi = rotated c for the next subround */ \
503  AS2( rol a, s ) \
504  AS2( mov esi, c ) \
505  AS2( add a, e )
506 
507 
// x86 inline-assembly version of the block transform.  Processes `times`
// consecutive 64-byte blocks starting at `data`, updating the five digest
// words in place after each block.
//
// The loop is bottom-tested (dec edx / jnz loopStart), so it must be entered
// with times > 0; the Update() in this file only calls it in that case.
//
// MMX registers are used purely as spill space:
//   mm0 = advanced data pointer, mm1 = pointer to digest_[0], mm2 = times,
//   mm3..mm6 = caller's edi, ebx, esi, ebp (restored in EPILOG).
// emms is executed before returning.  24 bytes of stack are reserved to hold
// the a1..e1 results while the a2..e2 chain runs.
//
// NOTE(review): the stack offsets in PROLOG and the fixed offset of digest_
// from the object pointer (16, or 20 with OLD_GCC_OFFSET) depend on the
// calling convention and class layout, which are not visible here - confirm
// before changing either.
508 #ifdef _MSC_VER
509  __declspec(naked)
510 #endif
511 void RIPEMD160::AsmTransform(const byte* data, word32 times)
512 {
513 #ifdef __GNUC__
// GCC: each AS1/AS2 becomes its own asm statement, written in Intel syntax.
514  #define AS1(x) asm(#x);
515  #define AS2(x, y) asm(#x ", " #y);
516 
// Save edi/ebx/esi/ebp in mm3..mm6, then load ecx, edi and edx from
// [ebp + 8], [ebp + 12] and [ebp + 16] (presumably this, data and times).
517  #define PROLOG() \
518  asm(".intel_syntax noprefix"); \
519  AS2( movd mm3, edi ) \
520  AS2( movd mm4, ebx ) \
521  AS2( movd mm5, esi ) \
522  AS2( movd mm6, ebp ) \
523  AS2( mov ecx, DWORD PTR [ebp + 8] ) \
524  AS2( mov edi, DWORD PTR [ebp + 12] ) \
525  AS2( mov edx, DWORD PTR [ebp + 16] )
526 
// Restore the saved registers, reset esp from ebp (dropping the scratch
// area), clear MMX state and switch back to AT&T syntax.
527  #define EPILOG() \
528  AS2( movd ebp, mm6 ) \
529  AS2( movd esi, mm5 ) \
530  AS2( movd ebx, mm4 ) \
531  AS2( mov esp, ebp ) \
532  AS2( movd edi, mm3 ) \
533  AS1( emms ) \
534  asm(".att_syntax");
535 #else
// Non-GCC: __asm statements.  This PROLOG builds its own frame and loads only
// edi and edx from the stack; ecx is used as it arrives (presumably `this`).
536  #define AS1(x) __asm x
537  #define AS2(x, y) __asm x, y
538 
539  #define PROLOG() \
540  AS1( push ebp ) \
541  AS2( mov ebp, esp ) \
542  AS2( movd mm3, edi ) \
543  AS2( movd mm4, ebx ) \
544  AS2( movd mm5, esi ) \
545  AS2( movd mm6, ebp ) \
546  AS2( mov edi, DWORD PTR [ebp + 8] ) \
547  AS2( mov edx, DWORD PTR [ebp + 12] )
548 
// Restores registers and the frame, then returns popping 8 bytes of arguments.
549  #define EPILOG() \
550  AS2( movd ebp, mm6 ) \
551  AS2( movd esi, mm5 ) \
552  AS2( movd ebx, mm4 ) \
553  AS2( movd edi, mm3 ) \
554  AS2( mov esp, ebp ) \
555  AS1( pop ebp ) \
556  AS1( emms ) \
557  AS1( ret 8 )
558 
559 #endif
560 
561  PROLOG()
562 
// esi = address of digest_[0], at a fixed offset from the object pointer in ecx
563  #ifdef OLD_GCC_OFFSET
564  AS2( lea esi, [ecx + 20] ) // digest_[0]
565  #else
566  AS2( lea esi, [ecx + 16] ) // digest_[0]
567  #endif
568 
569  AS2( sub esp, 24 ) // make room for tmp a1 - e1
570  AS2( movd mm1, esi ) // store digest_
571 
// one iteration per 64-byte block
572 AS1( loopStart: )
573 
574  AS2( movd mm2, edx ) // store times_
575 
// load the current digest as the starting values of the first chain
576  AS2( mov eax, [esi] ) // a1
577  AS2( mov ebx, [esi + 4] ) // b1
578  AS2( mov ecx, [esi + 8] ) // c1
579  AS2( mov edx, [esi + 12] ) // d1
580  AS2( mov ebp, [esi + 16] ) // e1
581 
582  // setup
// the F/G/H/I macros expect esi to hold their c operand on entry
583  AS2( mov esi, ecx )
584 
// first chain, group 1: F (no constant)
585  ASMSubroundF( eax, ebx, ecx, edx, ebp, 0, 11)
586  ASMSubroundF( ebp, eax, ebx, ecx, edx, 1, 14)
587  ASMSubroundF( edx, ebp, eax, ebx, ecx, 2, 15)
588  ASMSubroundF( ecx, edx, ebp, eax, ebx, 3, 12)
589  ASMSubroundF( ebx, ecx, edx, ebp, eax, 4, 5)
590  ASMSubroundF( eax, ebx, ecx, edx, ebp, 5, 8)
591  ASMSubroundF( ebp, eax, ebx, ecx, edx, 6, 7)
592  ASMSubroundF( edx, ebp, eax, ebx, ecx, 7, 9)
593  ASMSubroundF( ecx, edx, ebp, eax, ebx, 8, 11)
594  ASMSubroundF( ebx, ecx, edx, ebp, eax, 9, 13)
595  ASMSubroundF( eax, ebx, ecx, edx, ebp, 10, 14)
596  ASMSubroundF( ebp, eax, ebx, ecx, edx, 11, 15)
597  ASMSubroundF( edx, ebp, eax, ebx, ecx, 12, 6)
598  ASMSubroundF( ecx, edx, ebp, eax, ebx, 13, 7)
599  ASMSubroundF( ebx, ecx, edx, ebp, eax, 14, 9)
600  ASMSubroundF( eax, ebx, ecx, edx, ebp, 15, 8)
601 
// first chain, group 2: G, k1
602  ASMSubroundG( ebp, eax, ebx, ecx, edx, 7, 7, k1)
603  ASMSubroundG( edx, ebp, eax, ebx, ecx, 4, 6, k1)
604  ASMSubroundG( ecx, edx, ebp, eax, ebx, 13, 8, k1)
605  ASMSubroundG( ebx, ecx, edx, ebp, eax, 1, 13, k1)
606  ASMSubroundG( eax, ebx, ecx, edx, ebp, 10, 11, k1)
607  ASMSubroundG( ebp, eax, ebx, ecx, edx, 6, 9, k1)
608  ASMSubroundG( edx, ebp, eax, ebx, ecx, 15, 7, k1)
609  ASMSubroundG( ecx, edx, ebp, eax, ebx, 3, 15, k1)
610  ASMSubroundG( ebx, ecx, edx, ebp, eax, 12, 7, k1)
611  ASMSubroundG( eax, ebx, ecx, edx, ebp, 0, 12, k1)
612  ASMSubroundG( ebp, eax, ebx, ecx, edx, 9, 15, k1)
613  ASMSubroundG( edx, ebp, eax, ebx, ecx, 5, 9, k1)
614  ASMSubroundG( ecx, edx, ebp, eax, ebx, 2, 11, k1)
615  ASMSubroundG( ebx, ecx, edx, ebp, eax, 14, 7, k1)
616  ASMSubroundG( eax, ebx, ecx, edx, ebp, 11, 13, k1)
617  ASMSubroundG( ebp, eax, ebx, ecx, edx, 8, 12, k1)
618 
// first chain, group 3: H, k2
619  ASMSubroundH( edx, ebp, eax, ebx, ecx, 3, 11, k2)
620  ASMSubroundH( ecx, edx, ebp, eax, ebx, 10, 13, k2)
621  ASMSubroundH( ebx, ecx, edx, ebp, eax, 14, 6, k2)
622  ASMSubroundH( eax, ebx, ecx, edx, ebp, 4, 7, k2)
623  ASMSubroundH( ebp, eax, ebx, ecx, edx, 9, 14, k2)
624  ASMSubroundH( edx, ebp, eax, ebx, ecx, 15, 9, k2)
625  ASMSubroundH( ecx, edx, ebp, eax, ebx, 8, 13, k2)
626  ASMSubroundH( ebx, ecx, edx, ebp, eax, 1, 15, k2)
627  ASMSubroundH( eax, ebx, ecx, edx, ebp, 2, 14, k2)
628  ASMSubroundH( ebp, eax, ebx, ecx, edx, 7, 8, k2)
629  ASMSubroundH( edx, ebp, eax, ebx, ecx, 0, 13, k2)
630  ASMSubroundH( ecx, edx, ebp, eax, ebx, 6, 6, k2)
631  ASMSubroundH( ebx, ecx, edx, ebp, eax, 13, 5, k2)
632  ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k2)
633  ASMSubroundH( ebp, eax, ebx, ecx, edx, 5, 7, k2)
634  ASMSubroundH( edx, ebp, eax, ebx, ecx, 12, 5, k2)
635 
// first chain, group 4: I, k3
636  ASMSubroundI( ecx, edx, ebp, eax, ebx, 1, 11, k3)
637  ASMSubroundI( ebx, ecx, edx, ebp, eax, 9, 12, k3)
638  ASMSubroundI( eax, ebx, ecx, edx, ebp, 11, 14, k3)
639  ASMSubroundI( ebp, eax, ebx, ecx, edx, 10, 15, k3)
640  ASMSubroundI( edx, ebp, eax, ebx, ecx, 0, 14, k3)
641  ASMSubroundI( ecx, edx, ebp, eax, ebx, 8, 15, k3)
642  ASMSubroundI( ebx, ecx, edx, ebp, eax, 12, 9, k3)
643  ASMSubroundI( eax, ebx, ecx, edx, ebp, 4, 8, k3)
644  ASMSubroundI( ebp, eax, ebx, ecx, edx, 13, 9, k3)
645  ASMSubroundI( edx, ebp, eax, ebx, ecx, 3, 14, k3)
646  ASMSubroundI( ecx, edx, ebp, eax, ebx, 7, 5, k3)
647  ASMSubroundI( ebx, ecx, edx, ebp, eax, 15, 6, k3)
648  ASMSubroundI( eax, ebx, ecx, edx, ebp, 14, 8, k3)
649  ASMSubroundI( ebp, eax, ebx, ecx, edx, 5, 6, k3)
650  ASMSubroundI( edx, ebp, eax, ebx, ecx, 6, 5, k3)
651  ASMSubroundI( ecx, edx, ebp, eax, ebx, 2, 12, k3)
652 
653  // setup
// ASMSubroundJ expects esi to hold its d operand (ebp in the next call)
654  AS2( mov esi, ebp )
655 
// first chain, group 5: J, k4
656  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 4, 9, k4)
657  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 0, 15, k4)
658  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 5, 5, k4)
659  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 9, 11, k4)
660  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 7, 6, k4)
661  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 12, 8, k4)
662  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 2, 13, k4)
663  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 10, 12, k4)
664  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 14, 5, k4)
665  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 1, 12, k4)
666  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 3, 13, k4)
667  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 8, 14, k4)
668  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 11, k4)
669  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 6, 8, k4)
670  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 15, 5, k4)
671  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 13, 6, k4)
672 
673  // store a1 - e1 on stack
674  AS2( movd esi, mm1 ) // digest_
675 
676  AS2( mov [esp], eax )
677  AS2( mov [esp + 4], ebx )
678  AS2( mov [esp + 8], ecx )
679  AS2( mov [esp + 12], edx )
680  AS2( mov [esp + 16], ebp )
681 
// reload the unchanged digest as the starting values of the second chain
682  AS2( mov eax, [esi] ) // a2
683  AS2( mov ebx, [esi + 4] ) // b2
684  AS2( mov ecx, [esi + 8] ) // c2
685  AS2( mov edx, [esi + 12] ) // d2
686  AS2( mov ebp, [esi + 16] ) // e2
687 
688 
689  // setup
// J again: esi = d operand of the first call (edx)
690  AS2( mov esi, edx )
691 
// second chain, group 1: J, k5
692  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 5, 8, k5)
693  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 14, 9, k5)
694  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 7, 9, k5)
695  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 0, 11, k5)
696  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 9, 13, k5)
697  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 2, 15, k5)
698  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 15, k5)
699  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 4, 5, k5)
700  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 13, 7, k5)
701  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 6, 7, k5)
702  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 15, 8, k5)
703  ASMSubroundJ( ebp, eax, ebx, ecx, edx, 8, 11, k5)
704  ASMSubroundJ( edx, ebp, eax, ebx, ecx, 1, 14, k5)
705  ASMSubroundJ( ecx, edx, ebp, eax, ebx, 10, 14, k5)
706  ASMSubroundJ( ebx, ecx, edx, ebp, eax, 3, 12, k5)
707  ASMSubroundJ( eax, ebx, ecx, edx, ebp, 12, 6, k5)
708 
709  // setup
// switching from J to I: esi must now hold the c operand (ebx in the next call)
710  AS2( mov esi, ebx )
711 
// second chain, group 2: I, k6
712  ASMSubroundI( ebp, eax, ebx, ecx, edx, 6, 9, k6)
713  ASMSubroundI( edx, ebp, eax, ebx, ecx, 11, 13, k6)
714  ASMSubroundI( ecx, edx, ebp, eax, ebx, 3, 15, k6)
715  ASMSubroundI( ebx, ecx, edx, ebp, eax, 7, 7, k6)
716  ASMSubroundI( eax, ebx, ecx, edx, ebp, 0, 12, k6)
717  ASMSubroundI( ebp, eax, ebx, ecx, edx, 13, 8, k6)
718  ASMSubroundI( edx, ebp, eax, ebx, ecx, 5, 9, k6)
719  ASMSubroundI( ecx, edx, ebp, eax, ebx, 10, 11, k6)
720  ASMSubroundI( ebx, ecx, edx, ebp, eax, 14, 7, k6)
721  ASMSubroundI( eax, ebx, ecx, edx, ebp, 15, 7, k6)
722  ASMSubroundI( ebp, eax, ebx, ecx, edx, 8, 12, k6)
723  ASMSubroundI( edx, ebp, eax, ebx, ecx, 12, 7, k6)
724  ASMSubroundI( ecx, edx, ebp, eax, ebx, 4, 6, k6)
725  ASMSubroundI( ebx, ecx, edx, ebp, eax, 9, 15, k6)
726  ASMSubroundI( eax, ebx, ecx, edx, ebp, 1, 13, k6)
727  ASMSubroundI( ebp, eax, ebx, ecx, edx, 2, 11, k6)
728 
// second chain, group 3: H, k7
729  ASMSubroundH( edx, ebp, eax, ebx, ecx, 15, 9, k7)
730  ASMSubroundH( ecx, edx, ebp, eax, ebx, 5, 7, k7)
731  ASMSubroundH( ebx, ecx, edx, ebp, eax, 1, 15, k7)
732  ASMSubroundH( eax, ebx, ecx, edx, ebp, 3, 11, k7)
733  ASMSubroundH( ebp, eax, ebx, ecx, edx, 7, 8, k7)
734  ASMSubroundH( edx, ebp, eax, ebx, ecx, 14, 6, k7)
735  ASMSubroundH( ecx, edx, ebp, eax, ebx, 6, 6, k7)
736  ASMSubroundH( ebx, ecx, edx, ebp, eax, 9, 14, k7)
737  ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k7)
738  ASMSubroundH( ebp, eax, ebx, ecx, edx, 8, 13, k7)
739  ASMSubroundH( edx, ebp, eax, ebx, ecx, 12, 5, k7)
740  ASMSubroundH( ecx, edx, ebp, eax, ebx, 2, 14, k7)
741  ASMSubroundH( ebx, ecx, edx, ebp, eax, 10, 13, k7)
742  ASMSubroundH( eax, ebx, ecx, edx, ebp, 0, 13, k7)
743  ASMSubroundH( ebp, eax, ebx, ecx, edx, 4, 7, k7)
744  ASMSubroundH( edx, ebp, eax, ebx, ecx, 13, 5, k7)
745 
// second chain, group 4: G, k8
746  ASMSubroundG( ecx, edx, ebp, eax, ebx, 8, 15, k8)
747  ASMSubroundG( ebx, ecx, edx, ebp, eax, 6, 5, k8)
748  ASMSubroundG( eax, ebx, ecx, edx, ebp, 4, 8, k8)
749  ASMSubroundG( ebp, eax, ebx, ecx, edx, 1, 11, k8)
750  ASMSubroundG( edx, ebp, eax, ebx, ecx, 3, 14, k8)
751  ASMSubroundG( ecx, edx, ebp, eax, ebx, 11, 14, k8)
752  ASMSubroundG( ebx, ecx, edx, ebp, eax, 15, 6, k8)
753  ASMSubroundG( eax, ebx, ecx, edx, ebp, 0, 14, k8)
754  ASMSubroundG( ebp, eax, ebx, ecx, edx, 5, 6, k8)
755  ASMSubroundG( edx, ebp, eax, ebx, ecx, 12, 9, k8)
756  ASMSubroundG( ecx, edx, ebp, eax, ebx, 2, 12, k8)
757  ASMSubroundG( ebx, ecx, edx, ebp, eax, 13, 9, k8)
758  ASMSubroundG( eax, ebx, ecx, edx, ebp, 9, 12, k8)
759  ASMSubroundG( ebp, eax, ebx, ecx, edx, 7, 5, k8)
760  ASMSubroundG( edx, ebp, eax, ebx, ecx, 10, 15, k8)
761  ASMSubroundG( ecx, edx, ebp, eax, ebx, 14, 8, k8)
762 
// second chain, group 5: F (no constant)
763  ASMSubroundF( ebx, ecx, edx, ebp, eax, 12, 8)
764  ASMSubroundF( eax, ebx, ecx, edx, ebp, 15, 5)
765  ASMSubroundF( ebp, eax, ebx, ecx, edx, 10, 12)
766  ASMSubroundF( edx, ebp, eax, ebx, ecx, 4, 9)
767  ASMSubroundF( ecx, edx, ebp, eax, ebx, 1, 12)
768  ASMSubroundF( ebx, ecx, edx, ebp, eax, 5, 5)
769  ASMSubroundF( eax, ebx, ecx, edx, ebp, 8, 14)
770  ASMSubroundF( ebp, eax, ebx, ecx, edx, 7, 6)
771  ASMSubroundF( edx, ebp, eax, ebx, ecx, 6, 8)
772  ASMSubroundF( ecx, edx, ebp, eax, ebx, 2, 13)
773  ASMSubroundF( ebx, ecx, edx, ebp, eax, 13, 6)
774  ASMSubroundF( eax, ebx, ecx, edx, ebp, 14, 5)
775  ASMSubroundF( ebp, eax, ebx, ecx, edx, 0, 15)
776  ASMSubroundF( edx, ebp, eax, ebx, ecx, 3, 13)
777  ASMSubroundF( ecx, edx, ebp, eax, ebx, 9, 11)
778  ASMSubroundF( ebx, ecx, edx, ebp, eax, 11, 11)
779 
780  // advance data and store for next round
781  AS2( add edi, 64 )
782  AS2( movd esi, mm1 ) // digest_
783  AS2( movd mm0, edi ) // store
784 
785  // now edi as tmp
// Combine the saved a1..e1 (on the stack) with a2..e2 (in registers) and the
// old digest.  The new digest_[0] is accumulated in the stack slot of c1 and
// written last, since the old digest_[0] is still read for digest_[4].
786 
787  // c1 = digest_[1] + c1 + d2;
788  AS2( add [esp + 8], edx ) // + d2
789  AS2( mov edi, [esi + 4] ) // digest_[1]
790  AS2( add [esp + 8], edi )
791 
792  // digest_[1] = digest_[2] + d1 + e2;
793  AS2( mov [esi + 4], ebp ) // e2
794  AS2( mov edi, [esp + 12] ) // d1
795  AS2( add edi, [esi + 8] ) // digest_[2]
796  AS2( add [esi + 4], edi )
797 
798  // digest_[2] = digest_[3] + e1 + a2;
799  AS2( mov [esi + 8], eax ) // a2
800  AS2( mov edi, [esp + 16] ) // e1
801  AS2( add edi, [esi + 12] ) // digest_[3]
802  AS2( add [esi + 8], edi )
803 
804  // digest_[3] = digest_[4] + a1 + b2;
805  AS2( mov [esi + 12], ebx ) // b2
806  AS2( mov edi, [esp] ) // a1
807  AS2( add edi, [esi + 16] ) // digest_[4]
808  AS2( add [esi + 12], edi )
809 
810  // digest_[4] = digest_[0] + b1 + c2;
811  AS2( mov [esi + 16], ecx ) // c2
812  AS2( mov edi, [esp + 4] ) // b1
813  AS2( add edi, [esi] ) // digest_[0]
814  AS2( add [esi + 16], edi )
815 
816  // digest_[0] = c1;
817  AS2( mov edi, [esp + 8] ) // c1
818  AS2( mov [esi], edi )
819 
820  // setup for loop back
// esi still points at digest_[0] here, which loopStart relies on
821  AS2( movd edx, mm2 ) // times
822  AS2( movd edi, mm0 ) // data, already advanced
823  AS1( dec edx )
824  AS1( jnz loopStart )
825 
826 
827  EPILOG()
828 }
829 
830 
831 #endif // DO_RIPEMD_ASM
832 
833 
834 } // namespace TaoCrypt