MySQL 5.6.14 Source Code Document
sha.cpp
1 /*
2  Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; see the file COPYING. If not, write to the
15  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
16  MA 02110-1301 USA.
17 */
18 
19 /* based on Wei Dai's sha.cpp from CryptoPP */
20 
21 #include "runtime.hpp"
22 #include <string.h>
23 #include "sha.hpp"
24 #ifdef USE_SYS_STL
25  #include <algorithm>
26 #else
27  #include "algorithm.hpp"
28 #endif
29 
30 
31 namespace STL = STL_NAMESPACE;
32 
33 
34 
35 namespace TaoCrypt {
36 
37 #define blk0(i) (W[i] = buffer_[i])
38 #define blk1(i) (W[i&15] = \
39  rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
40 
41 #define f1(x,y,z) (z^(x &(y^z)))
42 #define f2(x,y,z) (x^y^z)
43 #define f3(x,y,z) ((x&y)|(z&(x|y)))
44 #define f4(x,y,z) (x^y^z)
45 
46 // (R0+R1), R2, R3, R4 are the different operations used in SHA1
47 #define R0(v,w,x,y,z,i) z+= f1(w,x,y) + blk0(i) + 0x5A827999+ \
48  rotlFixed(v,5); w = rotlFixed(w,30);
49 #define R1(v,w,x,y,z,i) z+= f1(w,x,y) + blk1(i) + 0x5A827999+ \
50  rotlFixed(v,5); w = rotlFixed(w,30);
51 #define R2(v,w,x,y,z,i) z+= f2(w,x,y) + blk1(i) + 0x6ED9EBA1+ \
52  rotlFixed(v,5); w = rotlFixed(w,30);
53 #define R3(v,w,x,y,z,i) z+= f3(w,x,y) + blk1(i) + 0x8F1BBCDC+ \
54  rotlFixed(v,5); w = rotlFixed(w,30);
55 #define R4(v,w,x,y,z,i) z+= f4(w,x,y) + blk1(i) + 0xCA62C1D6+ \
56  rotlFixed(v,5); w = rotlFixed(w,30);
57 
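
The rotlFixed helper used by these macros is defined elsewhere in the library; the sketch below assumes it is an ordinary fixed-count 32-bit left rotate, and writes out what one unrolled R0 step computes using plain uint32_t in place of the library's word32 (illustration only, not part of the build):

#include <cstdint>

// Assumed behaviour of rotlFixed: fixed-count 32-bit rotate left.
static inline std::uint32_t rotl32(std::uint32_t x, unsigned n)
{
    return (x << n) | (x >> (32 - n));
}

// One R0-style SHA-1 step written out as a function (hypothetical helper,
// mirroring the macro above): v..z are the working variables, w_i the
// message word for this step.
static inline void sha1_step0(std::uint32_t v, std::uint32_t& w, std::uint32_t x,
                              std::uint32_t y, std::uint32_t& z, std::uint32_t w_i)
{
    z += (y ^ (w & (x ^ y)))       // f1(w,x,y)
       + w_i + 0x5A827999u         // round constant for steps 0..19
       + rotl32(v, 5);
    w = rotl32(w, 30);
}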
58 
59 void SHA::Init()
60 {
61  digest_[0] = 0x67452301L;
62  digest_[1] = 0xEFCDAB89L;
63  digest_[2] = 0x98BADCFEL;
64  digest_[3] = 0x10325476L;
65  digest_[4] = 0xC3D2E1F0L;
66 
67  buffLen_ = 0;
68  loLen_ = 0;
69  hiLen_ = 0;
70 }
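
As orientation, a minimal usage sketch of how this class is typically driven, assuming the Update/Final interface inherited from HASHwithTransform as declared in sha.hpp (hash_example is a hypothetical caller, not part of this file):

#include "sha.hpp"

void hash_example(const TaoCrypt::byte* msg, TaoCrypt::word32 len)
{
    TaoCrypt::byte digest[TaoCrypt::SHA::DIGEST_SIZE];   // 20 bytes for SHA-1

    TaoCrypt::SHA sha;          // state set up via Init()
    sha.Update(msg, len);       // may be called repeatedly for streaming input
    sha.Final(digest);          // pads, runs the final Transform, writes the digest
}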
71 
72 void SHA256::Init()
73 {
74  digest_[0] = 0x6A09E667L;
75  digest_[1] = 0xBB67AE85L;
76  digest_[2] = 0x3C6EF372L;
77  digest_[3] = 0xA54FF53AL;
78  digest_[4] = 0x510E527FL;
79  digest_[5] = 0x9B05688CL;
80  digest_[6] = 0x1F83D9ABL;
81  digest_[7] = 0x5BE0CD19L;
82 
83  buffLen_ = 0;
84  loLen_ = 0;
85  hiLen_ = 0;
86 }
87 
88 
89 void SHA224::Init()
90 {
91  digest_[0] = 0xc1059ed8;
92  digest_[1] = 0x367cd507;
93  digest_[2] = 0x3070dd17;
94  digest_[3] = 0xf70e5939;
95  digest_[4] = 0xffc00b31;
96  digest_[5] = 0x68581511;
97  digest_[6] = 0x64f98fa7;
98  digest_[7] = 0xbefa4fa4;
99 
100  buffLen_ = 0;
101  loLen_ = 0;
102  hiLen_ = 0;
103 }
104 
105 
106 #ifdef WORD64_AVAILABLE
107 
108 void SHA512::Init()
109 {
110  digest_[0] = W64LIT(0x6a09e667f3bcc908);
111  digest_[1] = W64LIT(0xbb67ae8584caa73b);
112  digest_[2] = W64LIT(0x3c6ef372fe94f82b);
113  digest_[3] = W64LIT(0xa54ff53a5f1d36f1);
114  digest_[4] = W64LIT(0x510e527fade682d1);
115  digest_[5] = W64LIT(0x9b05688c2b3e6c1f);
116  digest_[6] = W64LIT(0x1f83d9abfb41bd6b);
117  digest_[7] = W64LIT(0x5be0cd19137e2179);
118 
119  buffLen_ = 0;
120  loLen_ = 0;
121  hiLen_ = 0;
122 }
123 
124 
125 void SHA384::Init()
126 {
127  digest_[0] = W64LIT(0xcbbb9d5dc1059ed8);
128  digest_[1] = W64LIT(0x629a292a367cd507);
129  digest_[2] = W64LIT(0x9159015a3070dd17);
130  digest_[3] = W64LIT(0x152fecd8f70e5939);
131  digest_[4] = W64LIT(0x67332667ffc00b31);
132  digest_[5] = W64LIT(0x8eb44a8768581511);
133  digest_[6] = W64LIT(0xdb0c2e0d64f98fa7);
134  digest_[7] = W64LIT(0x47b5481dbefa4fa4);
135 
136  buffLen_ = 0;
137  loLen_ = 0;
138  hiLen_ = 0;
139 }
140 
141 #endif // WORD64_AVAILABLE
142 
143 
144 SHA::SHA(const SHA& that) : HASHwithTransform(DIGEST_SIZE / sizeof(word32),
145  BLOCK_SIZE)
146 {
147  buffLen_ = that.buffLen_;
148  loLen_ = that.loLen_;
149  hiLen_ = that.hiLen_;
150 
151  memcpy(digest_, that.digest_, DIGEST_SIZE);
152  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
153 }
154 
155 
156 SHA256::SHA256(const SHA256& that) : HASHwithTransform(DIGEST_SIZE /
157  sizeof(word32), BLOCK_SIZE)
158 {
159  buffLen_ = that.buffLen_;
160  loLen_ = that.loLen_;
161  hiLen_ = that.hiLen_;
162 
163  memcpy(digest_, that.digest_, DIGEST_SIZE);
164  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
165 }
166 
167 
168 SHA224::SHA224(const SHA224& that) : HASHwithTransform(SHA256::DIGEST_SIZE /
169  sizeof(word32), BLOCK_SIZE)
170 {
171  buffLen_ = that.buffLen_;
172  loLen_ = that.loLen_;
173  hiLen_ = that.hiLen_;
174 
175  memcpy(digest_, that.digest_, DIGEST_SIZE);
176  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
177 }
178 
179 
180 #ifdef WORD64_AVAILABLE
181 
182 SHA512::SHA512(const SHA512& that) : HASH64withTransform(DIGEST_SIZE /
183  sizeof(word64), BLOCK_SIZE)
184 {
185  buffLen_ = that.buffLen_;
186  loLen_ = that.loLen_;
187  hiLen_ = that.hiLen_;
188 
189  memcpy(digest_, that.digest_, DIGEST_SIZE);
190  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
191 }
192 
193 
194 SHA384::SHA384(const SHA384& that) : HASH64withTransform(SHA512::DIGEST_SIZE /
195  sizeof(word64), BLOCK_SIZE)
196 {
197  buffLen_ = that.buffLen_;
198  loLen_ = that.loLen_;
199  hiLen_ = that.hiLen_;
200 
201  memcpy(digest_, that.digest_, DIGEST_SIZE);
202  memcpy(buffer_, that.buffer_, BLOCK_SIZE);
203 }
204 
205 #endif // WORD64_AVAILABLE
206 
207 
208 SHA& SHA::operator= (const SHA& that)
209 {
210  SHA tmp(that);
211  Swap(tmp);
212 
213  return *this;
214 }
215 
216 
217 SHA256& SHA256::operator= (const SHA256& that)
218 {
219  SHA256 tmp(that);
220  Swap(tmp);
221 
222  return *this;
223 }
224 
225 
226 SHA224& SHA224::operator= (const SHA224& that)
227 {
228  SHA224 tmp(that);
229  Swap(tmp);
230 
231  return *this;
232 }
233 
234 
235 #ifdef WORD64_AVAILABLE
236 
237 SHA512& SHA512::operator= (const SHA512& that)
238 {
239  SHA512 tmp(that);
240  Swap(tmp);
241 
242  return *this;
243 }
244 
245 
246 SHA384& SHA384::operator= (const SHA384& that)
247 {
248  SHA384 tmp(that);
249  Swap(tmp);
250 
251  return *this;
252 }
253 
254 #endif // WORD64_AVAILABLE
255 
256 
257 void SHA::Swap(SHA& other)
258 {
259  STL::swap(loLen_, other.loLen_);
260  STL::swap(hiLen_, other.hiLen_);
261  STL::swap(buffLen_, other.buffLen_);
262 
263  memcpy(digest_, other.digest_, DIGEST_SIZE);
264  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
265 }
266 
267 
268 void SHA256::Swap(SHA256& other)
269 {
270  STL::swap(loLen_, other.loLen_);
271  STL::swap(hiLen_, other.hiLen_);
272  STL::swap(buffLen_, other.buffLen_);
273 
274  memcpy(digest_, other.digest_, DIGEST_SIZE);
275  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
276 }
277 
278 
279 void SHA224::Swap(SHA224& other)
280 {
281  STL::swap(loLen_, other.loLen_);
282  STL::swap(hiLen_, other.hiLen_);
283  STL::swap(buffLen_, other.buffLen_);
284 
285  memcpy(digest_, other.digest_, DIGEST_SIZE);
286  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
287 }
288 
289 
290 #ifdef WORD64_AVAILABLE
291 
292 void SHA512::Swap(SHA512& other)
293 {
294  STL::swap(loLen_, other.loLen_);
295  STL::swap(hiLen_, other.hiLen_);
296  STL::swap(buffLen_, other.buffLen_);
297 
298  memcpy(digest_, other.digest_, DIGEST_SIZE);
299  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
300 }
301 
302 
303 void SHA384::Swap(SHA384& other)
304 {
305  STL::swap(loLen_, other.loLen_);
306  STL::swap(hiLen_, other.hiLen_);
307  STL::swap(buffLen_, other.buffLen_);
308 
309  memcpy(digest_, other.digest_, DIGEST_SIZE);
310  memcpy(buffer_, other.buffer_, BLOCK_SIZE);
311 }
312 
313 #endif // WORD64_AVAILABLE
314 
315 
316 #ifdef DO_SHA_ASM
317 
318 // Update digest with data of size len
319 void SHA::Update(const byte* data, word32 len)
320 {
321  if (!isMMX) {
322  HASHwithTransform::Update(data, len);
323  return;
324  }
325 
326  byte* local = reinterpret_cast<byte*>(buffer_);
327 
328  // finish and process any partially buffered block first
329  if (buffLen_) {
330  word32 add = min(len, BLOCK_SIZE - buffLen_);
331  memcpy(&local[buffLen_], data, add);
332 
333  buffLen_ += add;
334  data += add;
335  len -= add;
336 
337  if (buffLen_ == BLOCK_SIZE) {
338  ByteReverse(local, local, BLOCK_SIZE);
339  Transform();
340  AddLength(BLOCK_SIZE);
341  buffLen_ = 0;
342  }
343  }
344 
345  // process full blocks all at once for asm
346  if (buffLen_ == 0) {
347  word32 times = len / BLOCK_SIZE;
348  if (times) {
349  AsmTransform(data, times);
350  const word32 add = BLOCK_SIZE * times;
351  AddLength(add);
352  len -= add;
353  data += add;
354  }
355  }
356 
357  // cache any data left
358  if (len) {
359  memcpy(&local[buffLen_], data, len);
360  buffLen_ += len;
361  }
362 }
363 
364 #endif // DO_SHA_ASM
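
The ByteReverse call in the buffered path above converts the little-endian input words into the big-endian order SHA-1 operates on (the assembly path does the same job with bswap). ByteReverse itself is defined elsewhere in the library; a plausible per-word equivalent, shown only as an assumption to illustrate the transformation:

#include <cstdint>

// Assumed per-word behaviour of ByteReverse: swap the byte order of a 32-bit word.
static inline std::uint32_t byteswap32(std::uint32_t v)
{
    return  (v >> 24)
          | ((v >> 8) & 0x0000FF00u)
          | ((v << 8) & 0x00FF0000u)
          |  (v << 24);
}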
365 
366 
367 void SHA::Transform()
368 {
369  word32 W[BLOCK_SIZE / sizeof(word32)];
370 
371  // Copy context->state[] to working vars
372  word32 a = digest_[0];
373  word32 b = digest_[1];
374  word32 c = digest_[2];
375  word32 d = digest_[3];
376  word32 e = digest_[4];
377 
378  // 4 rounds of 20 operations each. Loop unrolled.
379  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
380  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
381  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
382  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
383 
384  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
385 
386  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
387  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
388  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
389  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
390  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
391 
392  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
393  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
394  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
395  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
396  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
397 
398  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
399  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
400  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
401  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
402  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
403 
404  // Add the working vars back into digest state[]
405  digest_[0] += a;
406  digest_[1] += b;
407  digest_[2] += c;
408  digest_[3] += d;
409  digest_[4] += e;
410 
411  // Wipe variables
412  a = b = c = d = e = 0;
413  memset(W, 0, sizeof(W));
414 }
415 
416 
417 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
418 
419 #define Ch(x,y,z) (z^(x&(y^z)))
420 #define Maj(x,y,z) ((x&y)|(z&(x|y)))
421 
422 #define a(i) T[(0-i)&7]
423 #define b(i) T[(1-i)&7]
424 #define c(i) T[(2-i)&7]
425 #define d(i) T[(3-i)&7]
426 #define e(i) T[(4-i)&7]
427 #define f(i) T[(5-i)&7]
428 #define g(i) T[(6-i)&7]
429 #define h(i) T[(7-i)&7]
430 
431 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
432  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
433 
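
The a(i)..h(i) macros above rotate the eight working variables through T[] purely by index arithmetic: incrementing i by one renames the registers, so the R(i) round body never has to move data. A small standalone illustration of the renaming (hypothetical demo program; only the (n - i) & 7 indexing is taken from the macros):

#include <cstdio>

int main()
{
    const char* names = "abcdefgh";
    for (unsigned i = 0; i < 4; ++i) {              // first few rounds
        std::printf("round %u:", i);
        for (unsigned n = 0; n < 8; ++n)            // which T[] slot plays role n?
            std::printf(" %c=T[%u]", names[n], (n - i) & 7);
        std::printf("\n");
    }
    return 0;
}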
434 // for SHA256
435 #define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
436 #define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
437 #define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
438 #define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
439 
440 
441 static const word32 K256[64] = {
442  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
443  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
444  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
445  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
446  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
447  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
448  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
449  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
450  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
451  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
452  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
453  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
454  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
455  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
456  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
457  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
458 };
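
These round constants are the first 32 bits of the fractional parts of the cube roots of the first 64 primes, a standard property of SHA-256 rather than anything this file depends on. A quick standalone check of the first entry (hypothetical demo, not part of the build):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    double root = std::cbrt(2.0);                     // cube root of the first prime, 2
    double frac = root - std::floor(root);            // keep the fractional part
    std::uint32_t k0 =
        static_cast<std::uint32_t>(frac * 4294967296.0);       // first 32 fractional bits
    std::printf("0x%08lx\n", static_cast<unsigned long>(k0));  // prints 0x428a2f98
    return 0;
}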
459 
460 
461 static void Transform256(word32* digest_, word32* buffer_)
462 {
463  const word32* K = K256;
464 
465  word32 W[16];
466  word32 T[8];
467 
468  // Copy digest to working vars
469  memcpy(T, digest_, sizeof(T));
470 
471  // 64 operations, partially loop unrolled
472  for (unsigned int j = 0; j < 64; j += 16) {
473  R( 0); R( 1); R( 2); R( 3);
474  R( 4); R( 5); R( 6); R( 7);
475  R( 8); R( 9); R(10); R(11);
476  R(12); R(13); R(14); R(15);
477  }
478 
479  // Add the working vars back into digest
480  digest_[0] += a(0);
481  digest_[1] += b(0);
482  digest_[2] += c(0);
483  digest_[3] += d(0);
484  digest_[4] += e(0);
485  digest_[5] += f(0);
486  digest_[6] += g(0);
487  digest_[7] += h(0);
488 
489  // Wipe variables
490  memset(W, 0, sizeof(W));
491  memset(T, 0, sizeof(T));
492 }
493 
494 
495 // undef for 256
496 #undef S0
497 #undef S1
498 #undef s0
499 #undef s1
500 
501 
502 void SHA256::Transform()
503 {
504  Transform256(digest_, buffer_);
505 }
506 
507 
508 void SHA224::Transform()
509 {
510  Transform256(digest_, buffer_);
511 }
512 
513 
514 #ifdef WORD64_AVAILABLE
515 
516 static const word64 K512[80] = {
517  W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
518  W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
519  W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
520  W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
521  W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
522  W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
523  W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
524  W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
525  W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
526  W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
527  W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
528  W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
529  W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
530  W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
531  W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
532  W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
533  W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
534  W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
535  W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
536  W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
537  W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
538  W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
539  W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
540  W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
541  W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
542  W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
543  W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
544  W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
545  W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
546  W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
547  W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
548  W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
549  W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
550  W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
551  W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
552  W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
553  W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
554  W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
555  W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
556  W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
557 };
558 
559 
560 // for SHA512
561 #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
562 #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
563 #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
564 #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
565 
566 
567 static void Transform512(word64* digest_, word64* buffer_)
568 {
569  const word64* K = K512;
570 
571  word64 W[16];
572  word64 T[8];
573 
574  // Copy digest to working vars
575  memcpy(T, digest_, sizeof(T));
576 
577  // 80 operations, partially loop unrolled
578  for (unsigned int j = 0; j < 80; j += 16) {
579  R( 0); R( 1); R( 2); R( 3);
580  R( 4); R( 5); R( 6); R( 7);
581  R( 8); R( 9); R(10); R(11);
582  R(12); R(13); R(14); R(15);
583  }
584 
585  // Add the working vars back into digest
586 
587  digest_[0] += a(0);
588  digest_[1] += b(0);
589  digest_[2] += c(0);
590  digest_[3] += d(0);
591  digest_[4] += e(0);
592  digest_[5] += f(0);
593  digest_[6] += g(0);
594  digest_[7] += h(0);
595 
596  // Wipe variables
597  memset(W, 0, sizeof(W));
598  memset(T, 0, sizeof(T));
599 }
600 
601 
602 void SHA512::Transform()
603 {
604  Transform512(digest_, buffer_);
605 }
606 
607 
608 void SHA384::Transform()
609 {
610  Transform512(digest_, buffer_);
611 }
612 
613 #endif // WORD64_AVAILABLE
614 
615 
616 #ifdef DO_SHA_ASM
617 
618 // f1(x,y,z) (z^(x &(y^z)))
619 // place in esi
620 #define ASMf1(x,y,z) \
621  AS2( mov esi, y ) \
622  AS2( xor esi, z ) \
623  AS2( and esi, x ) \
624  AS2( xor esi, z )
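
The form z ^ (x & (y ^ z)) used for f1 throughout this file is an equivalent of the textbook selector (x & y) | (~x & z) that saves a temporary. A quick standalone equivalence check (hypothetical demo, not part of the build):

#include <cassert>
#include <cstdint>

int main()
{
    const std::uint32_t vals[] = { 0x00000000u, 0xFFFFFFFFu, 0xA5A5A5A5u, 0x12345678u };
    for (std::uint32_t x : vals)
        for (std::uint32_t y : vals)
            for (std::uint32_t z : vals)
                // bitwise: where a bit of x is 1 pick y's bit, otherwise pick z's bit
                assert((z ^ (x & (y ^ z))) == ((x & y) | (~x & z)));
    return 0;
}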
625 
626 
627 // R0(v,w,x,y,z,i) =
628 // z+= f1(w,x,y) + W[i] + 0x5A827999 + rotlFixed(v,5);
629 // w = rotlFixed(w,30);
630 
631 // use esi for f
632 // use edi as tmp
633 
634 
635 #define ASMR0(v,w,x,y,z,i) \
636  AS2( mov esi, x ) \
637  AS2( mov edi, [esp + i * 4] ) \
638  AS2( xor esi, y ) \
639  AS2( and esi, w ) \
640  AS2( lea z, [edi + z + 0x5A827999] ) \
641  AS2( mov edi, v ) \
642  AS2( xor esi, y ) \
643  AS2( rol edi, 5 ) \
644  AS2( add z, esi ) \
645  AS2( rol w, 30 ) \
646  AS2( add z, edi )
647 
648 
649 /* Some macro stuff, but older gas ( < 2.16 ) can't process &, so do it by hand
650  % won't work on gas at all
651 
652 #define xstr(s) str(s)
653 #define str(s) #s
654 
655 #define WOFF1(a) ( a & 15)
656 #define WOFF2(a) ((a + 2) & 15)
657 #define WOFF3(a) ((a + 8) & 15)
658 #define WOFF4(a) ((a + 13) & 15)
659 
660 #ifdef __GNUC__
661  #define WGET1(i) asm("mov esp, [edi - "xstr(WOFF1(i))" * 4] ");
662  #define WGET2(i) asm("xor esp, [edi - "xstr(WOFF2(i))" * 4] ");
663  #define WGET3(i) asm("xor esp, [edi - "xstr(WOFF3(i))" * 4] ");
664  #define WGET4(i) asm("xor esp, [edi - "xstr(WOFF4(i))" * 4] ");
665  #define WPUT1(i) asm("mov [edi - "xstr(WOFF1(i))" * 4], esp ");
666 #else
667  #define WGET1(i) AS2( mov esp, [edi - WOFF1(i) * 4] )
668  #define WGET2(i) AS2( xor esp, [edi - WOFF2(i) * 4] )
669  #define WGET3(i) AS2( xor esp, [edi - WOFF3(i) * 4] )
670  #define WGET4(i) AS2( xor esp, [edi - WOFF4(i) * 4] )
671  #define WPUT1(i) AS2( mov [edi - WOFF1(i) * 4], esp )
672 #endif
673 */
674 
675 // ASMR1 = ASMR0 but use esp for W calcs
676 
677 #define ASMR1(v,w,x,y,z,i,W1,W2,W3,W4) \
678  AS2( mov edi, [esp + W1 * 4] ) \
679  AS2( mov esi, x ) \
680  AS2( xor edi, [esp + W2 * 4] ) \
681  AS2( xor esi, y ) \
682  AS2( xor edi, [esp + W3 * 4] ) \
683  AS2( and esi, w ) \
684  AS2( xor edi, [esp + W4 * 4] ) \
685  AS2( rol edi, 1 ) \
686  AS2( xor esi, y ) \
687  AS2( mov [esp + W1 * 4], edi ) \
688  AS2( lea z, [edi + z + 0x5A827999] ) \
689  AS2( mov edi, v ) \
690  AS2( rol edi, 5 ) \
691  AS2( add z, esi ) \
692  AS2( rol w, 30 ) \
693  AS2( add z, edi )
694 
695 
696 // ASMR2 = ASMR1 but f is plain xor (x^y^z) instead
697 
698 #define ASMR2(v,w,x,y,z,i,W1,W2,W3,W4) \
699  AS2( mov edi, [esp + W1 * 4] ) \
700  AS2( mov esi, x ) \
701  AS2( xor edi, [esp + W2 * 4] ) \
702  AS2( xor esi, y ) \
703  AS2( xor edi, [esp + W3 * 4] ) \
704  AS2( xor esi, w ) \
705  AS2( xor edi, [esp + W4 * 4] ) \
706  AS2( rol edi, 1 ) \
707  AS2( add z, esi ) \
708  AS2( mov [esp + W1 * 4], edi ) \
709  AS2( lea z, [edi + z + 0x6ED9EBA1] ) \
710  AS2( mov edi, v ) \
711  AS2( rol edi, 5 ) \
712  AS2( rol w, 30 ) \
713  AS2( add z, edi )
714 
715 
716 // ASMR3 = ASMR2 but f is (x&y)|(z&(x|y))
717 // which is (w&x)|(y&(w|x))
718 
719 #define ASMR3(v,w,x,y,z,i,W1,W2,W3,W4) \
720  AS2( mov edi, [esp + W1 * 4] ) \
721  AS2( mov esi, x ) \
722  AS2( xor edi, [esp + W2 * 4] ) \
723  AS2( or esi, w ) \
724  AS2( xor edi, [esp + W3 * 4] ) \
725  AS2( and esi, y ) \
726  AS2( xor edi, [esp + W4 * 4] ) \
727  AS2( movd mm0, esi ) \
728  AS2( rol edi, 1 ) \
729  AS2( mov esi, x ) \
730  AS2( mov [esp + W1 * 4], edi ) \
731  AS2( and esi, w ) \
732  AS2( lea z, [edi + z + 0x8F1BBCDC] ) \
733  AS2( movd edi, mm0 ) \
734  AS2( or esi, edi ) \
735  AS2( mov edi, v ) \
736  AS2( rol edi, 5 ) \
737  AS2( add z, esi ) \
738  AS2( rol w, 30 ) \
739  AS2( add z, edi )
740 
741 
742 // ASMR4 = ASMR2 but different constant
743 
744 #define ASMR4(v,w,x,y,z,i,W1,W2,W3,W4) \
745  AS2( mov edi, [esp + W1 * 4] ) \
746  AS2( mov esi, x ) \
747  AS2( xor edi, [esp + W2 * 4] ) \
748  AS2( xor esi, y ) \
749  AS2( xor edi, [esp + W3 * 4] ) \
750  AS2( xor esi, w ) \
751  AS2( xor edi, [esp + W4 * 4] ) \
752  AS2( rol edi, 1 ) \
753  AS2( add z, esi ) \
754  AS2( mov [esp + W1 * 4], edi ) \
755  AS2( lea z, [edi + z + 0xCA62C1D6] ) \
756  AS2( mov edi, v ) \
757  AS2( rol edi, 5 ) \
758  AS2( rol w, 30 ) \
759  AS2( add z, edi )
760 
761 
762 #ifdef _MSC_VER
763  __declspec(naked)
764 #endif
765 void SHA::AsmTransform(const byte* data, word32 times)
766 {
767 #ifdef __GNUC__
768  #define AS1(x) asm(#x);
769  #define AS2(x, y) asm(#x ", " #y);
770 
771  #define PROLOG() \
772  asm(".intel_syntax noprefix"); \
773  AS2( movd mm3, edi ) \
774  AS2( movd mm4, ebx ) \
775  AS2( movd mm5, esi ) \
776  AS2( movd mm6, ebp ) \
777  AS2( mov ecx, DWORD PTR [ebp + 8] ) \
778  AS2( mov edi, DWORD PTR [ebp + 12] ) \
779  AS2( mov eax, DWORD PTR [ebp + 16] )
780 
781  #define EPILOG() \
782  AS2( movd ebp, mm6 ) \
783  AS2( movd esi, mm5 ) \
784  AS2( movd ebx, mm4 ) \
785  AS2( mov esp, ebp ) \
786  AS2( movd edi, mm3 ) \
787  AS1( emms ) \
788  asm(".att_syntax");
789 #else
790  #define AS1(x) __asm x
791  #define AS2(x, y) __asm x, y
792 
793  #define PROLOG() \
794  AS1( push ebp ) \
795  AS2( mov ebp, esp ) \
796  AS2( movd mm3, edi ) \
797  AS2( movd mm4, ebx ) \
798  AS2( movd mm5, esi ) \
799  AS2( movd mm6, ebp ) \
800  AS2( mov edi, data ) \
801  AS2( mov eax, times )
802 
803  #define EPILOG() \
804  AS2( movd ebp, mm6 ) \
805  AS2( movd esi, mm5 ) \
806  AS2( movd ebx, mm4 ) \
807  AS2( movd edi, mm3 ) \
808  AS2( mov esp, ebp ) \
809  AS1( pop ebp ) \
810  AS1( emms ) \
811  AS1( ret 8 )
812 #endif
813 
814  PROLOG()
815 
816  AS2( mov esi, ecx )
817 
818  #ifdef OLD_GCC_OFFSET
819  AS2( add esi, 20 ) // digest_[0]
820  #else
821  AS2( add esi, 16 ) // digest_[0]
822  #endif
823 
824  AS2( movd mm2, eax ) // store times
825  AS2( movd mm1, esi ) // store digest_
826 
827  AS2( sub esp, 68 ) // make room on stack
828 
829 AS1( loopStart: )
830 
831  // byte reverse 16 words of input, 4 at a time, put on stack for W[]
832 
833  // part 1
834  AS2( mov eax, [edi] )
835  AS2( mov ebx, [edi + 4] )
836  AS2( mov ecx, [edi + 8] )
837  AS2( mov edx, [edi + 12] )
838 
839  AS1( bswap eax )
840  AS1( bswap ebx )
841  AS1( bswap ecx )
842  AS1( bswap edx )
843 
844  AS2( mov [esp], eax )
845  AS2( mov [esp + 4], ebx )
846  AS2( mov [esp + 8], ecx )
847  AS2( mov [esp + 12], edx )
848 
849  // part 2
850  AS2( mov eax, [edi + 16] )
851  AS2( mov ebx, [edi + 20] )
852  AS2( mov ecx, [edi + 24] )
853  AS2( mov edx, [edi + 28] )
854 
855  AS1( bswap eax )
856  AS1( bswap ebx )
857  AS1( bswap ecx )
858  AS1( bswap edx )
859 
860  AS2( mov [esp + 16], eax )
861  AS2( mov [esp + 20], ebx )
862  AS2( mov [esp + 24], ecx )
863  AS2( mov [esp + 28], edx )
864 
865 
866  // part 3
867  AS2( mov eax, [edi + 32] )
868  AS2( mov ebx, [edi + 36] )
869  AS2( mov ecx, [edi + 40] )
870  AS2( mov edx, [edi + 44] )
871 
872  AS1( bswap eax )
873  AS1( bswap ebx )
874  AS1( bswap ecx )
875  AS1( bswap edx )
876 
877  AS2( mov [esp + 32], eax )
878  AS2( mov [esp + 36], ebx )
879  AS2( mov [esp + 40], ecx )
880  AS2( mov [esp + 44], edx )
881 
882 
883  // part 4
884  AS2( mov eax, [edi + 48] )
885  AS2( mov ebx, [edi + 52] )
886  AS2( mov ecx, [edi + 56] )
887  AS2( mov edx, [edi + 60] )
888 
889  AS1( bswap eax )
890  AS1( bswap ebx )
891  AS1( bswap ecx )
892  AS1( bswap edx )
893 
894  AS2( mov [esp + 48], eax )
895  AS2( mov [esp + 52], ebx )
896  AS2( mov [esp + 56], ecx )
897  AS2( mov [esp + 60], edx )
898 
899  AS2( mov [esp + 64], edi ) // store edi for end
900 
901  // read from digest_
902  AS2( mov eax, [esi] ) // a1
903  AS2( mov ebx, [esi + 4] ) // b1
904  AS2( mov ecx, [esi + 8] ) // c1
905  AS2( mov edx, [esi + 12] ) // d1
906  AS2( mov ebp, [esi + 16] ) // e1
907 
908 
909  ASMR0(eax, ebx, ecx, edx, ebp, 0)
910  ASMR0(ebp, eax, ebx, ecx, edx, 1)
911  ASMR0(edx, ebp, eax, ebx, ecx, 2)
912  ASMR0(ecx, edx, ebp, eax, ebx, 3)
913  ASMR0(ebx, ecx, edx, ebp, eax, 4)
914  ASMR0(eax, ebx, ecx, edx, ebp, 5)
915  ASMR0(ebp, eax, ebx, ecx, edx, 6)
916  ASMR0(edx, ebp, eax, ebx, ecx, 7)
917  ASMR0(ecx, edx, ebp, eax, ebx, 8)
918  ASMR0(ebx, ecx, edx, ebp, eax, 9)
919  ASMR0(eax, ebx, ecx, edx, ebp, 10)
920  ASMR0(ebp, eax, ebx, ecx, edx, 11)
921  ASMR0(edx, ebp, eax, ebx, ecx, 12)
922  ASMR0(ecx, edx, ebp, eax, ebx, 13)
923  ASMR0(ebx, ecx, edx, ebp, eax, 14)
924  ASMR0(eax, ebx, ecx, edx, ebp, 15)
925 
926  ASMR1(ebp, eax, ebx, ecx, edx, 16, 0, 2, 8, 13)
927  ASMR1(edx, ebp, eax, ebx, ecx, 17, 1, 3, 9, 14)
928  ASMR1(ecx, edx, ebp, eax, ebx, 18, 2, 4, 10, 15)
929  ASMR1(ebx, ecx, edx, ebp, eax, 19, 3, 5, 11, 0)
930 
931  ASMR2(eax, ebx, ecx, edx, ebp, 20, 4, 6, 12, 1)
932  ASMR2(ebp, eax, ebx, ecx, edx, 21, 5, 7, 13, 2)
933  ASMR2(edx, ebp, eax, ebx, ecx, 22, 6, 8, 14, 3)
934  ASMR2(ecx, edx, ebp, eax, ebx, 23, 7, 9, 15, 4)
935  ASMR2(ebx, ecx, edx, ebp, eax, 24, 8, 10, 0, 5)
936  ASMR2(eax, ebx, ecx, edx, ebp, 25, 9, 11, 1, 6)
937  ASMR2(ebp, eax, ebx, ecx, edx, 26, 10, 12, 2, 7)
938  ASMR2(edx, ebp, eax, ebx, ecx, 27, 11, 13, 3, 8)
939  ASMR2(ecx, edx, ebp, eax, ebx, 28, 12, 14, 4, 9)
940  ASMR2(ebx, ecx, edx, ebp, eax, 29, 13, 15, 5, 10)
941  ASMR2(eax, ebx, ecx, edx, ebp, 30, 14, 0, 6, 11)
942  ASMR2(ebp, eax, ebx, ecx, edx, 31, 15, 1, 7, 12)
943  ASMR2(edx, ebp, eax, ebx, ecx, 32, 0, 2, 8, 13)
944  ASMR2(ecx, edx, ebp, eax, ebx, 33, 1, 3, 9, 14)
945  ASMR2(ebx, ecx, edx, ebp, eax, 34, 2, 4, 10, 15)
946  ASMR2(eax, ebx, ecx, edx, ebp, 35, 3, 5, 11, 0)
947  ASMR2(ebp, eax, ebx, ecx, edx, 36, 4, 6, 12, 1)
948  ASMR2(edx, ebp, eax, ebx, ecx, 37, 5, 7, 13, 2)
949  ASMR2(ecx, edx, ebp, eax, ebx, 38, 6, 8, 14, 3)
950  ASMR2(ebx, ecx, edx, ebp, eax, 39, 7, 9, 15, 4)
951 
952 
953  ASMR3(eax, ebx, ecx, edx, ebp, 40, 8, 10, 0, 5)
954  ASMR3(ebp, eax, ebx, ecx, edx, 41, 9, 11, 1, 6)
955  ASMR3(edx, ebp, eax, ebx, ecx, 42, 10, 12, 2, 7)
956  ASMR3(ecx, edx, ebp, eax, ebx, 43, 11, 13, 3, 8)
957  ASMR3(ebx, ecx, edx, ebp, eax, 44, 12, 14, 4, 9)
958  ASMR3(eax, ebx, ecx, edx, ebp, 45, 13, 15, 5, 10)
959  ASMR3(ebp, eax, ebx, ecx, edx, 46, 14, 0, 6, 11)
960  ASMR3(edx, ebp, eax, ebx, ecx, 47, 15, 1, 7, 12)
961  ASMR3(ecx, edx, ebp, eax, ebx, 48, 0, 2, 8, 13)
962  ASMR3(ebx, ecx, edx, ebp, eax, 49, 1, 3, 9, 14)
963  ASMR3(eax, ebx, ecx, edx, ebp, 50, 2, 4, 10, 15)
964  ASMR3(ebp, eax, ebx, ecx, edx, 51, 3, 5, 11, 0)
965  ASMR3(edx, ebp, eax, ebx, ecx, 52, 4, 6, 12, 1)
966  ASMR3(ecx, edx, ebp, eax, ebx, 53, 5, 7, 13, 2)
967  ASMR3(ebx, ecx, edx, ebp, eax, 54, 6, 8, 14, 3)
968  ASMR3(eax, ebx, ecx, edx, ebp, 55, 7, 9, 15, 4)
969  ASMR3(ebp, eax, ebx, ecx, edx, 56, 8, 10, 0, 5)
970  ASMR3(edx, ebp, eax, ebx, ecx, 57, 9, 11, 1, 6)
971  ASMR3(ecx, edx, ebp, eax, ebx, 58, 10, 12, 2, 7)
972  ASMR3(ebx, ecx, edx, ebp, eax, 59, 11, 13, 3, 8)
973 
974  ASMR4(eax, ebx, ecx, edx, ebp, 60, 12, 14, 4, 9)
975  ASMR4(ebp, eax, ebx, ecx, edx, 61, 13, 15, 5, 10)
976  ASMR4(edx, ebp, eax, ebx, ecx, 62, 14, 0, 6, 11)
977  ASMR4(ecx, edx, ebp, eax, ebx, 63, 15, 1, 7, 12)
978  ASMR4(ebx, ecx, edx, ebp, eax, 64, 0, 2, 8, 13)
979  ASMR4(eax, ebx, ecx, edx, ebp, 65, 1, 3, 9, 14)
980  ASMR4(ebp, eax, ebx, ecx, edx, 66, 2, 4, 10, 15)
981  ASMR4(edx, ebp, eax, ebx, ecx, 67, 3, 5, 11, 0)
982  ASMR4(ecx, edx, ebp, eax, ebx, 68, 4, 6, 12, 1)
983  ASMR4(ebx, ecx, edx, ebp, eax, 69, 5, 7, 13, 2)
984  ASMR4(eax, ebx, ecx, edx, ebp, 70, 6, 8, 14, 3)
985  ASMR4(ebp, eax, ebx, ecx, edx, 71, 7, 9, 15, 4)
986  ASMR4(edx, ebp, eax, ebx, ecx, 72, 8, 10, 0, 5)
987  ASMR4(ecx, edx, ebp, eax, ebx, 73, 9, 11, 1, 6)
988  ASMR4(ebx, ecx, edx, ebp, eax, 74, 10, 12, 2, 7)
989  ASMR4(eax, ebx, ecx, edx, ebp, 75, 11, 13, 3, 8)
990  ASMR4(ebp, eax, ebx, ecx, edx, 76, 12, 14, 4, 9)
991  ASMR4(edx, ebp, eax, ebx, ecx, 77, 13, 15, 5, 10)
992  ASMR4(ecx, edx, ebp, eax, ebx, 78, 14, 0, 6, 11)
993  ASMR4(ebx, ecx, edx, ebp, eax, 79, 15, 1, 7, 12)
994 
995 
996  AS2( movd esi, mm1 ) // digest_
997 
998  AS2( add [esi], eax ) // write out
999  AS2( add [esi + 4], ebx )
1000  AS2( add [esi + 8], ecx )
1001  AS2( add [esi + 12], edx )
1002  AS2( add [esi + 16], ebp )
1003 
1004  // setup next round
1005  AS2( movd ebp, mm2 ) // times
1006 
1007  AS2( mov edi, DWORD PTR [esp + 64] ) // data
1008 
1009  AS2( add edi, 64 ) // next round of data
1010  AS2( mov [esp + 64], edi ) // restore
1011 
1012  AS1( dec ebp )
1013  AS2( movd mm2, ebp )
1014  AS1( jnz loopStart )
1015 
1016 
1017  EPILOG()
1018 }
1019 
1020 
1021 #endif // DO_SHA_ASM
1022 
1023 } // namespace