Crypto++  5.6.3
Free C++ class library of cryptographic schemes
sha.cpp
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code

#include "pch.h"
#include "config.h"

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4100 4731)
#endif

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "secblock.h"
#include "sha.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

// start of Steve Reid's code

#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
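
// blk0/blk1 implement the SHA-1 message schedule in a 16-word circular
// buffer: W_t = ROTL1(W_{t-3} ^ W_{t-8} ^ W_{t-14} ^ W_{t-16}), where
// (i+13)&15 == (i-3) mod 16, and so on. A minimal portable sketch of the
// same recurrence (illustration only, not part of the library):
/*
#include <stdint.h>

static inline uint32_t rotl1(uint32_t x) { return (x << 1) | (x >> 31); }

// Expand one schedule word in place, using the same 16-entry ring as blk1().
static uint32_t expand(uint32_t W[16], unsigned int i)
{
    W[i & 15] = rotl1(W[(i + 13) & 15] ^ W[(i + 8) & 15] ^
                      W[(i + 2) & 15] ^ W[i & 15]);
    return W[i & 15];
}
*/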

void SHA1::InitState(HashWordType *state)
{
    state[0] = 0x67452301L;
    state[1] = 0xEFCDAB89L;
    state[2] = 0x98BADCFEL;
    state[3] = 0x10325476L;
    state[4] = 0xC3D2E1F0L;
}

#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)
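
// f1 and f3 are the standard SHA-1 choice and majority functions rewritten to
// save an operation: Ch(x,y,z) = (x&y)|(~x&z) == z^(x&(y^z)), and
// Maj(x,y,z) = (x&y)|(x&z)|(y&z) == (x&y)|(z&(x|y)). Both identities can be
// verified by checking the eight single-bit input combinations.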

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

void SHA1::Transform(word32 *state, const word32 *data)
{
    word32 W[16];
    /* Copy context->state[] to working vars */
    word32 a = state[0];
    word32 b = state[1];
    word32 c = state[2];
    word32 d = state[3];
    word32 e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
}
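
// Usage sketch (illustration only, not part of this file): Transform() is
// normally driven through the library's public HashTransformation interface,
// for example:
/*
#include "sha.h"

int main()
{
    using namespace CryptoPP;
    byte digest[SHA1::DIGESTSIZE];
    const byte msg[] = {'a', 'b', 'c'};
    SHA1().CalculateDigest(digest, msg, sizeof(msg));
    // digest == A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
    return 0;
}
*/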

// end of Steve Reid's code

// *************************************************************

void SHA224::InitState(HashWordType *state)
{
    static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
    memcpy(state, s, sizeof(s));
}

void SHA256::InitState(HashWordType *state)
{
    static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
    memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
#endif
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
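
// FIPS 180-4 specifies these constants as the first 32 bits of the fractional
// parts of the cube roots of the first 64 primes. A sketch of how the table
// could be regenerated (illustration only; double-precision rounding can
// perturb the last bit, so verify against the published values):
/*
#include <cmath>
#include <cstdio>

int main()
{
    int primes[64], n = 0;
    for (int c = 2; n < 64; c++)
    {
        bool isPrime = true;
        for (int d = 2; d * d <= c; d++)
            if (c % d == 0) { isPrime = false; break; }
        if (isPrime) primes[n++] = c;
    }
    for (int i = 0; i < 64; i++)
    {
        double r = std::pow((double)primes[i], 1.0 / 3.0);
        r -= std::floor(r);                              // fractional part
        std::printf("0x%08lx,\n", (unsigned long)(r * 4294967296.0));
    }
    return 0;
}
*/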

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)

static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
#if defined(_MSC_VER) && (_MSC_VER == 1200)
    , ... // VC60 workaround: prevent VC 6 from inlining this function
#endif
    )
{
#if defined(_MSC_VER) && (_MSC_VER == 1200)
    AS2(mov ecx, [state])
    AS2(mov edx, [data])
#endif

    #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
    #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
    #define G(i) H(i+1)
    #define F(i) H(i+2)
    #define E(i) H(i+3)
    #define D(i) H(i+4)
    #define C(i) H(i+5)
    #define B(i) H(i+6)
    #define A(i) H(i+7)
    #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
    #define Wt_2(i) Wt((i)-2)
    #define Wt_15(i) Wt((i)-15)
    #define Wt_7(i) Wt((i)-7)
    #define K_END [BASE+8*4+16*4+0*WORD_SZ]
    #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
    #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
    #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
    #define Kt(i) WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X32
    #define BASE esp+8
#elif CRYPTOPP_BOOL_X86
    #define BASE esp+4
#elif defined(__GNUC__)
    #define BASE r8
#else
    #define BASE rsp
#endif
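
// A(i)..H(i) name eight 32-bit slots relative to BASE. The slot index is
// taken mod 8 and shifts by one as i increases (H(i+1) aliases G(i), etc.),
// so the eight working variables rotate through memory and no register
// shuffling is needed between rounds; the same trick, mod 16, keeps the
// message schedule Wt(i) in a ring, just like blk1() in the C code above.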

#define RA0(i, edx, edi) \
    AS2( add edx, [Kt(i)] )\
    AS2( add edx, [Wt(i)] )\
    AS2( add edx, H(i) )\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi) \
    AS2( mov AS_REG_7d, [Wt_2(i)] )\
    AS2( mov edi, [Wt_15(i)])\
    AS2( mov ebx, AS_REG_7d )\
    AS2( shr AS_REG_7d, 10 )\
    AS2( ror ebx, 17 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
    AS2( add ebx, [Wt_7(i)])\
    AS2( mov AS_REG_7d, edi )\
    AS2( shr AS_REG_7d, 3 )\
    AS2( ror edi, 7 )\
    AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
    AS2( xor AS_REG_7d, edi )\
    AS2( add edx, [Kt(i)])\
    AS2( ror edi, 11 )\
    AS2( add edx, H(i) )\
    AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
    AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
    AS2( mov [Wt(i)], AS_REG_7d)\
    AS2( add edx, AS_REG_7d )\

#define ROUND(i, r, eax, ecx, edi, edx)\
    /* in: edi = E */\
    /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
    AS2( mov edx, F(i) )\
    AS2( xor edx, G(i) )\
    AS2( and edx, edi )\
    AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
    AS2( mov AS_REG_7d, edi )\
    AS2( ror edi, 6 )\
    AS2( ror AS_REG_7d, 25 )\
    RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
    AS2( xor AS_REG_7d, edi )\
    AS2( ror edi, 5 )\
    AS2( xor AS_REG_7d, edi )/* S1(E) */\
    AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
    RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
    /* in: ecx = A, eax = B^C, edx = T1 */\
    /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
    AS2( mov ebx, ecx )\
    AS2( xor ecx, B(i) )/* A^B */\
    AS2( and eax, ecx )\
    AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
    AS2( mov AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
    AS2( add edx, D(i) )\
    AS2( mov D(i), edx )\
    AS2( ror AS_REG_7d, 22 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 11 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
    AS2( mov H(i), eax )\

// Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
// complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
#if CRYPTOPP_BOOL_X64
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i*2+1)], WORD_REG(bx))
#else // X86 and X32
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i)], WORD_REG(bx))
#endif
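
// SWAP_COPY converts big-endian message words to host order while copying
// them into the Wt ring: load a machine word, BSWAP it, store it into the
// schedule area. On X64 each 8-byte load carries two message words, and the
// byte swap also exchanges them, which the Wt(i*2+1) destination accounts for.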

#if defined(__GNUC__)
    #if CRYPTOPP_BOOL_X64
        FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
    #endif
    __asm__ __volatile__
    (
    #if CRYPTOPP_BOOL_X64
        "lea %4, %%r8;"
    #endif
    INTEL_NOPREFIX
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
    ALIGN 8
    X86_SHA256_HashBlocks PROC FRAME
    rex_push_reg rsi
    push_reg rdi
    push_reg rbx
    push_reg rbp
    alloc_stack(LOCALS_SIZE+8)
    .endprolog
    mov rdi, r8
    lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    #ifndef __GNUC__
        AS2( mov edi, [len])
        AS2( lea WORD_REG(si), [SHA256_K+48*4])
    #endif
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_PUSH_IF86(bx)
    #endif

    AS_PUSH_IF86(bp)
    AS2( mov ebx, esp)
    AS2( and esp, -16)
    AS2( sub WORD_REG(sp), LOCALS_SIZE)
    AS_PUSH_IF86(bx)
#endif
    AS2( mov STATE_SAVE, WORD_REG(cx))
    AS2( mov DATA_SAVE, WORD_REG(dx))
    AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
    AS2( mov DATA_END, WORD_REG(ax))
    AS2( mov K_END, WORD_REG(si))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( test edi, 1)
    ASJ( jnz, 2, f)
    AS1( dec DWORD PTR K_END)
#endif
    AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
    AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASJ( jmp, 0, f)
#endif
    ASL(2) // non-SSE2
    AS2( mov esi, ecx)
    AS2( lea edi, A(0))
    AS2( mov ecx, 8)
    AS1( rep movsd)
    AS2( mov esi, K_END)
    ASJ( jmp, 3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASL(0)
    AS2( movdqa E(0), xmm1)
    AS2( movdqa A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    ASL(3)
#endif
    AS2( sub WORD_REG(si), 48*4)
    SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
    SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
    SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
#endif
    AS2( mov edi, E(0)) // E
    AS2( mov eax, B(0)) // B
    AS2( xor eax, C(0)) // B^C
    AS2( mov ecx, A(0)) // A

    ROUND(0, 0, eax, ecx, edi, edx)
    ROUND(1, 0, ecx, eax, edx, edi)
    ROUND(2, 0, eax, ecx, edi, edx)
    ROUND(3, 0, ecx, eax, edx, edi)
    ROUND(4, 0, eax, ecx, edi, edx)
    ROUND(5, 0, ecx, eax, edx, edi)
    ROUND(6, 0, eax, ecx, edi, edx)
    ROUND(7, 0, ecx, eax, edx, edi)
    ROUND(8, 0, eax, ecx, edi, edx)
    ROUND(9, 0, ecx, eax, edx, edi)
    ROUND(10, 0, eax, ecx, edi, edx)
    ROUND(11, 0, ecx, eax, edx, edi)
    ROUND(12, 0, eax, ecx, edi, edx)
    ROUND(13, 0, ecx, eax, edx, edi)
    ROUND(14, 0, eax, ecx, edi, edx)
    ROUND(15, 0, ecx, eax, edx, edi)

    ASL(1)
    AS2(add WORD_REG(si), 4*16)
    ROUND(0, 1, eax, ecx, edi, edx)
    ROUND(1, 1, ecx, eax, edx, edi)
    ROUND(2, 1, eax, ecx, edi, edx)
    ROUND(3, 1, ecx, eax, edx, edi)
    ROUND(4, 1, eax, ecx, edi, edx)
    ROUND(5, 1, ecx, eax, edx, edi)
    ROUND(6, 1, eax, ecx, edi, edx)
    ROUND(7, 1, ecx, eax, edx, edi)
    ROUND(8, 1, eax, ecx, edi, edx)
    ROUND(9, 1, ecx, eax, edx, edi)
    ROUND(10, 1, eax, ecx, edi, edx)
    ROUND(11, 1, ecx, eax, edx, edi)
    ROUND(12, 1, eax, ecx, edi, edx)
    ROUND(13, 1, ecx, eax, edx, edi)
    ROUND(14, 1, eax, ecx, edi, edx)
    ROUND(15, 1, ecx, eax, edx, edi)
    AS2( cmp WORD_REG(si), K_END)
    ASJ( jb, 1, b)

    AS2( mov WORD_REG(dx), DATA_SAVE)
    AS2( add WORD_REG(dx), 64)
    AS2( mov AS_REG_7, STATE_SAVE)
    AS2( mov DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( test DWORD PTR K_END, 1)
    ASJ( jz, 4, f)
#endif
    AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
    AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
    AS2( paddd xmm1, E(0))
    AS2( paddd xmm0, A(0))
    AS2( movdqa [AS_REG_7+1*16], xmm1)
    AS2( movdqa [AS_REG_7+0*16], xmm0)
    AS2( cmp WORD_REG(dx), DATA_END)
    ASJ( jb, 0, b)
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASJ( jmp, 5, f)
    ASL(4) // non-SSE2
#endif
    AS2( add [AS_REG_7+0*4], ecx) // A
    AS2( add [AS_REG_7+4*4], edi) // E
    AS2( mov eax, B(0))
    AS2( mov ebx, C(0))
    AS2( mov ecx, D(0))
    AS2( add [AS_REG_7+1*4], eax)
    AS2( add [AS_REG_7+2*4], ebx)
    AS2( add [AS_REG_7+3*4], ecx)
    AS2( mov eax, F(0))
    AS2( mov ebx, G(0))
    AS2( mov ecx, H(0))
    AS2( add [AS_REG_7+5*4], eax)
    AS2( add [AS_REG_7+6*4], ebx)
    AS2( add [AS_REG_7+7*4], ecx)
    AS2( mov ecx, AS_REG_7d)
    AS2( cmp WORD_REG(dx), DATA_END)
    ASJ( jb, 2, b)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
    ASL(5)
#endif
#endif

    AS_POP_IF86(sp)
    AS_POP_IF86(bp)
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_POP_IF86(bx)
    #endif

#ifdef CRYPTOPP_GENERATE_X64_MASM
    add rsp, LOCALS_SIZE+8
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    ret
    X86_SHA256_HashBlocks ENDP
#endif

#ifdef __GNUC__
    ATT_PREFIX
    :
    : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
    #if CRYPTOPP_BOOL_X64
        , "m" (workspace[0])
    #endif
    : "memory", "cc", "%eax"
    #if CRYPTOPP_BOOL_X64
        , "%rbx", "%r8", "%r10"
    #endif
    );
#endif
}

#endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
}
#endif

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)

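// In the calls below, length & (size_t(0)-BLOCKSIZE) rounds the byte count
// down to a whole number of 64-byte blocks, and subtracting !HasSSE2() borrows
// the low bit of the count as a flag: the assembly above tests bit 0
// ("test edi, 1") and branches to its non-SSE2 path when the bit is set.
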
size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
    X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
    return length % BLOCKSIZE;
}

size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
    X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
    return length % BLOCKSIZE;
}

#endif

#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) (y^((x^y)&(y^z)))

#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
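
// These are the FIPS 180-4 SHA-256 functions: S0/S1 are the "big" sigmas
// Sigma0(x) = ROTR2(x) ^ ROTR13(x) ^ ROTR22(x) and Sigma1(x) = ROTR6(x) ^
// ROTR11(x) ^ ROTR25(x) applied to the working variables in every round,
// while s0/s1 are the "small" sigmas used only in message expansion, where
// the last term is a plain right shift rather than a rotation.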

void SHA256::Transform(word32 *state, const word32 *data)
{
    word32 W[16];
#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
    // this byte reverse is a waste of time, but this function is only called by MDC
    ByteReverse(W, data, BLOCKSIZE);
    X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2());
#else
    word32 T[8];
    /* Copy context->state[] to working vars */
    memcpy(T, state, sizeof(T));
    /* 64 operations, partially loop unrolled */
    for (unsigned int j=0; j<64; j+=16)
    {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
    /* Add the working vars back into context.state[] */
    state[0] += a(0);
    state[1] += b(0);
    state[2] += c(0);
    state[3] += d(0);
    state[4] += e(0);
    state[5] += f(0);
    state[6] += g(0);
    state[7] += h(0);
#endif
}
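
// Usage sketch (illustration only): incremental hashing through the public
// interface, which reaches the Transform()/HashMultipleBlocks() code above:
/*
#include "sha.h"

int main()
{
    using namespace CryptoPP;
    byte digest[SHA256::DIGESTSIZE];
    const byte part1[] = {'a'}, part2[] = {'b', 'c'};
    SHA256 hash;
    hash.Update(part1, sizeof(part1));
    hash.Update(part2, sizeof(part2));
    hash.Final(digest);   // SHA-256("abc") = BA7816BF 8F01CFEA 414140DE ...
    return 0;
}
*/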

/*
// smaller but slower
void SHA256::Transform(word32 *state, const word32 *data)
{
    word32 T[20];
    word32 W[32];
    unsigned int i = 0, j = 0;
    word32 *t = T+8;

    memcpy(t, state, 8*4);
    word32 e = t[4], a = t[0];

    do
    {
        word32 w = data[j];
        W[j] = w;
        w += SHA256_K[j];
        w += t[7];
        w += S1(e);
        w += Ch(e, t[5], t[6]);
        e = t[3] + w;
        t[3] = t[3+8] = e;
        w += S0(t[0]);
        a = w + Maj(a, t[1], t[2]);
        t[-1] = t[7] = a;
        --t;
        ++j;
        if (j%8 == 0)
            t += 8;
    } while (j<16);

    do
    {
        i = j&0xf;
        word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
        W[i+16] = W[i] = w;
        w += SHA256_K[j];
        w += t[7];
        w += S1(e);
        w += Ch(e, t[5], t[6]);
        e = t[3] + w;
        t[3] = t[3+8] = e;
        w += S0(t[0]);
        a = w + Maj(a, t[1], t[2]);
        t[-1] = t[7] = a;

        w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
        W[(i+1)+16] = W[(i+1)] = w;
        w += SHA256_K[j+1];
        w += (t-1)[7];
        w += S1(e);
        w += Ch(e, (t-1)[5], (t-1)[6]);
        e = (t-1)[3] + w;
        (t-1)[3] = (t-1)[3+8] = e;
        w += S0((t-1)[0]);
        a = w + Maj(a, (t-1)[1], (t-1)[2]);
        (t-1)[-1] = (t-1)[7] = a;

        t-=2;
        j+=2;
        if (j%8 == 0)
            t += 8;
    } while (j<64);

    state[0] += a;
    state[1] += t[1];
    state[2] += t[2];
    state[3] += t[3];
    state[4] += e;
    state[5] += t[5];
    state[6] += t[6];
    state[7] += t[7];
}
*/

#undef S0
#undef S1
#undef s0
#undef s1
#undef R

// *************************************************************

void SHA384::InitState(HashWordType *state)
{
    static const word64 s[8] = {
        W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
        W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
        W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
        W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
    memcpy(state, s, sizeof(s));
}

void SHA512::InitState(HashWordType *state)
{
    static const word64 s[8] = {
        W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
        W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
        W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
        W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
    memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
#else
static const word64 SHA512_K[80] = {
#endif
    W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
    W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
    W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
    W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
    W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
    W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
    W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
    W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
    W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
    W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
    W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
    W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
    W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
    W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
    W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
    W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
    W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
    W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
    W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
    W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
    W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
    W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
    W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
    W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
    W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
    W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
    W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
    W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
    W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
    W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
    W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
    W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
    W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
    W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
    W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
    W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
    W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
    W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
    W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
    W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};
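
// As with SHA256_K, FIPS 180-4 derives these 80 constants from the fractional
// parts of the cube roots of the first 80 primes, here truncated to the first
// 64 bits; reproducing them exactly needs more than double precision.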

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
{
#ifdef __GNUC__
    __asm__ __volatile__
    (
    INTEL_NOPREFIX
    AS_PUSH_IF86( bx)
    AS2( mov ebx, eax)
#else
    AS1( push ebx)
    AS1( push esi)
    AS1( push edi)
    AS2( lea ebx, SHA512_K)
#endif

    AS2( mov eax, esp)
    AS2( and esp, 0xfffffff0)
    AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
    AS_PUSH_IF86( ax)
    AS2( xor eax, eax)

#if CRYPTOPP_BOOL_X32
    AS2( lea edi, [esp+8+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
    AS2( lea esi, [esp+8+20*8+8]) // 16-byte alignment, then add 8
#else
    AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
    AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
#endif

    AS2( movdqa xmm0, [ecx+0*16])
    AS2( movdq2q mm4, xmm0)
    AS2( movdqa [edi+0*16], xmm0)
    AS2( movdqa xmm0, [ecx+1*16])
    AS2( movdqa [edi+1*16], xmm0)
    AS2( movdqa xmm0, [ecx+2*16])
    AS2( movdq2q mm5, xmm0)
    AS2( movdqa [edi+2*16], xmm0)
    AS2( movdqa xmm0, [ecx+3*16])
    AS2( movdqa [edi+3*16], xmm0)
    ASJ( jmp, 0, f)

#define SSE2_S0_S1(r, a, b, c) \
    AS2( movq mm6, r)\
    AS2( psrlq r, a)\
    AS2( movq mm7, r)\
    AS2( psllq mm6, 64-c)\
    AS2( pxor mm7, mm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor mm7, r)\
    AS2( psllq mm6, c-b)\
    AS2( pxor mm7, mm6)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, mm7)\
    AS2( psllq mm6, b-a)\
    AS2( pxor r, mm6)

#define SSE2_s0(r, a, b, c) \
    AS2( movdqa xmm6, r)\
    AS2( psrlq r, a)\
    AS2( movdqa xmm7, r)\
    AS2( psllq xmm6, 64-c)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor xmm7, r)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, xmm7)\
    AS2( psllq xmm6, c-a)\
    AS2( pxor r, xmm6)

#define SSE2_s1(r, a, b, c) \
    AS2( movdqa xmm6, r)\
    AS2( psrlq r, a)\
    AS2( movdqa xmm7, r)\
    AS2( psllq xmm6, 64-c)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, b-a)\
    AS2( pxor xmm7, r)\
    AS2( psllq xmm6, c-b)\
    AS2( pxor xmm7, xmm6)\
    AS2( psrlq r, c-b)\
    AS2( pxor r, xmm7)
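
// MMX/SSE2 have no 64-bit rotate instruction, so the three macros above build
// each rotation from a pair of shifts, ROTR(x,n) = (x >> n) | (x << (64-n)),
// and share intermediate shift results across the three rotation amounts
// (a < b < c). SSE2_S0_S1 runs in MMX registers for the round function, while
// SSE2_s0/SSE2_s1 run two lanes at a time in XMM registers for message expansion.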

    ASL(SHA512_Round)
    // k + w is in mm0, a is in mm4, e is in mm5
    AS2( paddq mm0, [edi+7*8]) // h
    AS2( movq mm2, [edi+5*8]) // f
    AS2( movq mm3, [edi+6*8]) // g
    AS2( pxor mm2, mm3)
    AS2( pand mm2, mm5)
    SSE2_S0_S1(mm5,14,18,41)
    AS2( pxor mm2, mm3)
    AS2( paddq mm0, mm2) // h += Ch(e,f,g)
    AS2( paddq mm5, mm0) // h += S1(e)
    AS2( movq mm2, [edi+1*8]) // b
    AS2( movq mm1, mm2)
    AS2( por mm2, mm4)
    AS2( pand mm2, [edi+2*8]) // c
    AS2( pand mm1, mm4)
    AS2( por mm1, mm2)
    AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
    AS2( paddq mm5, [edi+3*8]) // e = d + h
    AS2( movq [edi+3*8], mm5)
    AS2( movq [edi+11*8], mm5)
    SSE2_S0_S1(mm4,28,34,39) // S0(a)
    AS2( paddq mm4, mm1) // a = temp + S0(a)
    AS2( movq [edi-8], mm4)
    AS2( movq [edi+7*8], mm4)
    AS1( ret)

    // first 16 rounds
    ASL(0)
    AS2( movq mm0, [edx+eax*8])
    AS2( movq [esi+eax*8], mm0)
    AS2( movq [esi+eax*8+16*8], mm0)
    AS2( paddq mm0, [ebx+eax*8])
    ASC( call, SHA512_Round)
    AS1( inc eax)
    AS2( sub edi, 8)
    AS2( test eax, 7)
    ASJ( jnz, 0, b)
    AS2( add edi, 8*8)
    AS2( cmp eax, 16)
    ASJ( jne, 0, b)

    // rest of the rounds
    AS2( movdqu xmm0, [esi+(16-2)*8])
    ASL(1)
    // data expansion, W[i-2] already in xmm0
    AS2( movdqu xmm3, [esi])
    AS2( paddq xmm3, [esi+(16-7)*8])
    AS2( movdqa xmm2, [esi+(16-15)*8])
    SSE2_s1(xmm0, 6, 19, 61)
    AS2( paddq xmm0, xmm3)
    SSE2_s0(xmm2, 1, 7, 8)
    AS2( paddq xmm0, xmm2)
    AS2( movdq2q mm0, xmm0)
    AS2( movhlps xmm1, xmm0)
    AS2( paddq mm0, [ebx+eax*8])
    AS2( movlps [esi], xmm0)
    AS2( movlps [esi+8], xmm1)
    AS2( movlps [esi+8*16], xmm0)
    AS2( movlps [esi+8*17], xmm1)
    // 2 rounds
    ASC( call, SHA512_Round)
    AS2( sub edi, 8)
    AS2( movdq2q mm0, xmm1)
    AS2( paddq mm0, [ebx+eax*8+8])
    ASC( call, SHA512_Round)
    // update indices and loop
    AS2( add esi, 16)
    AS2( add eax, 2)
    AS2( sub edi, 8)
    AS2( test eax, 7)
    ASJ( jnz, 1, b)
    // do housekeeping every 8 rounds
    AS2( mov esi, 0xf)
    AS2( and esi, eax)
#if CRYPTOPP_BOOL_X32
    AS2( lea esi, [esp+8+20*8+8+esi*8])
#else
    AS2( lea esi, [esp+4+20*8+8+esi*8])
#endif
    AS2( add edi, 8*8)
    AS2( cmp eax, 80)
    ASJ( jne, 1, b)

#define SSE2_CombineState(i) \
    AS2( movdqa xmm0, [edi+i*16])\
    AS2( paddq xmm0, [ecx+i*16])\
    AS2( movdqa [ecx+i*16], xmm0)

    SSE2_CombineState(0)
    SSE2_CombineState(1)
    SSE2_CombineState(2)
    SSE2_CombineState(3)

    AS_POP_IF86( sp)
    AS1( emms)

#if defined(__GNUC__)
    AS_POP_IF86( bx)
    ATT_PREFIX
    :
    : "a" (SHA512_K), "c" (state), "d" (data)
    : "%esi", "%edi", "memory", "cc"
    );
#else
    AS1( pop edi)
    AS1( pop esi)
    AS1( pop ebx)
    AS1( ret)
#endif
}
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

void SHA512::Transform(word64 *state, const word64 *data)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
    if (HasSSE2())
    {
        SHA512_SSE2_Transform(state, data);
        return;
    }
#endif

#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

    word64 W[16];
    word64 T[8];
    /* Copy context->state[] to working vars */
    memcpy(T, state, sizeof(T));
    /* 80 operations, partially loop unrolled */
    for (unsigned int j=0; j<80; j+=16)
    {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
    /* Add the working vars back into context.state[] */
    state[0] += a(0);
    state[1] += b(0);
    state[2] += c(0);
    state[3] += d(0);
    state[4] += e(0);
    state[5] += f(0);
    state[6] += g(0);
    state[7] += h(0);
}

NAMESPACE_END

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS