Crypto++
|
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code

#include "pch.h"

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "sha.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

// start of Steve Reid's code

// blk0: rounds 0-15 take the message schedule word straight from the input block.
// blk1: rounds 16-79 expand the schedule in place in a 16-word circular buffer
// (the &15 masks), per the SHA-1 recurrence W[t] = ROTL1(W[t-3]^W[t-8]^W[t-14]^W[t-16]).
#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))

// Load the five SHA-1 initial chaining values (H0..H4).
void SHA1::InitState(HashWordType *state)
{
	state[0] = 0x67452301L;
	state[1] = 0xEFCDAB89L;
	state[2] = 0x98BADCFEL;
	state[3] = 0x10325476L;
	state[4] = 0xC3D2E1F0L;
}

// SHA-1 round functions: f1 = Ch, f2 = Parity, f3 = Maj, f4 = Parity.
#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

// Compress one 512-bit block (sixteen word32s in data[]) into the five-word
// SHA-1 chaining state. The a..e variables are rotated through the R* macro
// arguments instead of being shuffled at runtime.
void SHA1::Transform(word32 *state, const word32 *data)
{
	word32 W[16];
	/* Copy context->state[] to working vars */
	word32 a = state[0];
	word32 b = state[1];
	word32 c = state[2];
	word32 d = state[3];
	word32 e = state[4];
	/* 4 rounds of 20 operations each. Loop unrolled. */
	R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
	R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
	R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
	R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
	R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
	R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
	R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
	R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
	R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
	R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
	R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
	R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
	R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
	R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
	R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
	R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
	R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
	R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
	R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
	R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
	/* Add the working vars back into context.state[] */
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
}

// end of Steve Reid's code

// *************************************************************

// SHA-224 initial hash values (truncated-output variant of SHA-256).
void SHA224::InitState(HashWordType *state)
{
	static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
	memcpy(state, s, sizeof(s));
}

// SHA-256 initial hash values.
void SHA256::InitState(HashWordType *state)
{
	static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
	memcpy(state, s, sizeof(s));
}

// SHA-256 round constants. Exported (extern) because the assembly below
// addresses the table directly (it starts indexing from SHA256_K+48*4 and
// works with a moving pointer); 16-byte aligned when the SSE2 path is built.
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
#endif
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)

#pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code

// Assembly SHA-256 compression over len/64 consecutive 64-byte blocks.
// Calling convention: fastcall-style, state in ecx/rcx, data in edx/rdx
// (for GCC this is arranged via the "c"/"d" asm constraints at the bottom).
// Quirk: on x86 the low bit of len selects the code path -- callers pass a
// multiple of 64 minus !HasSSE2(), and the "test edi, 1" below routes odd
// lengths to the non-SSE2 path, remembering the choice in K_END's low bit.
static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
#if defined(_MSC_VER) && (_MSC_VER == 1200)
	, ...	// VC60 workaround: prevent VC 6 from inlining this function
#endif
	)
{
#if defined(_MSC_VER) && (_MSC_VER == 1200)
	AS2(	mov		ecx, [state])
	AS2(	mov		edx, [data])
#endif

	// Stack frame layout (relative to BASE): 8 words of rolling hash state,
	// a 16-word circular W buffer, then 4 saved pointers. H..A and Wt use
	// ASM_MOD so the "registers" rotate by round index instead of moving data.
	#define LOCALS_SIZE	8*4 + 16*4 + 4*WORD_SZ
	#define H(i)		[BASE+ASM_MOD(1024+7-(i),8)*4]
	#define G(i)		H(i+1)
	#define F(i)		H(i+2)
	#define E(i)		H(i+3)
	#define D(i)		H(i+4)
	#define C(i)		H(i+5)
	#define B(i)		H(i+6)
	#define A(i)		H(i+7)
	#define Wt(i)		BASE+8*4+ASM_MOD(1024+15-(i),16)*4
	#define Wt_2(i)		Wt((i)-2)
	#define Wt_15(i)	Wt((i)-15)
	#define Wt_7(i)		Wt((i)-7)
	#define K_END		[BASE+8*4+16*4+0*WORD_SZ]
	#define STATE_SAVE	[BASE+8*4+16*4+1*WORD_SZ]
	#define DATA_SAVE	[BASE+8*4+16*4+2*WORD_SZ]
	#define DATA_END	[BASE+8*4+16*4+3*WORD_SZ]
	#define Kt(i)		WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X86
	#define BASE		esp+4
#elif defined(__GNUC__)
	#define BASE		r8
#else
	#define BASE		rsp
#endif

	// RA0/RB0 are used for the first 16 rounds (message words already in Wt);
	// RA1/RB1 for later rounds, where RB1 also computes the schedule expansion.
	#define RA0(i, edx, edi)		\
		AS2(	add edx, [Kt(i)]	)\
		AS2(	add edx, [Wt(i)]	)\
		AS2(	add edx, H(i)		)\

	#define RA1(i, edx, edi)

	#define RB0(i, edx, edi)

	#define RB1(i, edx, edi)	\
		AS2(	mov AS_REG_7d, [Wt_2(i)]	)\
		AS2(	mov edi, [Wt_15(i)])\
		AS2(	mov ebx, AS_REG_7d	)\
		AS2(	shr AS_REG_7d, 10		)\
		AS2(	ror ebx, 17		)\
		AS2(	xor AS_REG_7d, ebx	)\
		AS2(	ror ebx, 2		)\
		AS2(	xor ebx, AS_REG_7d	)/* s1(W_t-2) */\
		AS2(	add ebx, [Wt_7(i)])\
		AS2(	mov AS_REG_7d, edi	)\
		AS2(	shr AS_REG_7d, 3		)\
		AS2(	ror edi, 7		)\
		AS2(	add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
		AS2(	xor AS_REG_7d, edi	)\
		AS2(	add edx, [Kt(i)])\
		AS2(	ror edi, 11		)\
		AS2(	add edx, H(i)	)\
		AS2(	xor AS_REG_7d, edi	)/* s0(W_t-15) */\
		AS2(	add AS_REG_7d, ebx	)/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
		AS2(	mov [Wt(i)], AS_REG_7d)\
		AS2(	add edx, AS_REG_7d	)\

	// One SHA-256 round; register roles for eax/ecx/edi/edx alternate between
	// successive rounds (see the ROUND(...) call list below).
	#define ROUND(i, r, eax, ecx, edi, edx)\
		/* in: edi = E	*/\
		/* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
		AS2(	mov edx, F(i)	)\
		AS2(	xor edx, G(i)	)\
		AS2(	and edx, edi	)\
		AS2(	xor edx, G(i)	)/* Ch(E,F,G) = (G^(E&(F^G))) */\
		AS2(	mov AS_REG_7d, edi	)\
		AS2(	ror edi, 6		)\
		AS2(	ror AS_REG_7d, 25		)\
		RA##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
		AS2(	xor AS_REG_7d, edi	)\
		AS2(	ror edi, 5		)\
		AS2(	xor AS_REG_7d, edi	)/* S1(E) */\
		AS2(	add edx, AS_REG_7d	)/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
		RB##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
		/* in: ecx = A, eax = B^C, edx = T1 */\
		/* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
		AS2(	mov ebx, ecx	)\
		AS2(	xor ecx, B(i)	)/* A^B */\
		AS2(	and eax, ecx	)\
		AS2(	xor eax, B(i)	)/* Maj(A,B,C) = B^((A^B)&(B^C) */\
		AS2(	mov AS_REG_7d, ebx	)\
		AS2(	ror ebx, 2		)\
		AS2(	add eax, edx	)/* T1 + Maj(A,B,C) */\
		AS2(	add edx, D(i)	)\
		AS2(	mov D(i), edx	)\
		AS2(	ror AS_REG_7d, 22		)\
		AS2(	xor AS_REG_7d, ebx	)\
		AS2(	ror ebx, 11		)\
		AS2(	xor AS_REG_7d, ebx	)\
		AS2(	add eax, AS_REG_7d	)/* T1 + S0(A) + Maj(A,B,C) */\
		AS2(	mov H(i), eax	)\

	// Byte-swap one input word into the W buffer (two words at a time on x64,
	// via the full-width bswap and the index adjustment).
	#define SWAP_COPY(i)	\
		AS2(	mov		WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
		AS1(	bswap	WORD_REG(bx))\
		AS2(	mov		[Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))

#if defined(__GNUC__)
	#if CRYPTOPP_BOOL_X64
	FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
	#endif
	__asm__ __volatile__
	(
	#if CRYPTOPP_BOOL_X64
		"lea %4, %%r8;"
	#endif
	".intel_syntax noprefix;"
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
		ALIGN   8
	X86_SHA256_HashBlocks	PROC FRAME
		rex_push_reg rsi
		push_reg rdi
		push_reg rbx
		push_reg rbp
		alloc_stack(LOCALS_SIZE+8)
		.endprolog
		mov rdi, r8
		lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif

#if CRYPTOPP_BOOL_X86
	#ifndef __GNUC__
		AS2(	mov		edi, [len])
		AS2(	lea		WORD_REG(si), [SHA256_K+48*4])
	#endif
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
		AS_PUSH_IF86(bx)
	#endif

	AS_PUSH_IF86(bp)
	AS2(	mov		ebx, esp)
	AS2(	and		esp, -16)
	AS2(	sub		WORD_REG(sp), LOCALS_SIZE)
	AS_PUSH_IF86(bx)
#endif
	AS2(	mov		STATE_SAVE, WORD_REG(cx))
	AS2(	mov		DATA_SAVE, WORD_REG(dx))
	AS2(	lea		WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
	AS2(	mov		DATA_END, WORD_REG(ax))
	AS2(	mov		K_END, WORD_REG(si))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	#if CRYPTOPP_BOOL_X86
	// Odd len means "no SSE2": take the scalar path and mark K_END's low bit.
	AS2(	test	edi, 1)
	ASJ(	jnz,	2, f)
	AS1(	dec		DWORD PTR K_END)
	#endif
	AS2(	movdqa	xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
	AS2(	movdqa	xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86
	#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ(	jmp,	0, f)
	#endif
	ASL(2)	// non-SSE2: copy the 8-word state into the local A..H slots
	AS2(	mov		esi, ecx)
	AS2(	lea		edi, A(0))
	AS2(	mov		ecx, 8)
	AS1(	rep movsd)
	AS2(	mov		esi, K_END)
	ASJ(	jmp,	3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(0)
	AS2(	movdqa	E(0), xmm1)
	AS2(	movdqa	A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86
	ASL(3)
#endif
	AS2(	sub		WORD_REG(si), 48*4)
	SWAP_COPY(0)	SWAP_COPY(1)	SWAP_COPY(2)	SWAP_COPY(3)
	SWAP_COPY(4)	SWAP_COPY(5)	SWAP_COPY(6)	SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86
	SWAP_COPY(8)	SWAP_COPY(9)	SWAP_COPY(10)	SWAP_COPY(11)
	SWAP_COPY(12)	SWAP_COPY(13)	SWAP_COPY(14)	SWAP_COPY(15)
#endif
	AS2(	mov		edi, E(0))	// E
	AS2(	mov		eax, B(0))	// B
	AS2(	xor		eax, C(0))	// B^C
	AS2(	mov		ecx, A(0))	// A

	// First 16 rounds: schedule words come straight from the swapped input.
	ROUND(0, 0, eax, ecx, edi, edx)
	ROUND(1, 0, ecx, eax, edx, edi)
	ROUND(2, 0, eax, ecx, edi, edx)
	ROUND(3, 0, ecx, eax, edx, edi)
	ROUND(4, 0, eax, ecx, edi, edx)
	ROUND(5, 0, ecx, eax, edx, edi)
	ROUND(6, 0, eax, ecx, edi, edx)
	ROUND(7, 0, ecx, eax, edx, edi)
	ROUND(8, 0, eax, ecx, edi, edx)
	ROUND(9, 0, ecx, eax, edx, edi)
	ROUND(10, 0, eax, ecx, edi, edx)
	ROUND(11, 0, ecx, eax, edx, edi)
	ROUND(12, 0, eax, ecx, edi, edx)
	ROUND(13, 0, ecx, eax, edx, edi)
	ROUND(14, 0, eax, ecx, edi, edx)
	ROUND(15, 0, ecx, eax, edx, edi)

	// Rounds 16-63, 16 at a time; esi walks SHA256_K until it reaches K_END.
	ASL(1)
	AS2(add	WORD_REG(si), 4*16)
	ROUND(0, 1, eax, ecx, edi, edx)
	ROUND(1, 1, ecx, eax, edx, edi)
	ROUND(2, 1, eax, ecx, edi, edx)
	ROUND(3, 1, ecx, eax, edx, edi)
	ROUND(4, 1, eax, ecx, edi, edx)
	ROUND(5, 1, ecx, eax, edx, edi)
	ROUND(6, 1, eax, ecx, edi, edx)
	ROUND(7, 1, ecx, eax, edx, edi)
	ROUND(8, 1, eax, ecx, edi, edx)
	ROUND(9, 1, ecx, eax, edx, edi)
	ROUND(10, 1, eax, ecx, edi, edx)
	ROUND(11, 1, ecx, eax, edx, edi)
	ROUND(12, 1, eax, ecx, edi, edx)
	ROUND(13, 1, ecx, eax, edx, edi)
	ROUND(14, 1, eax, ecx, edi, edx)
	ROUND(15, 1, ecx, eax, edx, edi)
	AS2(	cmp		WORD_REG(si), K_END)
	ASJ(	jb,		1, b)

	// Advance to the next input block.
	AS2(	mov		WORD_REG(dx), DATA_SAVE)
	AS2(	add		WORD_REG(dx), 64)
	AS2(	mov		AS_REG_7, STATE_SAVE)
	AS2(	mov		DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	#if CRYPTOPP_BOOL_X86
	AS2(	test	DWORD PTR K_END, 1)
	ASJ(	jz,		4, f)
	#endif
	// SSE2 path: fold the working variables back into state[] 4 words at a time.
	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_7+1*16])
	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_7+0*16])
	AS2(	paddd	xmm1, E(0))
	AS2(	paddd	xmm0, A(0))
	AS2(	movdqa	[AS_REG_7+1*16], xmm1)
	AS2(	movdqa	[AS_REG_7+0*16], xmm0)
	AS2(	cmp		WORD_REG(dx), DATA_END)
	ASJ(	jb,		0, b)
#endif

#if CRYPTOPP_BOOL_X86
	#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ(	jmp,	5, f)
	ASL(4)	// non-SSE2
	#endif
	// Scalar path: add working vars (A and E still live in ecx/edi) into state[].
	AS2(	add		[AS_REG_7+0*4], ecx)	// A
	AS2(	add		[AS_REG_7+4*4], edi)	// E
	AS2(	mov		eax, B(0))
	AS2(	mov		ebx, C(0))
	AS2(	mov		ecx, D(0))
	AS2(	add		[AS_REG_7+1*4], eax)
	AS2(	add		[AS_REG_7+2*4], ebx)
	AS2(	add		[AS_REG_7+3*4], ecx)
	AS2(	mov		eax, F(0))
	AS2(	mov		ebx, G(0))
	AS2(	mov		ecx, H(0))
	AS2(	add		[AS_REG_7+5*4], eax)
	AS2(	add		[AS_REG_7+6*4], ebx)
	AS2(	add		[AS_REG_7+7*4], ecx)
	AS2(	mov		ecx, AS_REG_7d)
	AS2(	cmp		WORD_REG(dx), DATA_END)
	ASJ(	jb,		2, b)
	#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(5)
	#endif
#endif

	AS_POP_IF86(sp)
	AS_POP_IF86(bp)
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
		AS_POP_IF86(bx)
	#endif

#ifdef CRYPTOPP_GENERATE_X64_MASM
	add		rsp, LOCALS_SIZE+8
	pop		rbp
	pop		rbx
	pop		rdi
	pop		rsi
	ret
	X86_SHA256_HashBlocks ENDP
#endif

#ifdef __GNUC__
	".att_syntax prefix;"
	: 
	: "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
	#if CRYPTOPP_BOOL_X64
		, "m" (workspace[0])
	#endif
	: "memory", "cc", "%eax"
	#if CRYPTOPP_BOOL_X64
		, "%rbx", "%r8", "%r10"
	#endif
	);
#endif
}

#endif	// #if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
}
#endif

#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)

// length & (size_t(0)-BLOCKSIZE) rounds length down to a whole number of
// 64-byte blocks; subtracting !HasSSE2() sets the low bit of len to tell the
// assembly to use its non-SSE2 path. Returns the unprocessed remainder.
size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
	X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
	return length % BLOCKSIZE;
}

// Same as SHA256::HashMultipleBlocks -- SHA-224 shares the SHA-256 compression.
size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
	X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
	return length % BLOCKSIZE;
}

#endif

// SHA-2 message schedule expansion, computed in place in a 16-word circular buffer.
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) (y^((x^y)&(y^z)))

// Rolling working variables: a(i)..h(i) index into T[] with an offset that
// shifts by one each round, so the per-round variable rotation costs nothing.
#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]

// One SHA-256 round; j is the 16-round batch offset from the enclosing loop.
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))

// Compress one 512-bit block into the eight-word SHA-256 chaining state.
// Uses the assembly block routine when available, otherwise portable C++.
void SHA256::Transform(word32 *state, const word32 *data)
{
	word32 W[16];
#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
	// this byte reverse is a waste of time, but this function is only called by MDC
	ByteReverse(W, data, BLOCKSIZE);
	X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2());
#else
	word32 T[8];
	/* Copy context->state[] to working vars */
	memcpy(T, state, sizeof(T));
	/* 64 operations, partially loop unrolled */
	for (unsigned int j=0; j<64; j+=16)
	{
		R( 0); R( 1); R( 2); R( 3);
		R( 4); R( 5); R( 6); R( 7);
		R( 8); R( 9); R(10); R(11);
		R(12); R(13); R(14); R(15);
	}
	/* Add the working vars back into context.state[] */
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
#endif
}

/*
// smaller but slower
void SHA256::Transform(word32 *state, const word32 *data)
{
	word32 T[20];
	word32 W[32];
	unsigned int i = 0, j = 0;
	word32 *t = T+8;

	memcpy(t, state, 8*4);
	word32 e = t[4], a = t[0];

	do 
	{
		word32 w = data[j];
		W[j] = w;
		w += SHA256_K[j];
		w += t[7];
		w += S1(e);
		w += Ch(e, t[5], t[6]);
		e = t[3] + w;
		t[3] = t[3+8] = e;
		w += S0(t[0]);
		a = w + Maj(a, t[1], t[2]);
		t[-1] = t[7] = a;
		--t;
		++j;
		if (j%8 == 0)
			t += 8;
	} while (j<16);

	do
	{
		i = j&0xf;
		word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
		W[i+16] = W[i] = w;
		w += SHA256_K[j];
		w += t[7];
		w += S1(e);
		w += Ch(e, t[5], t[6]);
		e = t[3] + w;
		t[3] = t[3+8] = e;
		w += S0(t[0]);
		a = w + Maj(a, t[1], t[2]);
		t[-1] = t[7] = a;

		w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
		W[(i+1)+16] = W[(i+1)] = w;
		w += SHA256_K[j+1];
		w += (t-1)[7];
		w += S1(e);
		w += Ch(e, (t-1)[5], (t-1)[6]);
		e = (t-1)[3] + w;
		(t-1)[3] = (t-1)[3+8] = e;
		w += S0((t-1)[0]);
		a = w + Maj(a, (t-1)[1], (t-1)[2]);
		(t-1)[-1] = (t-1)[7] = a;

		t-=2;
		j+=2;
		if (j%8 == 0)
			t += 8;
	} while (j<64);

	state[0] += a;
	state[1] += t[1];
	state[2] += t[2];
	state[3] += t[3];
	state[4] += e;
	state[5] += t[5];
	state[6] += t[6];
	state[7] += t[7];
}
*/

#undef S0
#undef S1
#undef s0
#undef s1
#undef R

// *************************************************************

// SHA-384 initial hash values (truncated-output variant of SHA-512).
void SHA384::InitState(HashWordType *state)
{
	static const word64 s[8] = {
		W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
		W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
		W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
		W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
	memcpy(state, s, sizeof(s));
}

// SHA-512 initial hash values.
void SHA512::InitState(HashWordType *state)
{
	static const word64 s[8] = {
		W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
		W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
		W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
		W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
	memcpy(state, s, sizeof(s));
}

// SHA-512 round constants; 16-byte aligned for the x86 SSE2 implementation.
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
#else
static const word64 SHA512_K[80] = {
#endif
	W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
	W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
	W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
	W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
	W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
	W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
	W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
	W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
	W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
	W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
	W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
	W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
	W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
	W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
	W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
	W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
	W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
	W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
	W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
	W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
	W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
	W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
	W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
	W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
	W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
	W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
	W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
	W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
	W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
	W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
	W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
	W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
	W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
	W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
	W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
	W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
	W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
	W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
	W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
	W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
// SHA-512 compression for x86 using MMX/SSE2 64-bit arithmetic. Naked,
// fastcall-style: state arrives in ecx, data in edx (GCC: "c"/"d" constraints,
// with SHA512_K passed in eax). NOTE(review): the round state lives in a
// sliding window on the stack (edi decremented per round) -- do not reorder.
CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
{
#ifdef __GNUC__
	__asm__ __volatile__
	(
	".intel_syntax noprefix;"
	AS1(	push	ebx)
	AS2(	mov		ebx, eax)
#else
	AS1(	push	ebx)
	AS1(	push	esi)
	AS1(	push	edi)
	AS2(	lea		ebx, SHA512_K)
#endif

	AS2(	mov		eax, esp)
	AS2(	and		esp, 0xfffffff0)
	AS2(	sub		esp, 27*16)				// 17*16 for expanded data, 20*8 for state
	AS1(	push	eax)
	AS2(	xor		eax, eax)
	AS2(	lea		edi, [esp+4+8*8])		// start at middle of state buffer. will decrement pointer each round to avoid copying
	AS2(	lea		esi, [esp+4+20*8+8])	// 16-byte alignment, then add 8

	// Load the 8-word (4 x 128-bit) chaining state; keep a (mm4) and e (mm5)
	// cached in MMX registers across rounds.
	AS2(	movdqa	xmm0, [ecx+0*16])
	AS2(	movdq2q	mm4, xmm0)
	AS2(	movdqa	[edi+0*16], xmm0)
	AS2(	movdqa	xmm0, [ecx+1*16])
	AS2(	movdqa	[edi+1*16], xmm0)
	AS2(	movdqa	xmm0, [ecx+2*16])
	AS2(	movdq2q	mm5, xmm0)
	AS2(	movdqa	[edi+2*16], xmm0)
	AS2(	movdqa	xmm0, [ecx+3*16])
	AS2(	movdqa	[edi+3*16], xmm0)
	ASJ(	jmp,	0, f)

	// 64-bit sigma functions built from shifts/xors: rotate amounts a<b<c.
	#define SSE2_S0_S1(r, a, b, c)	\
	AS2(	movq	mm6, r)\
	AS2(	psrlq	r, a)\
	AS2(	movq	mm7, r)\
	AS2(	psllq	mm6, 64-c)\
	AS2(	pxor	mm7, mm6)\
	AS2(	psrlq	r, b-a)\
	AS2(	pxor	mm7, r)\
	AS2(	psllq	mm6, c-b)\
	AS2(	pxor	mm7, mm6)\
	AS2(	psrlq	r, c-b)\
	AS2(	pxor	r, mm7)\
	AS2(	psllq	mm6, b-a)\
	AS2(	pxor	r, mm6)

	#define SSE2_s0(r, a, b, c)	\
	AS2(	movdqa	xmm6, r)\
	AS2(	psrlq	r, a)\
	AS2(	movdqa	xmm7, r)\
	AS2(	psllq	xmm6, 64-c)\
	AS2(	pxor	xmm7, xmm6)\
	AS2(	psrlq	r, b-a)\
	AS2(	pxor	xmm7, r)\
	AS2(	psrlq	r, c-b)\
	AS2(	pxor	r, xmm7)\
	AS2(	psllq	xmm6, c-a)\
	AS2(	pxor	r, xmm6)

	#define SSE2_s1(r, a, b, c)	\
	AS2(	movdqa	xmm6, r)\
	AS2(	psrlq	r, a)\
	AS2(	movdqa	xmm7, r)\
	AS2(	psllq	xmm6, 64-c)\
	AS2(	pxor	xmm7, xmm6)\
	AS2(	psrlq	r, b-a)\
	AS2(	pxor	xmm7, r)\
	AS2(	psllq	xmm6, c-b)\
	AS2(	pxor	xmm7, xmm6)\
	AS2(	psrlq	r, c-b)\
	AS2(	pxor	r, xmm7)

	// One SHA-512 round as a local subroutine, called with K[t]+W[t] in mm0.
	ASL(SHA512_Round)
	// k + w is in mm0, a is in mm4, e is in mm5
	AS2(	paddq	mm0, [edi+7*8])		// h
	AS2(	movq	mm2, [edi+5*8])		// f
	AS2(	movq	mm3, [edi+6*8])		// g
	AS2(	pxor	mm2, mm3)
	AS2(	pand	mm2, mm5)
	SSE2_S0_S1(mm5,14,18,41)
	AS2(	pxor	mm2, mm3)
	AS2(	paddq	mm0, mm2)			// h += Ch(e,f,g)
	AS2(	paddq	mm5, mm0)			// h += S1(e)
	AS2(	movq	mm2, [edi+1*8])		// b
	AS2(	movq	mm1, mm2)
	AS2(	por		mm2, mm4)
	AS2(	pand	mm2, [edi+2*8])		// c
	AS2(	pand	mm1, mm4)
	AS2(	por		mm1, mm2)
	AS2(	paddq	mm1, mm5)			// temp = h + Maj(a,b,c)
	AS2(	paddq	mm5, [edi+3*8])		// e = d + h
	AS2(	movq	[edi+3*8], mm5)
	AS2(	movq	[edi+11*8], mm5)
	SSE2_S0_S1(mm4,28,34,39)			// S0(a)
	AS2(	paddq	mm4, mm1)			// a = temp + S0(a)
	AS2(	movq	[edi-8], mm4)
	AS2(	movq	[edi+7*8], mm4)
	AS1(	ret)

	// first 16 rounds
	ASL(0)
	AS2(	movq	mm0, [edx+eax*8])
	AS2(	movq	[esi+eax*8], mm0)
	AS2(	movq	[esi+eax*8+16*8], mm0)
	AS2(	paddq	mm0, [ebx+eax*8])
	ASC(	call,	SHA512_Round)
	AS1(	inc		eax)
	AS2(	sub		edi, 8)
	AS2(	test	eax, 7)
	ASJ(	jnz,	0, b)
	AS2(	add		edi, 8*8)
	AS2(	cmp		eax, 16)
	ASJ(	jne,	0, b)

	// rest of the rounds
	AS2(	movdqu	xmm0, [esi+(16-2)*8])
	ASL(1)
	// data expansion, W[i-2] already in xmm0
	AS2(	movdqu	xmm3, [esi])
	AS2(	paddq	xmm3, [esi+(16-7)*8])
	AS2(	movdqa	xmm2, [esi+(16-15)*8])
	SSE2_s1(xmm0, 6, 19, 61)
	AS2(	paddq	xmm0, xmm3)
	SSE2_s0(xmm2, 1, 7, 8)
	AS2(	paddq	xmm0, xmm2)
	AS2(	movdq2q	mm0, xmm0)
	AS2(	movhlps	xmm1, xmm0)
	AS2(	paddq	mm0, [ebx+eax*8])
	AS2(	movlps	[esi], xmm0)
	AS2(	movlps	[esi+8], xmm1)
	AS2(	movlps	[esi+8*16], xmm0)
	AS2(	movlps	[esi+8*17], xmm1)
	// 2 rounds
	ASC(	call,	SHA512_Round)
	AS2(	sub		edi, 8)
	AS2(	movdq2q	mm0, xmm1)
	AS2(	paddq	mm0, [ebx+eax*8+8])
	ASC(	call,	SHA512_Round)
	// update indices and loop
	AS2(	add		esi, 16)
	AS2(	add		eax, 2)
	AS2(	sub		edi, 8)
	AS2(	test	eax, 7)
	ASJ(	jnz,	1, b)
	// do housekeeping every 8 rounds
	AS2(	mov		esi, 0xf)
	AS2(	and		esi, eax)
	AS2(	lea		esi, [esp+4+20*8+8+esi*8])
	AS2(	add		edi, 8*8)
	AS2(	cmp		eax, 80)
	ASJ(	jne,	1, b)

	// Fold the final working variables back into state[], 2 words per xmm op.
	#define SSE2_CombineState(i)	\
	AS2(	movdqa	xmm0, [edi+i*16])\
	AS2(	paddq	xmm0, [ecx+i*16])\
	AS2(	movdqa	[ecx+i*16], xmm0)

	SSE2_CombineState(0)
	SSE2_CombineState(1)
	SSE2_CombineState(2)
	SSE2_CombineState(3)

	AS1(	pop		esp)
	AS1(	emms)

#if defined(__GNUC__)
	AS1(	pop		ebx)
	".att_syntax prefix;"
	: 
	: "a" (SHA512_K), "c" (state), "d" (data)
	: "%esi", "%edi", "memory", "cc"
	);
#else
	AS1(	pop		edi)
	AS1(	pop		esi)
	AS1(	pop		ebx)
	AS1(	ret)
#endif
}
#endif	// #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

// Compress one 1024-bit block into the eight-word64 SHA-512 chaining state.
// Dispatches to the SSE2 routine at runtime when available (x86 builds only);
// otherwise uses the same rolling-variable macro scheme as SHA256::Transform,
// with 64-bit sigma functions and 80 rounds.
void SHA512::Transform(word64 *state, const word64 *data)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
	if (HasSSE2())
	{
		SHA512_SSE2_Transform(state, data);
		return;
	}
#endif

#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

	word64 W[16];
	word64 T[8];
	/* Copy context->state[] to working vars */
	memcpy(T, state, sizeof(T));
	/* 80 operations, partially loop unrolled */
	for (unsigned int j=0; j<80; j+=16)
	{
		R( 0); R( 1); R( 2); R( 3);
		R( 4); R( 5); R( 6); R( 7);
		R( 8); R( 9); R(10); R(11);
		R(12); R(13); R(14); R(15);
	}
	/* Add the working vars back into context.state[] */
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
}

NAMESPACE_END

#endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif	// #ifndef CRYPTOPP_IMPORTS