8 #ifndef CRYPTOPP_GENERATE_X64_MASM 15 #if CRYPTOPP_MSC_VERSION 16 # pragma warning(disable: 4702 4740) 23 #if defined(CRYPTOPP_DISABLE_SALSA_ASM) 24 # undef CRYPTOPP_X86_ASM_AVAILABLE 25 # undef CRYPTOPP_X32_ASM_AVAILABLE 26 # undef CRYPTOPP_X64_ASM_AVAILABLE 27 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 28 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 29 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0 30 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0 35 #if !defined(NDEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING) 36 void Salsa20_TestInstantiations()
// Salsa20 key setup: validates the round count and fills the key and constant
// words of m_state.
//   params - named parameters. NOTE(review): presumably the source of m_rounds;
//            the line that assigns m_rounds is not visible in this extract.
//   key    - user key bytes.
//   length - key length in bytes; 16 selects the 16-byte-key constants, any
//            other value selects the 32-byte-key constants.
// Throws InvalidRounds when m_rounds is not 8, 12 or 20.
42 void Salsa20_Policy::CipherSetKey(
const NameValuePairs &params,
const byte *key,
size_t length)
// Only the 8-, 12- and 20-round variants are accepted.
46 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
47 throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds);
// get1/get2 are declared on lines not visible here (presumably word readers
// over the key - TODO confirm). Key words land in the non-sequential slots
// 13,10,7,4 and 15,12,9,6 of m_state.
51 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
53 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
// Constant words. Read as little-endian ASCII they spell "expand 16-byte k"
// (length == 16) or "expand 32-byte k" (otherwise).
56 m_state[0] = 0x61707865;
57 m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
58 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
59 m_state[3] = 0x6b206574;
// Salsa20 resynchronisation: installs a new IV and rewinds the block counter.
//   keystreamBuffer - unused.
//   IV              - initialisation vector bytes.
//   length          - unused.
62 void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
64 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
// `get` is declared on a line not visible here (presumably a word reader over
// IV - TODO confirm); it fills state words 14 and 11.
68 get(m_state[14])(m_state[11]);
// Words 8 (low) and 5 (high) hold the 64-bit block counter; start from zero.
69 m_state[8] = m_state[5] = 0;
// Positions the keystream at block `iterationCount` by splitting the value
// across the two 32-bit counter words of m_state.
72 void Salsa20_Policy::SeekToIteration(lword iterationCount)
// Low 32 bits go to word 8.
74 m_state[8] = (word32)iterationCount;
// High 32 bits go to word 5.
75 m_state[5] = (word32)SafeRightShift<32>(iterationCount);
// Reports the data alignment for this policy. Only compiled for x86/x32/x64
// builds where the Salsa assembly has not been disabled.
// NOTE(review): the body of the CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE branch
// (original lines 82-85) is not visible in this extract; the only visible
// return is the natural alignment of word32.
78 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SALSA_ASM) 79 unsigned int Salsa20_Policy::GetAlignment()
const 81 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 86 return GetAlignmentOf<word32>();
// Reports the preferred processing granularity in bytes.
// NOTE(review): the condition guarding the first return and the #else/#endif
// lines are elided in this extract - confirm against the original file.
89 unsigned int Salsa20_Policy::GetOptimalBlockSize()
// With the SSE2 assembly available, four iterations' worth of bytes.
const 91 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 93 return 4*BYTES_PER_ITERATION;
// Otherwise a single iteration's worth of bytes.
96 return BYTES_PER_ITERATION;
// Declaration of the externally assembled (MASM) keystream routine; only
// declared when CRYPTOPP_X64_MASM_AVAILABLE is defined.
//   output         - destination buffer.
//   input          - source buffer.
//   iterationCount - number of iterations to generate.
//   rounds         - Salsa20 round count.
//   state          - pointer to the cipher state words.
100 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 102 void Salsa20_OperateKeystream(byte *output,
const byte *input,
size_t iterationCount,
int rounds,
void *state);
// Entry point that produces `iterationCount` iterations of keystream.
//   operation      - keystream operation selector (consumed further down by
//                    CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH in the C++ path).
//   output / input - destination and source buffers.
// MSVC warning 4731 (ebp modified by inline assembly) is disabled ahead of the
// definition.
106 #if CRYPTOPP_MSC_VERSION 107 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 110 void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
// When the MASM build is available, forward to the external routine with the
// current round count and a pointer to the state words.
112 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 114 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 115 Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.
data());
// MASM prologue of Salsa20_OperateKeystream (emitted only when generating the
// x64 MASM source).
// Stack frame: 10*16 + 32*16 + 8 bytes. Offsets 0200h-0290h (the 10*16 region
// above the first 32*16 bytes) hold the saved xmm6-xmm15.
// NOTE(review): the trailing +8 presumably restores 16-byte stack alignment
// after the return address - confirm.
119 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 120 #ifdef CRYPTOPP_GENERATE_X64_MASM 122 Salsa20_OperateKeystream PROC FRAME
// The state pointer is fetched from the caller's stack before rsp is adjusted.
123 mov r10, [rsp + 5*8] ; state
124 alloc_stack(10*16 + 32*16 + 8)
// Preserve xmm6-xmm15; the epilogue reloads them from the same offsets.
125 save_xmm128 xmm6, 0200h
126 save_xmm128 xmm7, 0210h
127 save_xmm128 xmm8, 0220h
128 save_xmm128 xmm9, 0230h
129 save_xmm128 xmm10, 0240h
130 save_xmm128 xmm11, 0250h
131 save_xmm128 xmm12, 0260h
132 save_xmm128 xmm13, 0270h
133 save_xmm128 xmm14, 0280h
134 save_xmm128 xmm15, 0290h
// Symbolic register names used by the assembly below, in three variants that
// share this physical line and the next:
//   * MASM x64: output=rcx, input=rdx, iterationCount=r8, rounds=r9d (the
//     32-bit view of r9, which carries the fourth integer argument `int rounds`
//     under the Microsoft x64 convention), state=r10 (loaded from the stack in
//     the prologue), workspace = rsp.
//   * GCC-style x64 inline asm: operands referenced by position (%1, %2, %3,
//     %5) with a 16-byte-aligned local `workspace` of 16*32 bytes. The defines
//     for REG_input and REG_state are on lines not visible in this extract.
//   * 32-bit x86: fixed registers edi/eax/ecx/esi/edx/ebx/ebp, workspace at
//     esp + WORD_SZ.
137 #define REG_output rcx 138 #define REG_input rdx 139 #define REG_iterationCount r8 140 #define REG_state r10 141 #define REG_rounds r9d 142 #define REG_roundsLeft eax 143 #define REG_temp32 r11d 145 #define SSE2_WORKSPACE rsp 149 #if CRYPTOPP_BOOL_X64 150 #define REG_output %1 152 #define REG_iterationCount %2 154 #define REG_rounds %3 155 #define REG_roundsLeft eax 156 #define REG_temp32 edx 158 #define SSE2_WORKSPACE %5 160 CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
162 #define REG_output edi 163 #define REG_input eax 164 #define REG_iterationCount ecx 165 #define REG_state esi 166 #define REG_rounds edx 167 #define REG_roundsLeft ebx 168 #define REG_temp32 ebp 170 #define SSE2_WORKSPACE esp + WORD_SZ 179 void *s = m_state.
data();
// Load the arguments into the symbolic registers. `r` is declared on a line
// not visible here (presumably a copy of m_rounds - TODO confirm).
182 AS2( mov REG_iterationCount, iterationCount)
183 AS2( mov REG_input, input)
184 AS2( mov REG_output, output)
185 AS2( mov REG_state, s)
186 AS2( mov REG_rounds, r)
// Set-up for the four-blocks-at-a-time SSE2 path. The compare against 4 decides
// whether this path is taken; the branch instruction itself is on a line not
// visible in this extract.
188 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 191 AS2( cmp REG_iterationCount, 4)
// SSE2_EXPAND_S(i, j): shuffle element j of xmm<i> with pshufd (selector
// j,j,j,j) and store the result at workspace slot (i*4+j), in the copy that
// starts at byte offset 256.
// The four movdqa loads that follow bring the 16 state words into xmm0-xmm3.
194 #if CRYPTOPP_BOOL_X86 201 #define SSE2_EXPAND_S(i, j) \ 202 ASS( pshufd xmm4, xmm##i, j, j, j, j) \ 203 AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) 205 AS2( movdqa xmm0, [REG_state + 0*16])
206 AS2( movdqa xmm1, [REG_state + 1*16])
207 AS2( movdqa xmm2, [REG_state + 2*16])
208 AS2( movdqa xmm3, [REG_state + 3*16])
// SSE2_EXPAND_S85(i): write the current 64-bit counter (low word -> slot 8,
// high word -> slot 5) into element i of those slots, then advance the counter
// by one with add/adc. REG_roundsLeft and REG_temp32 are used here as scratch
// registers for the low and high counter words.
// NOTE(review): the invocations of both macros are on lines not visible here.
224 #define SSE2_EXPAND_S85(i) \ 225 AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ 226 AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ 227 AS2( add REG_roundsLeft, 1) \ 228 AS2( adc REG_temp32, 0) 231 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
232 AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
// Write the advanced counter back into the cipher state.
237 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
238 AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
240 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 241 AS2( movdqa xmm4, xmm##d) \ 242 AS2( paddd xmm4, xmm##a) \ 243 AS2( movdqa xmm5, xmm4) \ 244 AS2( pslld xmm4, i) \ 245 AS2( psrld xmm5, 32-i) \ 246 AS2( pxor xmm##b, xmm4) \ 247 AS2( pxor xmm##b, xmm5) 249 #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 250 #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) 251 #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 252 #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 253 #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) 254 #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) 255 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 256 #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 257 #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) 258 #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 259 #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 260 #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 261 #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) 262 #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) 263 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 264 #define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 265 #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) 266 #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 267 #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) 268 #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 269 #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) 270 #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) 271 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 272 #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 273 #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) 274 #define L26(A,B,C,D,a,b,c,d,i) 
AS2( paddd xmm##A, xmm##D) 275 #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 276 #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) 277 #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) 278 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 279 #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 280 #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) 282 #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ 283 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ 284 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ 285 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ 286 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ 287 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ 288 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ 289 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ 290 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ 291 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ 292 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ 293 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ 294 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ 295 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ 296 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ 297 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ 298 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ 299 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ 300 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ 301 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ 302 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ 303 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ 304 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ 305 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ 306 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ 307 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ 308 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ 309 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ 310 
L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ 311 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ 312 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ 313 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ 314 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) 316 #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ 317 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ 318 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ 319 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ 320 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ 321 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ 322 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ 323 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ 324 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ 325 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \ 326 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ 327 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ 328 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ 329 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ 330 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ 331 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, 
A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ 332 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ 333 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ 334 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ 335 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ 336 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ 337 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ 338 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \ 339 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ 340 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ 341 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ 342 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ 343 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ 344 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ 345 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ 346 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ 347 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ 348 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i) 350 #if CRYPTOPP_BOOL_X64 351 
// Four-block round processing. The first macro argument selects the source
// copy in the workspace: 1 reads operands from the copy at offset 256, 0 reads
// from offset 0. Results are always stored at offset 0. This first pass
// therefore reads the expanded input state and writes the working state.
// x64 uses one X16 invocation (xmm0-xmm15); the X8 pair is the alternative for
// builds without xmm8-xmm15 (the #else/#endif lines are not visible here).
SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
353 SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
354 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
// Initialise the remaining-rounds counter.
356 AS2( mov REG_roundsLeft, REG_rounds)
// SSE2_Salsa_Output: local subroutine reached through `call` from
// SSE2_OUTPUT_4. It interleaves the 32-bit and then the 64-bit lanes of
// xmm4-xmm7 (the unpack sequence of a 4x4 dword transpose) and hands the four
// resulting registers to AS_XMM_OUTPUT4.
// NOTE(review): the jump around this subroutine and its `ret` are on lines not
// visible in this extract.
359 ASL(SSE2_Salsa_Output)
360 AS2( movdqa xmm0, xmm4)
361 AS2( punpckldq xmm4, xmm5)
362 AS2( movdqa xmm1, xmm6)
363 AS2( punpckldq xmm6, xmm7)
364 AS2( movdqa xmm2, xmm4)
365 AS2( punpcklqdq xmm4, xmm6)
366 AS2( punpckhqdq xmm2, xmm6)
367 AS2( punpckhdq xmm0, xmm5)
368 AS2( punpckhdq xmm1, xmm7)
369 AS2( movdqa xmm6, xmm0)
370 AS2( punpcklqdq xmm0, xmm1)
371 AS2( punpckhqdq xmm6, xmm1)
372 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
// Round loop body, operating on the working copy (source selector 0). Two
// index patterns are used, (0,4,8,12 ...) and (0,13,10,7 ...), each in an X16
// form for x64 and an X8 pair otherwise. The loop label, the conditional
// branches and the preprocessor lines between them are not visible here.
376 #if CRYPTOPP_BOOL_X64 377 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
379 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
381 SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
382 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
384 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
385 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
// The round counter drops by two per pass.
387 AS2( sub REG_roundsLeft, 2)
// SSE2_OUTPUT_4(a, b, c, d): for each of the four slots, load the input copy
// (offset 256), add the working copy (offset 0), then call SSE2_Salsa_Output
// with the sums in xmm4-xmm7. The four invocations cover all 16 slots.
390 #define SSE2_OUTPUT_4(a, b, c, d) \ 391 AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ 392 AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ 393 AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ 394 AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ 395 AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ 396 AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ 397 AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ 398 AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ 399 ASC( call, SSE2_Salsa_Output) 401 SSE2_OUTPUT_4(0, 13, 10, 7)
402 SSE2_OUTPUT_4(4, 1, 14, 11)
403 SSE2_OUTPUT_4(8, 5, 2, 15)
404 SSE2_OUTPUT_4(12, 9, 6, 3)
// Tests REG_input against zero; the branch that consumes the flags is on a
// line not visible here (presumably skips the input advance when input is
// null - TODO confirm).
405 AS2( test REG_input, REG_input)
// Advance both pointers by 12*16 bytes.
// NOTE(review): presumably the remaining bytes of the four blocks are
// accounted for elsewhere (e.g. inside AS_XMM_OUTPUT4) - verify.
407 AS2( add REG_input, 12*16)
409 AS2( add REG_output, 12*16)
// Four iterations consumed; compare again to decide whether to repeat.
410 AS2( sub REG_iterationCount, 4)
411 AS2( cmp REG_iterationCount, 4)
// Single-block SSE2 path: processes one iteration per pass with the whole
// state held in xmm0-xmm3. Labels and conditional jumps around this section
// are on lines not visible in this extract.
416 AS2( sub REG_iterationCount, 1)
// Load the 16 state words.
418 AS2( movdqa xmm0, [REG_state + 0*16])
419 AS2( movdqa xmm1, [REG_state + 1*16])
420 AS2( movdqa xmm2, [REG_state + 2*16])
421 AS2( movdqa xmm3, [REG_state + 3*16])
422 AS2( mov REG_roundsLeft, REG_rounds)
// First half of the double round. SSE2_QUARTER_ROUND(a, b, d, i) XORs into
// xmm<b> the sum xmm<a> + xmm<d> rotated left by i bits; the four steps use
// rotations 7, 9, 13 and 18.
425 SSE2_QUARTER_ROUND(0, 1, 3, 7)
426 SSE2_QUARTER_ROUND(1, 2, 0, 9)
427 SSE2_QUARTER_ROUND(2, 3, 1, 13)
428 SSE2_QUARTER_ROUND(3, 0, 2, 18)
// Permute the lanes of xmm1-xmm3 before the second half.
// NOTE(review): the ASS macro is not visible here; its trailing four numbers
// presumably form the pshufd selector - confirm.
429 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
430 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
431 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
// Second half of the double round, with the roles of xmm1 and xmm3 exchanged.
432 SSE2_QUARTER_ROUND(0, 3, 1, 7)
433 SSE2_QUARTER_ROUND(3, 2, 0, 9)
434 SSE2_QUARTER_ROUND(2, 1, 3, 13)
435 SSE2_QUARTER_ROUND(1, 0, 2, 18)
// Permute the lanes again, using the opposite selectors for xmm1 and xmm3.
436 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
437 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
438 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
// Two rounds done.
439 AS2( sub REG_roundsLeft, 2)
// Feed-forward: add the original state to the permuted working state.
442 AS2( paddd xmm0, [REG_state + 0*16])
443 AS2( paddd xmm1, [REG_state + 1*16])
444 AS2( paddd xmm2, [REG_state + 2*16])
445 AS2( paddd xmm3, [REG_state + 3*16])
// Increment the 64-bit block counter in place (word 8 low, word 5 high).
447 AS2( add dword ptr [REG_state + 8*4], 1)
448 AS2( adc dword ptr [REG_state + 5*4], 0)
// Output reordering for the single-block path: splits xmm0-xmm3 with two
// complementary masks (xmm6/xmm7), recombines the halves, then emits the block
// through AS_XMM_OUTPUT4.
// NOTE(review): several instructions (original lines 451, 459, 463, 466-468)
// are not visible in this extract, including whatever turns the all-ones value
// in xmm6 into a partial mask and the steps that merge the masked halves.
// Presumably this restores the natural word order from the permuted state
// layout - confirm against the original file.
// pcmpeqb of a register with itself sets every bit.
450 AS2( pcmpeqb xmm6, xmm6)
// xmm7 = xmm6 with its dwords rearranged (selector 0,1,2,3).
452 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
453 AS2( movdqa xmm4, xmm0)
454 AS2( movdqa xmm5, xmm3)
455 AS2( pand xmm0, xmm7)
456 AS2( pand xmm4, xmm6)
457 AS2( pand xmm3, xmm6)
458 AS2( pand xmm5, xmm7)
460 AS2( movdqa xmm5, xmm1)
461 AS2( pand xmm1, xmm7)
462 AS2( pand xmm5, xmm6)
464 AS2( pand xmm6, xmm2)
465 AS2( pand xmm2, xmm7)
// Exchange 64-bit halves between register pairs.
469 AS2( movdqa xmm5, xmm4)
470 AS2( movdqa xmm6, xmm0)
471 AS3( shufpd xmm4, xmm1, 2)
472 AS3( shufpd xmm0, xmm2, 2)
473 AS3( shufpd xmm1, xmm5, 2)
474 AS3( shufpd xmm2, xmm6, 2)
// Emit the block from xmm4, xmm0, xmm1, xmm2.
477 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
// Operand and clobber lists closing the GCC-style inline assembly.
// x64 variant: operands are referenced by position, which matches the x64
// REG_* defines (%1 = output, %2 = iterationCount, %3 = m_rounds,
// %5 = workspace; %0 = input and %4 = state pointer by elimination).
485 #if CRYPTOPP_BOOL_X64 486 :
"+r" (input),
"+r" (output),
"+r" (iterationCount)
487 :
"r" (m_rounds),
"r" (m_state.m_ptr),
"r" (workspace)
// Clobbers: eax and rdx (used as REG_roundsLeft / REG_temp32), memory, the
// condition codes, and all sixteen XMM registers.
488 :
"%eax",
"%rdx",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm12",
"%xmm13",
"%xmm14",
// 32-bit variant (the #else line is fused/elided here): operands are pinned to
// the fixed registers named by the x86 REG_* defines - a = eax (input),
// D = edi (output), c = ecx (iterationCount), d = edx (rounds), S = esi (state).
// NOTE(review): the clobber list of this variant is not visible in this extract.
"%xmm15" 490 :
"+a" (input),
"+D" (output),
"+c" (iterationCount)
491 :
"d" (m_rounds),
"S" (m_state.m_ptr)
// MASM epilogue: reload xmm6-xmm15 from the offsets used by the prologue
// (0200h-0290h), release the 10*16 + 32*16 + 8 byte frame and close the
// procedure.
// NOTE(review): the `ret` instruction is on a line not visible in this extract.
496 #ifdef CRYPTOPP_GENERATE_X64_MASM 497 movdqa xmm6, [rsp + 0200h]
498 movdqa xmm7, [rsp + 0210h]
499 movdqa xmm8, [rsp + 0220h]
500 movdqa xmm9, [rsp + 0230h]
501 movdqa xmm10, [rsp + 0240h]
502 movdqa xmm11, [rsp + 0250h]
503 movdqa xmm12, [rsp + 0260h]
504 movdqa xmm13, [rsp + 0270h]
505 movdqa xmm14, [rsp + 0280h]
506 movdqa xmm15, [rsp + 0290h]
// Must match the amount passed to alloc_stack in the prologue.
507 add rsp, 10*16 + 32*16 + 8
509 Salsa20_OperateKeystream ENDP
// Portable C++ keystream generation (excluded when generating the MASM
// source). Each pass of the while loop produces one iteration of output.
515 #ifndef CRYPTOPP_GENERATE_X64_MASM 517 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
519 while (iterationCount--)
// Copy the state into working variables so m_state stays intact for the final
// addition.
521 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
522 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
523 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
524 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
// The counter drops by two per pass: each pass applies two groups of four
// quarter-rounds.
526 for (
int i=m_rounds; i>0; i-=2)
// QUARTER_ROUND(a, b, c, d): four add-rotate-xor steps with left rotations of
// 7, 9, 13 and 18 bits, updating b, c, d and a in that order.
// First group uses (x0,x4,x8,x12) ... (x3,x7,x11,x15).
528 #define QUARTER_ROUND(a, b, c, d) \ 529 b = b ^ rotlFixed(a + d, 7); \ 530 c = c ^ rotlFixed(b + a, 9); \ 531 d = d ^ rotlFixed(c + b, 13); \ 532 a = a ^ rotlFixed(d + c, 18); 534 QUARTER_ROUND(x0, x4, x8, x12)
535 QUARTER_ROUND(x1, x5, x9, x13)
536 QUARTER_ROUND(x2, x6, x10, x14)
537 QUARTER_ROUND(x3, x7, x11, x15)
// Second group uses (x0,x13,x10,x7) ... (x3,x12,x9,x6).
539 QUARTER_ROUND(x0, x13, x10, x7)
540 QUARTER_ROUND(x1, x14, x11, x4)
541 QUARTER_ROUND(x2, x15, x8, x5)
542 QUARTER_ROUND(x3, x12, x9, x6)
// SALSA_OUTPUT(x): emits sixteen little-endian words, each the working value
// plus the matching original state word. Output position k takes state index
// 0,13,10,7,4,1,14,11,8,5,2,15,12,9,6,3 for k = 0..15, i.e. the output order
// differs from the storage order of m_state.
// CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH then applies it for BYTES_PER_ITERATION bytes.
545 #define SALSA_OUTPUT(x) {\ 546 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ 547 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ 548 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ 549 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ 550 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ 551 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ 552 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ 553 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ 554 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ 555 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ 556 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ 557 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ 558 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ 559 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ 560 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ 561 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);} 563 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 564 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);
// Advance the low counter word; a wrap to zero triggers the statement on the
// next (not visible) line - presumably a carry into m_state[5], the high
// counter word. TODO confirm.
567 if (++m_state[8] == 0)
// XSalsa20 key setup: validates the round count, stores the user key in m_key
// and writes the constant words of m_state.
//   params - named parameters. NOTE(review): presumably the source of m_rounds;
//            the line that assigns m_rounds is not visible in this extract.
//   key    - user key bytes.
//   length - key length in bytes.
// Throws InvalidRounds when m_rounds is not 8, 12 or 20.
573 void XSalsa20_Policy::CipherSetKey(
const NameValuePairs &params,
const byte *key,
size_t length)
// Only the 8-, 12- and 20-round variants are accepted.
577 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
578 throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
// Copy the key into m_key in little-endian word order.
580 GetUserKey(LITTLE_ENDIAN_ORDER, m_key.begin(), m_key.size(), key, length);
// Duplicate the first 16 bytes of m_key at m_key.begin()+4.
// NOTE(review): the statement guarding this copy (original line 581) is not
// visible here; presumably it applies only to 16-byte keys - confirm.
582 memcpy(m_key.begin()+4, m_key.begin(), 16);
// Constant words. Read as little-endian ASCII they spell "expand 32-byte k";
// unlike the Salsa20 setup they do not depend on `length`.
585 m_state[0] = 0x61707865;
586 m_state[1] = 0x3320646e;
587 m_state[2] = 0x79622d32;
588 m_state[3] = 0x6b206574;
// XSalsa20 resynchronisation: mixes part of the IV with the stored key through
// the round function to produce the key words of m_state, installs the rest of
// the IV directly, and rewinds the block counter.
// NOTE(review): this looks like the XSalsa20 subkey derivation step - confirm
// against the XSalsa20 specification.
//   keystreamBuffer - unused.
//   IV              - initialisation vector bytes.
//   length          - unused.
591 void XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
593 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
596 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
// `get` is declared on a line not visible here (presumably a word reader over
// IV - TODO confirm). The first four values feed the mixing step through
// x14, x11, x8 and x5; the last two go straight into state words 14 and 11.
599 get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
// Remaining working words come from the stored key and the constant words.
601 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
602 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
603 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
// Same round structure as the keystream loop: two groups of four
// quarter-rounds per pass, counter stepping down by two.
605 for (
int i=m_rounds; i>0; i-=2)
607 QUARTER_ROUND(x0, x4, x8, x12)
608 QUARTER_ROUND(x1, x5, x9, x13)
609 QUARTER_ROUND(x2, x6, x10, x14)
610 QUARTER_ROUND(x3, x7, x11, x15)
612 QUARTER_ROUND(x0, x13, x10, x7)
613 QUARTER_ROUND(x1, x14, x11, x4)
614 QUARTER_ROUND(x2, x15, x8, x5)
615 QUARTER_ROUND(x3, x12, x9, x6)
// Eight of the mixed words become the key words of m_state. No feed-forward
// addition of the inputs is performed here.
618 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
619 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;
// Words 8 and 5 are the block counter; start from zero.
620 m_state[8] = m_state[5] = 0;
625 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM Standard names for retrieving values by name when working with NameValuePairs.
Utility functions for the Crypto++ library.
Library configuration file.
int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Exception thrown when an invalid number of rounds is encountered.
A::pointer data()
Provides a pointer to the first element in the memory block.
Classes, functions, intrinsics and features for X86, X32 and X64 assembly.
Classes for Salsa and Salsa20 stream ciphers.
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
Crypto++ library namespace.
Interface for retrieving values given their names.