Crypto++  5.6.3
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if CRYPTOPP_MSC_VERSION
13 # pragma warning(disable: 4731)
14 #endif
15 
16 NAMESPACE_BEGIN(CryptoPP)
17 
18 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
19 #include <intrin.h>
20 #endif
21 
// Use a native 128-bit integer type for the portable inner loops when one is
// available and the dedicated x64 assembly path is not being used instead.
#define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
// Field constants and masks used by the NH, polynomial and L3 hash stages.
static const word64 p64   = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime  */
static const word64 m62   = W64LIT(0x3fffffffffffffff); /* 62-bit mask       */
static const word64 m63   = W64LIT(0x7fffffffffffffff); /* 63-bit mask       */
static const word64 m64   = W64LIT(0xffffffffffffffff); /* 64-bit mask       */
static const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask     */
#ifdef __BORLANDC__
#undef const
#endif
#if VMAC_BOOL_WORD128
#ifdef __powerpc__
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
#define m126 ((word128(m62)<<64)|m64)
#else
static const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
#endif
#endif
42 
// Derives all VMAC subkeys from the user key. The underlying block cipher is
// keyed with the user key and then run over distinct counter blocks
// (distinguished by the first byte: 0x80/0xC0/0xE0) to produce three key
// materials: the NH hash key, the polynomial hash key (each 64-bit half
// masked by mpoly), and the L3 inner-product key (each word < p64, obtained
// by rejection sampling). Finally the IV from `params` is installed.
// Throws InvalidArgument for unsupported DigestSize or L1KeyLength values.
void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
{
	int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
	if (digestLength != 8 && digestLength != 16)
		throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
	m_is128 = digestLength == 16;

	m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
	if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
		throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");

	AllocateBlocks();

	BlockCipher &cipher = AccessCipher();
	cipher.SetKey(userKey, keylength, params);
	const unsigned int blockSize = cipher.BlockSize();
	const unsigned int blockSizeInWords = blockSize / sizeof(word64);
	SecBlock<word64> out(blockSizeInWords);
	SecByteBlock in;
	in.CleanNew(blockSize);
	size_t i;

	/* Fill nh key */
	// domain separator 0x80; counter mode generates the whole NH key stream
	in[0] = 0x80;
	cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
	ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));

	/* Fill poly key */
	// domain separator 0xC0, counter in byte 15; one 128-bit poly key per tag half
	in[0] = 0xC0;
	in[15] = 0;
	for (i = 0; i <= (size_t)m_is128; i++)
	{
		cipher.ProcessBlock(in, out.BytePtr());
		// mpoly masks each half so the key fits the poly-hash key domain
		m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
		m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
		in[15]++;
	}

	/* Fill ip key */
	// domain separator 0xE0; re-encrypt until both key words are below p64
	in[0] = 0xE0;
	in[15] = 0;
	word64 *l3Key = m_l3Key();
	for (i = 0; i <= (size_t)m_is128; i++)
		do
		{
			cipher.ProcessBlock(in, out.BytePtr());
			l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
			l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
			in[15]++;
		} while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));

	// invalidate any cached pad, then install the IV supplied with the key
	m_padCached = false;
	size_t nonceLength;
	const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
	Resynchronize(nonce, (int)nonceLength);
}
99 
101 {
103  IV[0] &= 0x7f;
104 }
105 
// Installs a new nonce and computes the cipher pad that TruncatedFinal adds
// to the tag. For 64-bit tags, the low bit of the last nonce byte only
// selects which half of the pad is used, so the cipher output can be cached
// and reused across nonces that differ only in that bit.
void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
	size_t length = ThrowIfInvalidIVLength(len);
	size_t s = IVSize();
	byte *storedNonce = m_nonce();

	if (m_is128)
	{
		// 128-bit tag: pad is simply the cipher applied to the zero-padded nonce
		memset(storedNonce, 0, s-length);
		memcpy(storedNonce+s-length, nonce, length);
		AccessCipher().ProcessBlock(storedNonce, m_pad());
	}
	else
	{
		// 64-bit tag: check whether the cached pad (computed with the selector
		// bit cleared) is still valid for this nonce
		if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
		{
			m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
			// the stored zero padding must also match for the cache to apply
			for (size_t i=0; m_padCached && i<s-length; i++)
				m_padCached = (storedNonce[i] == 0);
		}
		if (!m_padCached)
		{
			// recompute the pad with the selector bit masked off
			memset(storedNonce, 0, s-length);
			memcpy(storedNonce+s-length, nonce, length-1);
			storedNonce[s-1] = nonce[length-1] & 0xfe;
			AccessCipher().ProcessBlock(storedNonce, m_pad());
			m_padCached = true;
		}
		// keep the true last byte (including the selector bit) for TruncatedFinal
		storedNonce[s-1] = nonce[length-1];
	}
	m_isFirstBlock = true;
	Restart();
}
139 
140 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
141 {
142  CRYPTOPP_UNUSED(data);
143  assert(false);
144  throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
145 }
146 
148 {
149  return
150 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_VMAC_ASM)
151  HasSSE2() ? 16 :
152 #endif
153  GetCipher().OptimalDataAlignment();
154 }
155 
156 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
157 #if CRYPTOPP_MSC_VERSION
158 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
159 #endif
// VHASH inner loop for one tag half, implemented with x86 MMX/SSE2 integer
// multiplies (pmuludq). Processes blocksRemainingInWord64 64-bit words of
// `data` against the NH key for `tagPart`, folding each L1 block into the
// 127-bit polynomial accumulator stored in m_polyState(). Uses ebp as a
// scratch register, hence the "frame pointer modified" warning suppression.
void
#ifdef __GNUC__
__attribute__ ((noinline)) // Intel Compiler 9.1 workaround
#endif
VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

	// the parameters are only read by the assembly below
	CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
	CRYPTOPP_UNUSED(blocksRemainingInWord64);

#ifdef __GNUC__
	word32 temp;
	__asm__ __volatile__
	(
	// preserve ebx (PIC register) and load L1KeyLength into it
	AS2(	mov	%%ebx, %0)
	AS2(	mov	%1, %%ebx)
	INTEL_NOPREFIX
#else
	#if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2(	mov	ebx, [L1KeyLength])
	AS2(	mov	dl, [isFirstBlock])
	#else
	AS2(	mov	ecx, this)
	AS2(	mov	ebx, [ecx+m_L1KeyLength])
	AS2(	mov	dl, [ecx+m_isFirstBlock])
	#endif
	// edi -> NH key for this tag half, eax -> poly state for this tag half
	AS2(	mov	eax, tagPart)
	AS2(	shl	eax, 4)
	AS2(	mov	edi, nhK)
	AS2(	add	edi, eax)
	AS2(	add	eax, eax)
	AS2(	add	eax, polyS)

	AS2(	mov	esi, data)
	AS2(	mov	ecx, blocksRemainingInWord64)
#endif

	AS2(	shr	ebx, 3)			// ebx = L1 key length in word64s
#if CRYPTOPP_BOOL_X32
	AS_PUSH_IF86(	bp)
	AS2(	sub	esp, 24)		// stack scratch for 32-bit halves
#else
	AS_PUSH_IF86(	bp)
	AS2(	sub	esp, 12)
#endif
	// outer loop (label 4): one iteration per (possibly partial) L1 block
	ASL(4)
	AS2(	mov	ebp, ebx)
	AS2(	cmp	ecx, ebx)
	AS2(	cmovl	ebp, ecx)		// ebp = min(remaining, L1 block length)
	AS2(	sub	ecx, ebp)
	AS2(	lea	ebp, [edi+8*ebp]) // end of nhK
	// first NH step, peeled out of the loop below
	AS2(	movq	mm6, [esi])
	AS2(	paddq	mm6, [edi])
	AS2(	movq	mm5, [esi+8])
	AS2(	paddq	mm5, [edi+8])
	AS2(	add	esi, 16)
	AS2(	add	edi, 16)
	AS2(	movq	mm4, mm6)
	ASS(	pshufw	mm2, mm6, 1, 0, 3, 2)
	AS2(	pmuludq	mm6, mm5)	// 32x32->64 partial products of the NH multiply
	ASS(	pshufw	mm3, mm5, 1, 0, 3, 2)
	AS2(	pmuludq	mm5, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	pxor	mm7, mm7)
	AS2(	movd	[esp], mm6)
	AS2(	psrlq	mm6, 32)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm5)
#else
	AS2(	movd	[esp+4], mm5)
#endif
	AS2(	psrlq	mm5, 32)
	AS2(	cmp	edi, ebp)
	ASJ(	je,	1, f)
	// NH inner loop (label 0): two word64s of data per iteration
	ASL(0)
	AS2(	movq	mm0, [esi])
	AS2(	paddq	mm0, [edi])
	AS2(	movq	mm1, [esi+8])
	AS2(	paddq	mm1, [edi+8])
	AS2(	add	esi, 16)
	AS2(	add	edi, 16)
	AS2(	movq	mm4, mm0)
	AS2(	paddq	mm5, mm2)
	ASS(	pshufw	mm2, mm0, 1, 0, 3, 2)
	AS2(	pmuludq	mm0, mm1)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm3)
#else
	AS2(	movd	[esp+8], mm3)
#endif
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	ASS(	pshufw	mm3, mm1, 1, 0, 3, 2)
	AS2(	pmuludq	mm1, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+16])
#else
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
#endif
	AS2(	paddq	mm6, mm4)
	AS2(	movd	[esp], mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm0)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm1)
#else
	AS2(	movd	[esp+4], mm1)
#endif
	AS2(	psrlq	mm1, 32)
	AS2(	paddq	mm5, mm1)
	AS2(	cmp	edi, ebp)
	ASJ(	jne,	0, b)
	// loop tail (label 1): fold in the final set of partial products
	ASL(1)
	AS2(	paddq	mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm3)
#else
	AS2(	movd	[esp+8], mm3)
#endif
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+16])
#else
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
#endif
	AS2(	paddq	mm6, mm4)
	AS2(	lea	ebp, [8*ebx])
	AS2(	sub	edi, ebp) // reset edi to start of nhK

	// carry-propagate (mm7,mm6,mm5) into the NH result on the stack
	AS2(	movd	[esp], mm7)
	AS2(	psrlq	mm7, 32)
	AS2(	paddq	mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm6)
#else
	AS2(	movd	[esp+4], mm6)
#endif
	AS2(	psrlq	mm6, 32)
	AS2(	paddq	mm5, mm6)
	AS2(	psllq	mm5, 2)		// drop the top two bits of the high word
	AS2(	psrlq	mm5, 2)

// 32-bit views of the poly accumulator (a0..a3) and poly key (k0..k3)
// relative to eax = &polyS[tagPart*4]
#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	AS2(	test	dl, dl)		// dl holds the first-block flag
	ASJ(	jz,	2, f)
	// first block: accumulator = NH result + key, no multiplication yet
	AS2(	movd	mm1, k0)
	AS2(	movd	mm0, [esp])
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	movd	mm1, k1)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm2, [esp+8])
#else
	AS2(	movd	mm2, [esp+4])
#endif
	AS2(	paddq	mm1, mm2)
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm5, k2)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	a2, mm0)
	AS2(	xor	edx, edx)	// clear the flag so later blocks take label 2
	ASJ(	jmp,	3, f)
	// subsequent blocks (label 2): a = a*k + NH, schoolbook 32-bit products
	ASL(2)
	AS2(	movd	mm0, a3)
	AS2(	movq	mm4, mm0)
	AS2(	pmuludq	mm0, k3)	// a3*k3
	AS2(	movd	mm1, a0)
	AS2(	pmuludq	mm1, k2)	// a0*k2
	AS2(	movd	mm2, a1)
	AS2(	movd	mm6, k1)
	AS2(	pmuludq	mm2, mm6)	// a1*k1
	AS2(	movd	mm3, a2)
	AS2(	psllq	mm0, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	mm5, mm3)
	AS2(	movd	mm7, k0)
	AS2(	pmuludq	mm3, mm7)	// a2*k0
	AS2(	pmuludq	mm4, mm7)	// a3*k0
	AS2(	pmuludq	mm5, mm6)	// a2*k1
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a1)
	AS2(	paddq	mm4, mm5)
	AS2(	movq	mm5, mm1)
	AS2(	pmuludq	mm1, k2)	// a1*k2
	AS2(	paddq	mm0, mm2)
	AS2(	movd	mm2, a0)
	AS2(	paddq	mm0, mm3)
	AS2(	movq	mm3, mm2)
	AS2(	pmuludq	mm2, k3)	// a0*k3
	AS2(	pmuludq	mm3, mm7)	// a0*k0
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm0)
#else
	AS2(	movd	[esp+8], mm0)
#endif
	AS2(	psrlq	mm0, 32)
	AS2(	pmuludq	mm7, mm5)	// a1*k0
	AS2(	pmuludq	mm5, k3)	// a1*k3
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a2)
	AS2(	pmuludq	mm1, k2)	// a2*k2
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	mm4, mm0)
	AS2(	movd	mm2, a3)
	AS2(	pmuludq	mm2, mm6)	// a3*k1
	AS2(	pmuludq	mm6, a0)	// a0*k1
	AS2(	psrlq	mm0, 31)
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, [esp])
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, a2)
	AS2(	pmuludq	mm3, k3)	// a2*k3
	AS2(	paddq	mm5, mm1)
	AS2(	movd	mm1, a3)
	AS2(	pmuludq	mm1, k2)	// a3*k2
	AS2(	paddq	mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm2, [esp+8])
#else
	AS2(	movd	mm2, [esp+4])
#endif
	AS2(	psllq	mm5, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	psllq	mm4, 33)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm7, [esp+16])
#else
	AS2(	movd	mm7, [esp+8])
#endif
	AS2(	paddq	mm0, mm6)
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm3, mm1)
	AS2(	psllq	mm3, 1)
	AS2(	paddq	mm0, mm3)
	AS2(	psrlq	mm4, 1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	por	mm4, mm7)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	// label 3: loop back while data remains, then clean up
	ASL(3)
	AS2(	test	ecx, ecx)
	ASJ(	jnz,	4, b)
#if CRYPTOPP_BOOL_X32
	AS2(	add	esp, 24)
#else
	AS2(	add	esp, 12)
#endif
	AS_POP_IF86(	bp)
	AS1(	emms)			// leave the MMX/x87 state clean
#ifdef __GNUC__
	ATT_PREFIX
	AS2(	mov	%0, %%ebx)	// restore ebx
	: "=m" (temp)
	: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
	: "memory", "cc"
	);
#endif
}
462 #endif
463 
// Multiply/accumulate primitives for the NH and polynomial hashes. One of
// four implementations is selected, in order of preference:
//   1. native 128-bit integers (VMAC_BOOL_WORD128)
//   2. x64 inline assembly (mulq/addq/adcq)
//   3. MSVC _umul128 intrinsic
//   4. pure 32x32->64 multiplies (sets VMAC_BOOL_32BIT)
#if VMAC_BOOL_WORD128
	#define DeclareNH(a) word128 a=0
	#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
	#define AccumulateNH(a, b, c) a += word128(b)*(c)
	#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
	// VC2005 and later have an intrinsic for unsigned 32x32->64 multiply
	#define MUL32(a, b) __emulu(word32(a), word32(b))
	#else
	#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
	#endif
	#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
	// NH accumulator kept as two 64-bit halves a##0 (low) and a##1 (high)
	#define DeclareNH(a) word64 a##0=0, a##1=0
	#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
	#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
	#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
	#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
	#define DeclareNH(a) word64 a##0=0, a##1=0
	#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
	#define AccumulateNH(a, b, c) {\
		word64 ph, pl;\
		pl = _umul128(b,c,&ph);\
		a##0 += pl;\
		a##1 += ph + (a##0 < pl);}
	#else
	// 32-bit fallback: NH accumulator kept as three overlapping limbs
	#define VMAC_BOOL_32BIT 1
	#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
	#define MUL64(rh,rl,i1,i2) \
		{	word64 _i1 = (i1), _i2 = (i2); \
			word64 m1= MUL32(_i1,_i2>>32); \
			word64 m2= MUL32(_i1>>32,_i2); \
			rh = MUL32(_i1>>32,_i2>>32); \
			rl = MUL32(_i1,_i2); \
			ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
			ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
		}
	#define AccumulateNH(a, b, c) {\
		word64 p = MUL32(b, c);\
		a##1 += word32((p)>>32);\
		a##0 += word32(p);\
		p = MUL32((b)>>32, c);\
		a##2 += word32((p)>>32);\
		a##1 += word32(p);\
		p = MUL32((b)>>32, (c)>>32);\
		a##2 += p;\
		p = MUL32(b, (c)>>32);\
		a##1 += word32(p);\
		a##2 += word32(p>>32);}
	#endif
#endif
#ifndef VMAC_BOOL_32BIT
	#define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
	// portable 128-bit add with carry: (rh,rl) += (ih,il)
	#define ADD128(rh,rl,ih,il) \
		{	word64 _il = (il); \
			(rl) += (_il); \
			(rh) += (ih) + ((rl) < (_il)); \
		}
#endif
524 
#if !(defined(_MSC_VER) && _MSC_VER < 1300)
template <bool T_128BitTag>
#endif
// Portable VHASH inner loop. Computes the NH hash of each L1 block of `data`
// and folds it into the 127-bit polynomial accumulator(s) in m_polyState(),
// using whichever DeclareNH/AccumulateNH/poly_step implementation was
// selected by the preprocessor above (word128, x64 asm, _umul128, or pure
// 32-bit multiplies). A second accumulator (nhB/a2) is maintained for the
// second half of a 128-bit tag.
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
	// One NH step over a pair of data words; j indexes the pair within an
	// 8-word group. The 128-bit tag's second hash uses the key shifted by 2.
	#define INNER_LOOP_ITERATION(j)	{\
		word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
		word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
		AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
		if (T_128BitTag)\
			AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
		}

#if (defined(_MSC_VER) && _MSC_VER < 1300)
	// MSVC 6 cannot handle the template; fall back to a runtime flag
	bool T_128BitTag = m_is128;
#endif
	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	bool isFirstBlock = true;
	size_t i;

	#if !VMAC_BOOL_32BIT
		#if VMAC_BOOL_WORD128
		word128 a1=0, a2=0;
		#else
		word64 ah1=0, al1=0, ah2=0, al2=0;
		#endif
		// poly keys (high/low 64-bit halves) for each tag half
		word64 kh1, kl1, kh2, kl2;
		kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
		if (T_128BitTag)
		{
			kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
		}
	#endif

	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		i = 0;
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			// final, partial L1 block: handle the non-multiple-of-8 words first
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		// main NH loop, unrolled four pairs (8 words) per iteration
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

		#if VMAC_BOOL_32BIT
		// carry-propagate each three-limb NH accumulator into 32/32/62 bits
		word32 nh0[2], nh1[2];
		word64 nh2[2];

		nh0[0] = word32(nhA0);
		nhA1 += (nhA0 >> 32);
		nh1[0] = word32(nhA1);
		nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

		if (T_128BitTag)
		{
			nh0[1] = word32(nhB0);
			nhB1 += (nhB0 >> 32);
			nh1[1] = word32(nhB1);
			nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
		}

		// endian-corrected 32-bit views into the poly accumulator and key
		#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
		#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
		#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
		#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
		#define aHi ((polyS+i*4)[0])
		#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
		#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
		#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
		#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
		#define kHi ((polyS+i*4+2)[0])

		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				// very first block: accumulator = NH + key, no multiply yet
				for (i=0; i<=(size_t)T_128BitTag; i++)
				{
					word64 t = (word64)nh0[i] + k0;
					a0 = (word32)t;
					t = (t >> 32) + nh1[i] + k1;
					a1 = (word32)t;
					aHi = (t >> 32) + nh2[i] + kHi;
				}
				continue;
			}
		}
		// poly step in 32-bit limbs: a = a*k + NH
		for (i=0; i<=(size_t)T_128BitTag; i++)
		{
			word64 p, t;
			word32 t2;

			p = MUL32(a3, 2*k3);
			p += nh2[i];
			p += MUL32(a0, k2);
			p += MUL32(a1, k1);
			p += MUL32(a2, k0);
			t2 = (word32)p;
			p >>= 32;
			p += MUL32(a0, k3);
			p += MUL32(a1, k2);
			p += MUL32(a2, k1);
			p += MUL32(a3, k0);
			t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
			p >>= 31;
			p += nh0[i];
			p += MUL32(a0, k0);
			p += MUL32(a1, 2*k3);
			p += MUL32(a2, 2*k2);
			p += MUL32(a3, 2*k1);
			t2 = (word32)p;
			p >>= 32;
			p += nh1[i];
			p += MUL32(a0, k1);
			p += MUL32(a1, k0);
			p += MUL32(a2, 2*k3);
			p += MUL32(a3, 2*k2);
			a0 = t2;
			a1 = (word32)p;
			aHi = (p >> 32) + t;
		}

		#undef a0
		#undef a1
		#undef a2
		#undef a3
		#undef aHi
		#undef k0
		#undef k1
		#undef k2
		#undef k3
		#undef kHi
		#else // #if VMAC_BOOL_32BIT
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				// very first block: accumulator = (NH masked to 126 bits) + key
				#if VMAC_BOOL_WORD128
				#define first_poly_step(a, kh, kl, m)	a = (m & m126) + ((word128(kh) << 64) | kl)

				first_poly_step(a1, kh1, kl1, nhA);
				if (T_128BitTag)
					first_poly_step(a2, kh2, kl2, nhB);
				#else
				#define first_poly_step(ah, al, kh, kl, mh, ml)	{\
					mh &= m62;\
					ADD128(mh, ml, kh, kl);	\
					ah = mh; al = ml;}

				first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
				if (T_128BitTag)
					first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
				#endif
				continue;
			}
			else
			{
				// resuming a previous Update call: reload accumulators from state
				#if VMAC_BOOL_WORD128
				a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
				#else
				ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
				#endif
				if (T_128BitTag)
				{
					#if VMAC_BOOL_WORD128
					a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
					#else
					ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
					#endif
				}
			}
		}

		// poly step: a = a*k + (NH masked to 126 bits), partially reduced
		#if VMAC_BOOL_WORD128
			#define poly_step(a, kh, kl, m)	\
			{	word128 t1, t2, t3, t4;\
				Multiply128(t2, a>>64, kl);\
				Multiply128(t3, a, kh);\
				Multiply128(t1, a, kl);\
				Multiply128(t4, a>>64, 2*kh);\
				t2 += t3;\
				t4 += t1;\
				t2 += t4>>64;\
				a = (word128(word64(t2)&m63) << 64) | word64(t4);\
				t2 *= 2;\
				a += m & m126;\
				a += t2>>64;}

			poly_step(a1, kh1, kl1, nhA);
			if (T_128BitTag)
				poly_step(a2, kh2, kl2, nhB);
		#else
			#define poly_step(ah, al, kh, kl, mh, ml)	\
			{	word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
				/* compute ab*cd, put bd into result registers */ \
				MUL64(t2h,t2l,ah,kl); \
				MUL64(t3h,t3l,al,kh); \
				MUL64(t1h,t1l,ah,2*kh); \
				MUL64(ah,al,al,kl); \
				/* add together ad + bc */ \
				ADD128(t2h,t2l,t3h,t3l); \
				/* add 2 * ac to result */ \
				ADD128(ah,al,t1h,t1l); \
				/* now (ah,al), (t2l,2*t2h) need summing */ \
				/* first add the high registers, carrying into t2h */ \
				ADD128(t2h,ah,z,t2l); \
				/* double t2h and add top bit of ah */ \
				t2h += t2h + (ah >> 63); \
				ah &= m63; \
				/* now add the low registers */ \
				mh &= m62; \
				ADD128(ah,al,mh,ml); \
				ADD128(ah,al,z,t2h); \
			}

			poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
			if (T_128BitTag)
				poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
		#endif
		#endif // #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

	// write the accumulators back to m_polyState() (the 32-bit path already
	// updates the state in place through the a0..aHi macros)
	#if VMAC_BOOL_WORD128
		(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
		}
	#elif !VMAC_BOOL_32BIT
		(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
		}
	#endif
}
784 
785 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
786 {
787 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
788  if (HasSSE2())
789  {
790  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
791  if (m_is128)
792  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
793  m_isFirstBlock = false;
794  }
795  else
796 #endif
797  {
798 #if defined(_MSC_VER) && _MSC_VER < 1300
799  VHASH_Update_Template(data, blocksRemainingInWord64);
800 #else
801  if (m_is128)
802  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
803  else
804  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
805 #endif
806  }
807 }
808 
809 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
810 {
811  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
812  VHASH_Update(data, (length-remaining)/8);
813  return remaining;
814 }
815 
// L3 (inner-product) hash: fully reduces the 127-bit poly accumulator in
// `input` (with the message bit length `len` folded in) and computes
// (p1+k1)*(p2+k2) mod p64 = 2^64-257. All conditional adjustments are done
// with mask arithmetic rather than branches.
static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	/* fully reduce (p1,p2)+(len,0) mod p127 */
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	/* At this point, (p1,p2) is at most 2^127+(len<<64) */
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	/* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	/* compute (p1+k1)%p64 and (p2+k2)%p64 */
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;	// branch-free conditional add of 257 on carry
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	/* compute (p1+k1)*(p2+k2)%p64 */
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;	// final reduction into [0, p64)
	return rl;
}
856 
// Computes the final tag. Any bytes still buffered in m_data() are zero-padded
// and run through VHASH; each tag half is then finished with the L3 hash and
// the nonce-derived pad. `size` may be smaller than the digest size, in which
// case a big-endian prefix of the tag is written.
void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
	// bytes still buffered = total message bytes mod the L1 key length
	size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

	if (len)
	{
		// zero-pad the partial block up to a multiple of 16 bytes, then hash it
		memset(m_data()+len, 0, (0-len)%16);
		VHASH_Update(DataBuf(), ((len+15)/16)*2);
		len *= 8; // convert to bits
	}
	else if (m_isFirstBlock)
	{
		// special case for empty string
		m_polyState()[0] = m_polyState()[2];
		m_polyState()[1] = m_polyState()[3];
		if (m_is128)
		{
			m_polyState()[4] = m_polyState()[6];
			m_polyState()[5] = m_polyState()[7];
		}
	}

	if (m_is128)
	{
		// one L3 hash plus one pad word per tag half
		word64 t[2];
		t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
		t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
		if (size == 16)
		{
			PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
			PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
		}
		else
		{
			// truncated output: byte-swap in place, then copy the prefix
			t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
			t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
			memcpy(mac, t, size);
		}
	}
	else
	{
		word64 t = L3Hash(m_polyState(), m_l3Key(), len);
		// the low bit of the last nonce byte selects which pad half to add
		t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
		if (size == 8)
			PutWord(false, BIG_ENDIAN_ORDER, mac, t);
		else
		{
			t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
			memcpy(mac, &t, size);
		}
	}
}
909 
910 NAMESPACE_END
Standard names for retrieving values by name when working with NameValuePairs.
const char * DigestSize()
int, in bytes
Definition: argnames.h:78
An invalid argument was detected.
Definition: cryptlib.h:166
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: cryptlib.cpp:100
T2 ModPowerOf2(const T1 &a, const T2 &b)
Computes the residue of a value modulo a power of 2
Definition: misc.h:730
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:640
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
Library configuration file.
Interface for random number generators.
Definition: cryptlib.h:1085
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
Definition: cryptlib.cpp:181
SecByteBlock is a SecBlock<byte> typedef.
Definition: secblock.h:719
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1013
Classes and functions for secure memory allocations.
int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Definition: cryptlib.h:364
A method was called which was not implemented.
Definition: cryptlib.h:187
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:685
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianess.
Definition: misc.h:1695
void UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
Sets the key for this object without performing parameter validation.
Definition: vmac.cpp:43
Classes, functions, intrinsics and features for X86, X32 and X64 assembly.
void TruncatedFinal(byte *mac, size_t size)
Computes the hash of the current message.
Definition: vmac.cpp:857
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
Definition: argnames.h:21
unsigned int IVSize() const
Returns length of the IV accepted by this object.
Definition: vmac.h:20
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:232
bool VerifyBufsEqual(const byte *buf1, const byte *buf2, size_t count)
Performs a near constant-time comparison of two equally sized buffers.
Definition: misc.cpp:93
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: vmac.cpp:147
const char * L1KeyLength()
int, in bytes
Definition: argnames.h:79
Crypto++ library namespace.
void Resynchronize(const byte *nonce, int length=-1)
resynchronize with an IV. ivLength=-1 means use IVSize()
Definition: vmac.cpp:106
virtual void GetNextIV(RandomNumberGenerator &rng, byte *iv)
Gets a secure IV for the next message.
Definition: cryptlib.cpp:176
void GetNextIV(RandomNumberGenerator &rng, byte *IV)
Gets a secure IV for the next message.
Definition: vmac.cpp:100
Interface for retrieving values given their names.
Definition: cryptlib.h:261
byte * BytePtr()
Provides a byte pointer to the first element in the memory block.
Definition: secblock.h:516