Crypto++  5.6.3
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if _MSC_VER >= 1400
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Inline assembly operands don't work with .intel_syntax",
48 // http://llvm.org/bugs/show_bug.cgi?id=24232
49 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
50 # undef CRYPTOPP_X86_ASM_AVAILABLE
51 # undef CRYPTOPP_X32_ASM_AVAILABLE
52 # undef CRYPTOPP_X64_ASM_AVAILABLE
53 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
54 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
55 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
56 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
57 #else
58 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
59 #endif
60 
61 NAMESPACE_BEGIN(CryptoPP)
62 
63 // Debian QEMU/ARMEL issue in MultiplyTop; see http://github.com/weidai11/cryptopp/issues/31.
64 #if __ARMEL__ && (CRYPTOPP_GCC_VERSION >= 50200) && (CRYPTOPP_GCC_VERSION < 50300) && __OPTIMIZE__
65 # define WORKAROUND_ARMEL_BUG 1
66 #endif
67 
68 // Debian QEMU/ARM64 issue in Integer or ModularArithmetic; see http://github.com/weidai11/cryptopp/issues/61.
69 #if (__aarch64__ || __AARCH64EL__) && (CRYPTOPP_GCC_VERSION >= 50200) && (CRYPTOPP_GCC_VERSION < 50300)
70 # define WORKAROUND_ARM64_BUG 1
71 #endif
72 
73 #if WORKAROUND_ARMEL_BUG
74 # pragma GCC push_options
75 # pragma GCC optimize("O1")
76 #endif
77 
78 #if WORKAROUND_ARM64_BUG
79 # pragma GCC push_options
80 # pragma GCC optimize("no-devirtualize")
81 #endif
82 
83 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
84 {
85  if (valueType != typeid(Integer))
86  return false;
87  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
88  return true;
89 }
90 
91 inline static int Compare(const word *A, const word *B, size_t N)
92 {
93  while (N--)
94  if (A[N] > B[N])
95  return 1;
96  else if (A[N] < B[N])
97  return -1;
98 
99  return 0;
100 }
101 
102 inline static int Increment(word *A, size_t N, word B=1)
103 {
104  assert(N);
105  word t = A[0];
106  A[0] = t+B;
107  if (A[0] >= t)
108  return 0;
109  for (unsigned i=1; i<N; i++)
110  if (++A[i])
111  return 0;
112  return 1;
113 }
114 
115 inline static int Decrement(word *A, size_t N, word B=1)
116 {
117  assert(N);
118  word t = A[0];
119  A[0] = t-B;
120  if (A[0] <= t)
121  return 0;
122  for (unsigned i=1; i<N; i++)
123  if (A[i]--)
124  return 0;
125  return 1;
126 }
127 
128 static void TwosComplement(word *A, size_t N)
129 {
130  Decrement(A, N);
131  for (unsigned i=0; i<N; i++)
132  A[i] = ~A[i];
133 }
134 
135 static word AtomicInverseModPower2(word A)
136 {
137  assert(A%2==1);
138 
139  word R=A%8;
140 
141  for (unsigned i=3; i<WORD_BITS; i*=2)
142  R = R*(2-R*A);
143 
144  assert(R*A==1);
145  return R;
146 }
147 
148 // ********************************************************
149 
150 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
151  #define Declare2Words(x) word x##0, x##1;
152  #define AssignWord(a, b) a##0 = b; a##1 = 0;
153  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
154  #define LowWord(a) a##0
155  #define HighWord(a) a##1
156  #ifdef _MSC_VER
157  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
158  #ifndef __INTEL_COMPILER
159  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
160  #endif
161  #elif defined(__DECCXX)
162  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
163  #elif defined(__x86_64__)
164  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
165  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
166  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
167  #else
168  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
169  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
170  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
171  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
172  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
173  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
174  #endif
175  #endif
176  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
177  #ifndef Double3Words
178  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
179  #endif
180  #ifndef Acc2WordsBy2
181  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
182  #endif
183  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
184  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
185  #define GetCarry(u) u##1
186  #define GetBorrow(u) u##1
187 #else
188  #define Declare2Words(x) dword x;
189  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
190  #define MultiplyWords(p, a, b) p = __emulu(a, b);
191  #else
192  #define MultiplyWords(p, a, b) p = (dword)a*b;
193  #endif
194  #define AssignWord(a, b) a = b;
195  #define Add2WordsBy1(a, b, c) a = b + c;
196  #define Acc2WordsBy2(a, b) a += b;
197  #define LowWord(a) word(a)
198  #define HighWord(a) word(a>>WORD_BITS)
199  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
200  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
201  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
202  #define GetCarry(u) HighWord(u)
203  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
204 #endif
205 #ifndef MulAcc
206  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
207 #endif
208 #ifndef Acc2WordsBy1
209  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
210 #endif
211 #ifndef Acc3WordsBy2
212  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
213 #endif
214 
215 class DWord
216 {
217 public:
218  // Converity finding on default ctor. We've isntrumented the code,
219  // and cannot uncover a case where it affects a result.
220 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
221  // Repeating pattern of 1010 for debug builds to break things...
222  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
223 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
224  // Repeating pattern of 1010 for debug builds to break things...
225  DWord() : m_halfs() {memset(&m_halfs, 0xa, sizeof(m_halfs));}
226 #else
227  DWord() {}
228 #endif
229 
230 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
231  explicit DWord(word low) : m_whole(low) {}
232 #else
233  explicit DWord(word low)
234  {
235  m_halfs.low = low;
236  m_halfs.high = 0;
237  }
238 #endif
239 
240  DWord(word low, word high)
241  {
242  m_halfs.low = low;
243  m_halfs.high = high;
244  }
245 
246  static DWord Multiply(word a, word b)
247  {
248  DWord r;
249  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
250  r.m_whole = (dword)a * b;
251  #elif defined(MultiplyWordsLoHi)
252  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
253  #else
254  assert(0);
255  #endif
256  return r;
257  }
258 
259  static DWord MultiplyAndAdd(word a, word b, word c)
260  {
261  DWord r = Multiply(a, b);
262  return r += c;
263  }
264 
265  DWord & operator+=(word a)
266  {
267  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
268  m_whole = m_whole + a;
269  #else
270  m_halfs.low += a;
271  m_halfs.high += (m_halfs.low < a);
272  #endif
273  return *this;
274  }
275 
276  DWord operator+(word a)
277  {
278  DWord r;
279  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
280  r.m_whole = m_whole + a;
281  #else
282  r.m_halfs.low = m_halfs.low + a;
283  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
284  #endif
285  return r;
286  }
287 
288  DWord operator-(DWord a)
289  {
290  DWord r;
291  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
292  r.m_whole = m_whole - a.m_whole;
293  #else
294  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
295  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
296  #endif
297  return r;
298  }
299 
300  DWord operator-(word a)
301  {
302  DWord r;
303  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
304  r.m_whole = m_whole - a;
305  #else
306  r.m_halfs.low = m_halfs.low - a;
307  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
308  #endif
309  return r;
310  }
311 
312  // returns quotient, which must fit in a word
313  word operator/(word divisor);
314 
315  word operator%(word a);
316 
317  bool operator!() const
318  {
319  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
320  return !m_whole;
321  #else
322  return !m_halfs.high && !m_halfs.low;
323  #endif
324  }
325 
326  word GetLowHalf() const {return m_halfs.low;}
327  word GetHighHalf() const {return m_halfs.high;}
328  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
329 
330 private:
331  union
332  {
333  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
334  dword m_whole;
335  #endif
336  struct
337  {
338  #ifdef IS_LITTLE_ENDIAN
339  word low;
340  word high;
341  #else
342  word high;
343  word low;
344  #endif
345  } m_halfs;
346  };
347 };
348 
349 class Word
350 {
351 public:
352  // Converity finding on default ctor. We've isntrumented the code,
353  // and cannot uncover a case where it affects a result.
354 #if defined(__COVERITY__)
355  Word() : m_whole(0) {}
356 #elif !defined(NDEBUG)
357  // Repeating pattern of 1010 for debug builds to break things...
358  Word() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
359 #else
360  Word() {}
361 #endif
362 
363  Word(word value) : m_whole(value) {}
364  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
365 
366  static Word Multiply(hword a, hword b)
367  {
368  Word r;
369  r.m_whole = (word)a * b;
370  return r;
371  }
372 
373  Word operator-(Word a)
374  {
375  Word r;
376  r.m_whole = m_whole - a.m_whole;
377  return r;
378  }
379 
380  Word operator-(hword a)
381  {
382  Word r;
383  r.m_whole = m_whole - a;
384  return r;
385  }
386 
387  // returns quotient, which must fit in a word
388  hword operator/(hword divisor)
389  {
390  return hword(m_whole / divisor);
391  }
392 
393  bool operator!() const
394  {
395  return !m_whole;
396  }
397 
398  word GetWhole() const {return m_whole;}
399  hword GetLowHalf() const {return hword(m_whole);}
400  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
401  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
402 
403 private:
404  word m_whole;
405 };
406 
407 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
408 template <class S, class D>
409 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
410 {
411  CRYPTOPP_UNUSED(dummy);
412 
413  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
414  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
415 
416  // estimate the quotient: do a 2 S by 1 S divide
417  S Q;
418  if (S(B1+1) == 0)
419  Q = A[2];
420  else if (B1 > 0)
421  Q = D(A[1], A[2]) / S(B1+1);
422  else
423  Q = D(A[0], A[1]) / B0;
424 
425  // now subtract Q*B from A
426  D p = D::Multiply(B0, Q);
427  D u = (D) A[0] - p.GetLowHalf();
428  A[0] = u.GetLowHalf();
429  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
430  A[1] = u.GetLowHalf();
431  A[2] += u.GetHighHalf();
432 
433  // Q <= actual quotient, so fix it
434  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
435  {
436  u = (D) A[0] - B0;
437  A[0] = u.GetLowHalf();
438  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
439  A[1] = u.GetLowHalf();
440  A[2] += u.GetHighHalf();
441  Q++;
442  assert(Q); // shouldn't overflow
443  }
444 
445  return Q;
446 }
447 
448 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
449 template <class S, class D>
450 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
451 {
452  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
453  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
454  else
455  {
456  S Q[2];
457  T[0] = Al.GetLowHalf();
458  T[1] = Al.GetHighHalf();
459  T[2] = Ah.GetLowHalf();
460  T[3] = Ah.GetHighHalf();
461  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
462  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
463  return D(Q[0], Q[1]);
464  }
465 }
466 
467 // returns quotient, which must fit in a word
468 inline word DWord::operator/(word a)
469 {
470  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
471  return word(m_whole / a);
472  #else
473  hword r[4];
474  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
475  #endif
476 }
477 
478 inline word DWord::operator%(word a)
479 {
480  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
481  return word(m_whole % a);
482  #else
483  if (a < (word(1) << (WORD_BITS/2)))
484  {
485  hword h = hword(a);
486  word r = m_halfs.high % h;
487  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
488  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
489  }
490  else
491  {
492  hword r[4];
493  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
494  return Word(r[0], r[1]).GetWhole();
495  }
496  #endif
497 }
498 
499 // ********************************************************
500 
501 // Use some tricks to share assembly code between MSVC and GCC
502 #if defined(__GNUC__)
503  #define AddPrologue \
504  int result; \
505  __asm__ __volatile__ \
506  ( \
507  INTEL_NOPREFIX
508  #define AddEpilogue \
509  ".att_syntax prefix;" \
510  : "=a" (result)\
511  : "d" (C), "a" (A), "D" (B), "c" (N) \
512  : "%esi", "memory", "cc" \
513  );\
514  return result;
515  #define MulPrologue \
516  __asm__ __volatile__ \
517  ( \
518  ".intel_syntax noprefix;" \
519  AS1( push ebx) \
520  AS2( mov ebx, edx)
521  #define MulEpilogue \
522  AS1( pop ebx) \
523  ".att_syntax prefix;" \
524  : \
525  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
526  : "%esi", "memory", "cc" \
527  );
528  #define SquPrologue MulPrologue
529  #define SquEpilogue \
530  AS1( pop ebx) \
531  ".att_syntax prefix;" \
532  : \
533  : "d" (s_maskLow16), "c" (C), "a" (A) \
534  : "%esi", "%edi", "memory", "cc" \
535  );
536  #define TopPrologue MulPrologue
537  #define TopEpilogue \
538  AS1( pop ebx) \
539  ".att_syntax prefix;" \
540  : \
541  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
542  : "memory", "cc" \
543  );
544 #else
545  #define AddPrologue \
546  __asm push edi \
547  __asm push esi \
548  __asm mov eax, [esp+12] \
549  __asm mov edi, [esp+16]
550  #define AddEpilogue \
551  __asm pop esi \
552  __asm pop edi \
553  __asm ret 8
554 #if _MSC_VER < 1300
555  #define SaveEBX __asm push ebx
556  #define RestoreEBX __asm pop ebx
557 #else
558  #define SaveEBX
559  #define RestoreEBX
560 #endif
561  #define SquPrologue \
562  AS2( mov eax, A) \
563  AS2( mov ecx, C) \
564  SaveEBX \
565  AS2( lea ebx, s_maskLow16)
566  #define MulPrologue \
567  AS2( mov eax, A) \
568  AS2( mov edi, B) \
569  AS2( mov ecx, C) \
570  SaveEBX \
571  AS2( lea ebx, s_maskLow16)
572  #define TopPrologue \
573  AS2( mov eax, A) \
574  AS2( mov edi, B) \
575  AS2( mov ecx, C) \
576  AS2( mov esi, L) \
577  SaveEBX \
578  AS2( lea ebx, s_maskLow16)
579  #define SquEpilogue RestoreEBX
580  #define MulEpilogue RestoreEBX
581  #define TopEpilogue RestoreEBX
582 #endif
583 
584 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
585 extern "C" {
586 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
587 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
588 }
589 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
590 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
591 {
592  word result;
593  __asm__ __volatile__
594  (
595  INTEL_NOPREFIX
596  AS1( neg %1)
597  ASJ( jz, 1, f)
598  AS2( mov %0,[%3+8*%1])
599  AS2( add %0,[%4+8*%1])
600  AS2( mov [%2+8*%1],%0)
601  ASL(0)
602  AS2( mov %0,[%3+8*%1+8])
603  AS2( adc %0,[%4+8*%1+8])
604  AS2( mov [%2+8*%1+8],%0)
605  AS2( lea %1,[%1+2])
606  ASJ( jrcxz, 1, f)
607  AS2( mov %0,[%3+8*%1])
608  AS2( adc %0,[%4+8*%1])
609  AS2( mov [%2+8*%1],%0)
610  ASJ( jmp, 0, b)
611  ASL(1)
612  AS2( mov %0, 0)
613  AS2( adc %0, %0)
614  ATT_NOPREFIX
615  : "=&r" (result), "+c" (N)
616  : "r" (C+N), "r" (A+N), "r" (B+N)
617  : "memory", "cc"
618  );
619  return (int)result;
620 }
621 
622 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
623 {
624  word result;
625  __asm__ __volatile__
626  (
627  INTEL_NOPREFIX
628  AS1( neg %1)
629  ASJ( jz, 1, f)
630  AS2( mov %0,[%3+8*%1])
631  AS2( sub %0,[%4+8*%1])
632  AS2( mov [%2+8*%1],%0)
633  ASL(0)
634  AS2( mov %0,[%3+8*%1+8])
635  AS2( sbb %0,[%4+8*%1+8])
636  AS2( mov [%2+8*%1+8],%0)
637  AS2( lea %1,[%1+2])
638  ASJ( jrcxz, 1, f)
639  AS2( mov %0,[%3+8*%1])
640  AS2( sbb %0,[%4+8*%1])
641  AS2( mov [%2+8*%1],%0)
642  ASJ( jmp, 0, b)
643  ASL(1)
644  AS2( mov %0, 0)
645  AS2( adc %0, %0)
646  ATT_NOPREFIX
647  : "=&r" (result), "+c" (N)
648  : "r" (C+N), "r" (A+N), "r" (B+N)
649  : "memory", "cc"
650  );
651  return (int)result;
652 }
653 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
654 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
655 {
656  AddPrologue
657 
658  // now: eax = A, edi = B, edx = C, ecx = N
659  AS2( lea eax, [eax+4*ecx])
660  AS2( lea edi, [edi+4*ecx])
661  AS2( lea edx, [edx+4*ecx])
662 
663  AS1( neg ecx) // ecx is negative index
664  AS2( test ecx, 2) // this clears carry flag
665  ASJ( jz, 0, f)
666  AS2( sub ecx, 2)
667  ASJ( jmp, 1, f)
668 
669  ASL(0)
670  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
671  AS2( mov esi,[eax+4*ecx])
672  AS2( adc esi,[edi+4*ecx])
673  AS2( mov [edx+4*ecx],esi)
674  AS2( mov esi,[eax+4*ecx+4])
675  AS2( adc esi,[edi+4*ecx+4])
676  AS2( mov [edx+4*ecx+4],esi)
677  ASL(1)
678  AS2( mov esi,[eax+4*ecx+8])
679  AS2( adc esi,[edi+4*ecx+8])
680  AS2( mov [edx+4*ecx+8],esi)
681  AS2( mov esi,[eax+4*ecx+12])
682  AS2( adc esi,[edi+4*ecx+12])
683  AS2( mov [edx+4*ecx+12],esi)
684 
685  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
686  ASJ( jmp, 0, b)
687 
688  ASL(2)
689  AS2( mov eax, 0)
690  AS1( setc al) // store carry into eax (return result register)
691 
692  AddEpilogue
693 }
694 
695 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
696 {
697  AddPrologue
698 
699  // now: eax = A, edi = B, edx = C, ecx = N
700  AS2( lea eax, [eax+4*ecx])
701  AS2( lea edi, [edi+4*ecx])
702  AS2( lea edx, [edx+4*ecx])
703 
704  AS1( neg ecx) // ecx is negative index
705  AS2( test ecx, 2) // this clears carry flag
706  ASJ( jz, 0, f)
707  AS2( sub ecx, 2)
708  ASJ( jmp, 1, f)
709 
710  ASL(0)
711  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
712  AS2( mov esi,[eax+4*ecx])
713  AS2( sbb esi,[edi+4*ecx])
714  AS2( mov [edx+4*ecx],esi)
715  AS2( mov esi,[eax+4*ecx+4])
716  AS2( sbb esi,[edi+4*ecx+4])
717  AS2( mov [edx+4*ecx+4],esi)
718  ASL(1)
719  AS2( mov esi,[eax+4*ecx+8])
720  AS2( sbb esi,[edi+4*ecx+8])
721  AS2( mov [edx+4*ecx+8],esi)
722  AS2( mov esi,[eax+4*ecx+12])
723  AS2( sbb esi,[edi+4*ecx+12])
724  AS2( mov [edx+4*ecx+12],esi)
725 
726  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
727  ASJ( jmp, 0, b)
728 
729  ASL(2)
730  AS2( mov eax, 0)
731  AS1( setc al) // store carry into eax (return result register)
732 
733  AddEpilogue
734 }
735 
736 #if CRYPTOPP_INTEGER_SSE2
737 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
738 {
739  AddPrologue
740 
741  // now: eax = A, edi = B, edx = C, ecx = N
742  AS2( lea eax, [eax+4*ecx])
743  AS2( lea edi, [edi+4*ecx])
744  AS2( lea edx, [edx+4*ecx])
745 
746  AS1( neg ecx) // ecx is negative index
747  AS2( pxor mm2, mm2)
748  ASJ( jz, 2, f)
749  AS2( test ecx, 2) // this clears carry flag
750  ASJ( jz, 0, f)
751  AS2( sub ecx, 2)
752  ASJ( jmp, 1, f)
753 
754  ASL(0)
755  AS2( movd mm0, DWORD PTR [eax+4*ecx])
756  AS2( movd mm1, DWORD PTR [edi+4*ecx])
757  AS2( paddq mm0, mm1)
758  AS2( paddq mm2, mm0)
759  AS2( movd DWORD PTR [edx+4*ecx], mm2)
760  AS2( psrlq mm2, 32)
761 
762  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
763  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
764  AS2( paddq mm0, mm1)
765  AS2( paddq mm2, mm0)
766  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
767  AS2( psrlq mm2, 32)
768 
769  ASL(1)
770  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
771  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
772  AS2( paddq mm0, mm1)
773  AS2( paddq mm2, mm0)
774  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
775  AS2( psrlq mm2, 32)
776 
777  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
778  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
779  AS2( paddq mm0, mm1)
780  AS2( paddq mm2, mm0)
781  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
782  AS2( psrlq mm2, 32)
783 
784  AS2( add ecx, 4)
785  ASJ( jnz, 0, b)
786 
787  ASL(2)
788  AS2( movd eax, mm2)
789  AS1( emms)
790 
791  AddEpilogue
792 }
793 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
794 {
795  AddPrologue
796 
797  // now: eax = A, edi = B, edx = C, ecx = N
798  AS2( lea eax, [eax+4*ecx])
799  AS2( lea edi, [edi+4*ecx])
800  AS2( lea edx, [edx+4*ecx])
801 
802  AS1( neg ecx) // ecx is negative index
803  AS2( pxor mm2, mm2)
804  ASJ( jz, 2, f)
805  AS2( test ecx, 2) // this clears carry flag
806  ASJ( jz, 0, f)
807  AS2( sub ecx, 2)
808  ASJ( jmp, 1, f)
809 
810  ASL(0)
811  AS2( movd mm0, DWORD PTR [eax+4*ecx])
812  AS2( movd mm1, DWORD PTR [edi+4*ecx])
813  AS2( psubq mm0, mm1)
814  AS2( psubq mm0, mm2)
815  AS2( movd DWORD PTR [edx+4*ecx], mm0)
816  AS2( psrlq mm0, 63)
817 
818  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
819  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
820  AS2( psubq mm2, mm1)
821  AS2( psubq mm2, mm0)
822  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
823  AS2( psrlq mm2, 63)
824 
825  ASL(1)
826  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
827  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
828  AS2( psubq mm0, mm1)
829  AS2( psubq mm0, mm2)
830  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
831  AS2( psrlq mm0, 63)
832 
833  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
834  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
835  AS2( psubq mm2, mm1)
836  AS2( psubq mm2, mm0)
837  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
838  AS2( psrlq mm2, 63)
839 
840  AS2( add ecx, 4)
841  ASJ( jnz, 0, b)
842 
843  ASL(2)
844  AS2( movd eax, mm2)
845  AS1( emms)
846 
847  AddEpilogue
848 }
849 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
850 #else
851 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
852 {
853  assert (N%2 == 0);
854 
855  Declare2Words(u);
856  AssignWord(u, 0);
857  for (size_t i=0; i<N; i+=2)
858  {
859  AddWithCarry(u, A[i], B[i]);
860  C[i] = LowWord(u);
861  AddWithCarry(u, A[i+1], B[i+1]);
862  C[i+1] = LowWord(u);
863  }
864  return int(GetCarry(u));
865 }
866 
867 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
868 {
869  assert (N%2 == 0);
870 
871  Declare2Words(u);
872  AssignWord(u, 0);
873  for (size_t i=0; i<N; i+=2)
874  {
875  SubtractWithBorrow(u, A[i], B[i]);
876  C[i] = LowWord(u);
877  SubtractWithBorrow(u, A[i+1], B[i+1]);
878  C[i+1] = LowWord(u);
879  }
880  return int(GetBorrow(u));
881 }
882 #endif
883 
884 static word LinearMultiply(word *C, const word *A, word B, size_t N)
885 {
886  word carry=0;
887  for(unsigned i=0; i<N; i++)
888  {
889  Declare2Words(p);
890  MultiplyWords(p, A[i], B);
891  Acc2WordsBy1(p, carry);
892  C[i] = LowWord(p);
893  carry = HighWord(p);
894  }
895  return carry;
896 }
897 
898 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
899 
900 #define Mul_2 \
901  Mul_Begin(2) \
902  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
903  Mul_End(1, 1)
904 
905 #define Mul_4 \
906  Mul_Begin(4) \
907  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
908  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
909  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
910  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
911  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
912  Mul_End(5, 3)
913 
914 #define Mul_8 \
915  Mul_Begin(8) \
916  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
917  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
918  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
919  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
920  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
921  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
922  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
923  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
924  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
925  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
926  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
927  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
928  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
929  Mul_End(13, 7)
930 
931 #define Mul_16 \
932  Mul_Begin(16) \
933  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
934  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
935  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
936  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
937  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
938  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
939  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
940  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
941  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
942  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
943  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
944  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
945  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
946  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
947  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
948  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
949  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
950  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
951  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
952  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
953  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
954  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
955  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
956  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
957  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
958  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
959  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
960  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
961  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
962  Mul_End(29, 15)
963 
964 #define Squ_2 \
965  Squ_Begin(2) \
966  Squ_End(2)
967 
968 #define Squ_4 \
969  Squ_Begin(4) \
970  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
971  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
972  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
973  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
974  Squ_End(4)
975 
976 #define Squ_8 \
977  Squ_Begin(8) \
978  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
979  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
980  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
981  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
982  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
983  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
984  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
985  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
986  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
987  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
988  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
989  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
990  Squ_End(8)
991 
992 #define Squ_16 \
993  Squ_Begin(16) \
994  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
995  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
996  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
997  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
998  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
999  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1000  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1001  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1002  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1003  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1004  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1005  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1006  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1007  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1008  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1009  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1010  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1011  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1012  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1013  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1014  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1015  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1016  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1017  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1018  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1019  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1020  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1021  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1022  Squ_End(16)
1023 
1024 #define Bot_2 \
1025  Mul_Begin(2) \
1026  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1027  Bot_End(2)
1028 
1029 #define Bot_4 \
1030  Mul_Begin(4) \
1031  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1032  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1033  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1034  Bot_End(4)
1035 
1036 #define Bot_8 \
1037  Mul_Begin(8) \
1038  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1039  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1040  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1041  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1042  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1043  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1044  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1045  Bot_End(8)
1046 
1047 #define Bot_16 \
1048  Mul_Begin(16) \
1049  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1050  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1051  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1052  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1053  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1054  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1055  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1056  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1057  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1058  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1059  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1060  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1061  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1062  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1063  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1064  Bot_End(16)
1065 
1066 #endif
1067 
1068 #if 0
1069 #define Mul_Begin(n) \
1070  Declare2Words(p) \
1071  Declare2Words(c) \
1072  Declare2Words(d) \
1073  MultiplyWords(p, A[0], B[0]) \
1074  AssignWord(c, LowWord(p)) \
1075  AssignWord(d, HighWord(p))
1076 
1077 #define Mul_Acc(i, j) \
1078  MultiplyWords(p, A[i], B[j]) \
1079  Acc2WordsBy1(c, LowWord(p)) \
1080  Acc2WordsBy1(d, HighWord(p))
1081 
1082 #define Mul_SaveAcc(k, i, j) \
1083  R[k] = LowWord(c); \
1084  Add2WordsBy1(c, d, HighWord(c)) \
1085  MultiplyWords(p, A[i], B[j]) \
1086  AssignWord(d, HighWord(p)) \
1087  Acc2WordsBy1(c, LowWord(p))
1088 
1089 #define Mul_End(n) \
1090  R[2*n-3] = LowWord(c); \
1091  Acc2WordsBy1(d, HighWord(c)) \
1092  MultiplyWords(p, A[n-1], B[n-1])\
1093  Acc2WordsBy2(d, p) \
1094  R[2*n-2] = LowWord(d); \
1095  R[2*n-1] = HighWord(d);
1096 
1097 #define Bot_SaveAcc(k, i, j) \
1098  R[k] = LowWord(c); \
1099  word e = LowWord(d) + HighWord(c); \
1100  e += A[i] * B[j];
1101 
1102 #define Bot_Acc(i, j) \
1103  e += A[i] * B[j];
1104 
1105 #define Bot_End(n) \
1106  R[n-1] = e;
1107 #else
1108 #define Mul_Begin(n) \
1109  Declare2Words(p) \
1110  word c; \
1111  Declare2Words(d) \
1112  MultiplyWords(p, A[0], B[0]) \
1113  c = LowWord(p); \
1114  AssignWord(d, HighWord(p))
1115 
1116 #define Mul_Acc(i, j) \
1117  MulAcc(c, d, A[i], B[j])
1118 
1119 #define Mul_SaveAcc(k, i, j) \
1120  R[k] = c; \
1121  c = LowWord(d); \
1122  AssignWord(d, HighWord(d)) \
1123  MulAcc(c, d, A[i], B[j])
1124 
1125 #define Mul_End(k, i) \
1126  R[k] = c; \
1127  MultiplyWords(p, A[i], B[i]) \
1128  Acc2WordsBy2(p, d) \
1129  R[k+1] = LowWord(p); \
1130  R[k+2] = HighWord(p);
1131 
1132 #define Bot_SaveAcc(k, i, j) \
1133  R[k] = c; \
1134  c = LowWord(d); \
1135  c += A[i] * B[j];
1136 
1137 #define Bot_Acc(i, j) \
1138  c += A[i] * B[j];
1139 
1140 #define Bot_End(n) \
1141  R[n-1] = c;
1142 #endif
1143 
1144 #define Squ_Begin(n) \
1145  Declare2Words(p) \
1146  word c; \
1147  Declare2Words(d) \
1148  Declare2Words(e) \
1149  MultiplyWords(p, A[0], A[0]) \
1150  R[0] = LowWord(p); \
1151  AssignWord(e, HighWord(p)) \
1152  MultiplyWords(p, A[0], A[1]) \
1153  c = LowWord(p); \
1154  AssignWord(d, HighWord(p)) \
1155  Squ_NonDiag \
1156 
1157 #define Squ_NonDiag \
1158  Double3Words(c, d)
1159 
1160 #define Squ_SaveAcc(k, i, j) \
1161  Acc3WordsBy2(c, d, e) \
1162  R[k] = c; \
1163  MultiplyWords(p, A[i], A[j]) \
1164  c = LowWord(p); \
1165  AssignWord(d, HighWord(p)) \
1166 
1167 #define Squ_Acc(i, j) \
1168  MulAcc(c, d, A[i], A[j])
1169 
1170 #define Squ_Diag(i) \
1171  Squ_NonDiag \
1172  MulAcc(c, d, A[i], A[i])
1173 
1174 #define Squ_End(n) \
1175  Acc3WordsBy2(c, d, e) \
1176  R[2*n-3] = c; \
1177  MultiplyWords(p, A[n-1], A[n-1])\
1178  Acc2WordsBy2(p, e) \
1179  R[2*n-2] = LowWord(p); \
1180  R[2*n-1] = HighWord(p);
1181 
1182 
1183 void Baseline_Multiply2(word *R, const word *A, const word *B)
1184 {
1185  Mul_2
1186 }
1187 
1188 void Baseline_Multiply4(word *R, const word *A, const word *B)
1189 {
1190  Mul_4
1191 }
1192 
1193 void Baseline_Multiply8(word *R, const word *A, const word *B)
1194 {
1195  Mul_8
1196 }
1197 
1198 void Baseline_Square2(word *R, const word *A)
1199 {
1200  Squ_2
1201 }
1202 
1203 void Baseline_Square4(word *R, const word *A)
1204 {
1205  Squ_4
1206 }
1207 
1208 void Baseline_Square8(word *R, const word *A)
1209 {
1210  Squ_8
1211 }
1212 
1213 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
1214 {
1215  Bot_2
1216 }
1217 
1218 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
1219 {
1220  Bot_4
1221 }
1222 
1223 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
1224 {
1225  Bot_8
1226 }
1227 
1228 #define Top_Begin(n) \
1229  Declare2Words(p) \
1230  word c; \
1231  Declare2Words(d) \
1232  MultiplyWords(p, A[0], B[n-2]);\
1233  AssignWord(d, HighWord(p));
1234 
1235 #define Top_Acc(i, j) \
1236  MultiplyWords(p, A[i], B[j]);\
1237  Acc2WordsBy1(d, HighWord(p));
1238 
1239 #define Top_SaveAcc0(i, j) \
1240  c = LowWord(d); \
1241  AssignWord(d, HighWord(d)) \
1242  MulAcc(c, d, A[i], B[j])
1243 
1244 #define Top_SaveAcc1(i, j) \
1245  c = L<c; \
1246  Acc2WordsBy1(d, c); \
1247  c = LowWord(d); \
1248  AssignWord(d, HighWord(d)) \
1249  MulAcc(c, d, A[i], B[j])
1250 
1251 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1252 {
1253  CRYPTOPP_UNUSED(L);
1254  word T[4];
1255  Baseline_Multiply2(T, A, B);
1256  R[0] = T[2];
1257  R[1] = T[3];
1258 }
1259 
1260 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
1261 {
1262  Top_Begin(4)
1263  Top_Acc(1, 1) Top_Acc(2, 0) \
1264  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1265  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1266  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1267  Mul_End(1, 3)
1268 }
1269 
1270 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
1271 {
1272  Top_Begin(8)
1273  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1274  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1275  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1276  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1277  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1278  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1279  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1280  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1281  Mul_End(5, 7)
1282 }
1283 
1284 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1285 void Baseline_Multiply16(word *R, const word *A, const word *B)
1286 {
1287  Mul_16
1288 }
1289 
1290 void Baseline_Square16(word *R, const word *A)
1291 {
1292  Squ_16
1293 }
1294 
1295 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
1296 {
1297  Bot_16
1298 }
1299 
1300 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
1301 {
1302  Top_Begin(16)
1303  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1304  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1305  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1306  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1307  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1308  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1309  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1310  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1311  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1312  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1313  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1314  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1315  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1316  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1317  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1318  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1319  Mul_End(13, 15)
1320 }
1321 #endif
1322 
1323 // ********************************************************
1324 
1325 #if CRYPTOPP_INTEGER_SSE2
1326 
1327 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1328 
1329 #undef Mul_Begin
1330 #undef Mul_Acc
1331 #undef Top_Begin
1332 #undef Top_Acc
1333 #undef Squ_Acc
1334 #undef Squ_NonDiag
1335 #undef Squ_Diag
1336 #undef Squ_SaveAcc
1337 #undef Squ_Begin
1338 #undef Mul_SaveAcc
1339 #undef Bot_Acc
1340 #undef Bot_SaveAcc
1341 #undef Bot_End
1342 #undef Squ_End
1343 #undef Mul_End
1344 
1345 #define SSE2_FinalSave(k) \
1346  AS2( psllq xmm5, 16) \
1347  AS2( paddq xmm4, xmm5) \
1348  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1349 
1350 #define SSE2_SaveShift(k) \
1351  AS2( movq xmm0, xmm6) \
1352  AS2( punpckhqdq xmm6, xmm0) \
1353  AS2( movq xmm1, xmm7) \
1354  AS2( punpckhqdq xmm7, xmm1) \
1355  AS2( paddd xmm6, xmm0) \
1356  AS2( pslldq xmm6, 4) \
1357  AS2( paddd xmm7, xmm1) \
1358  AS2( paddd xmm4, xmm6) \
1359  AS2( pslldq xmm7, 4) \
1360  AS2( movq xmm6, xmm4) \
1361  AS2( paddd xmm5, xmm7) \
1362  AS2( movq xmm7, xmm5) \
1363  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1364  AS2( psrlq xmm6, 16) \
1365  AS2( paddq xmm6, xmm7) \
1366  AS2( punpckhqdq xmm4, xmm0) \
1367  AS2( punpckhqdq xmm5, xmm0) \
1368  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1369  AS2( psrlq xmm6, 3*16) \
1370  AS2( paddd xmm4, xmm6) \
1371 
1372 #define Squ_SSE2_SaveShift(k) \
1373  AS2( movq xmm0, xmm6) \
1374  AS2( punpckhqdq xmm6, xmm0) \
1375  AS2( movq xmm1, xmm7) \
1376  AS2( punpckhqdq xmm7, xmm1) \
1377  AS2( paddd xmm6, xmm0) \
1378  AS2( pslldq xmm6, 4) \
1379  AS2( paddd xmm7, xmm1) \
1380  AS2( paddd xmm4, xmm6) \
1381  AS2( pslldq xmm7, 4) \
1382  AS2( movhlps xmm6, xmm4) \
1383  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1384  AS2( paddd xmm5, xmm7) \
1385  AS2( movhps QWORD PTR [esp+12], xmm5)\
1386  AS2( psrlq xmm4, 16) \
1387  AS2( paddq xmm4, xmm5) \
1388  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1389  AS2( psrlq xmm4, 3*16) \
1390  AS2( paddd xmm4, xmm6) \
1391  AS2( movq QWORD PTR [esp+4], xmm4)\
1392 
1393 #define SSE2_FirstMultiply(i) \
1394  AS2( movdqa xmm7, [esi+(i)*16])\
1395  AS2( movdqa xmm5, [edi-(i)*16])\
1396  AS2( pmuludq xmm5, xmm7) \
1397  AS2( movdqa xmm4, [ebx])\
1398  AS2( movdqa xmm6, xmm4) \
1399  AS2( pand xmm4, xmm5) \
1400  AS2( psrld xmm5, 16) \
1401  AS2( pmuludq xmm7, [edx-(i)*16])\
1402  AS2( pand xmm6, xmm7) \
1403  AS2( psrld xmm7, 16)
1404 
1405 #define Squ_Begin(n) \
1406  SquPrologue \
1407  AS2( mov esi, esp)\
1408  AS2( and esp, 0xfffffff0)\
1409  AS2( lea edi, [esp-32*n])\
1410  AS2( sub esp, 32*n+16)\
1411  AS1( push esi)\
1412  AS2( mov esi, edi) \
1413  AS2( xor edx, edx) \
1414  ASL(1) \
1415  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1416  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1417  AS2( movdqa [edi+2*edx], xmm0) \
1418  AS2( psrlq xmm0, 32) \
1419  AS2( movdqa [edi+2*edx+16], xmm0) \
1420  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1421  AS2( psrlq xmm1, 32) \
1422  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1423  AS2( add edx, 16) \
1424  AS2( cmp edx, 8*(n)) \
1425  ASJ( jne, 1, b) \
1426  AS2( lea edx, [edi+16*n])\
1427  SSE2_FirstMultiply(0) \
1428 
1429 #define Squ_Acc(i) \
1430  ASL(LSqu##i) \
1431  AS2( movdqa xmm1, [esi+(i)*16]) \
1432  AS2( movdqa xmm0, [edi-(i)*16]) \
1433  AS2( movdqa xmm2, [ebx]) \
1434  AS2( pmuludq xmm0, xmm1) \
1435  AS2( pmuludq xmm1, [edx-(i)*16]) \
1436  AS2( movdqa xmm3, xmm2) \
1437  AS2( pand xmm2, xmm0) \
1438  AS2( psrld xmm0, 16) \
1439  AS2( paddd xmm4, xmm2) \
1440  AS2( paddd xmm5, xmm0) \
1441  AS2( pand xmm3, xmm1) \
1442  AS2( psrld xmm1, 16) \
1443  AS2( paddd xmm6, xmm3) \
1444  AS2( paddd xmm7, xmm1) \
1445 
1446 #define Squ_Acc1(i)
1447 #define Squ_Acc2(i) ASC(call, LSqu##i)
1448 #define Squ_Acc3(i) Squ_Acc2(i)
1449 #define Squ_Acc4(i) Squ_Acc2(i)
1450 #define Squ_Acc5(i) Squ_Acc2(i)
1451 #define Squ_Acc6(i) Squ_Acc2(i)
1452 #define Squ_Acc7(i) Squ_Acc2(i)
1453 #define Squ_Acc8(i) Squ_Acc2(i)
1454 
1455 #define SSE2_End(E, n) \
1456  SSE2_SaveShift(2*(n)-3) \
1457  AS2( movdqa xmm7, [esi+16]) \
1458  AS2( movdqa xmm0, [edi]) \
1459  AS2( pmuludq xmm0, xmm7) \
1460  AS2( movdqa xmm2, [ebx]) \
1461  AS2( pmuludq xmm7, [edx]) \
1462  AS2( movdqa xmm6, xmm2) \
1463  AS2( pand xmm2, xmm0) \
1464  AS2( psrld xmm0, 16) \
1465  AS2( paddd xmm4, xmm2) \
1466  AS2( paddd xmm5, xmm0) \
1467  AS2( pand xmm6, xmm7) \
1468  AS2( psrld xmm7, 16) \
1469  SSE2_SaveShift(2*(n)-2) \
1470  SSE2_FinalSave(2*(n)-1) \
1471  AS1( pop esp)\
1472  E
1473 
1474 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1475 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1476 #define Top_End(n) SSE2_End(TopEpilogue, n)
1477 
1478 #define Squ_Column1(k, i) \
1479  Squ_SSE2_SaveShift(k) \
1480  AS2( add esi, 16) \
1481  SSE2_FirstMultiply(1)\
1482  Squ_Acc##i(i) \
1483  AS2( paddd xmm4, xmm4) \
1484  AS2( paddd xmm5, xmm5) \
1485  AS2( movdqa xmm3, [esi]) \
1486  AS2( movq xmm1, QWORD PTR [esi+8]) \
1487  AS2( pmuludq xmm1, xmm3) \
1488  AS2( pmuludq xmm3, xmm3) \
1489  AS2( movdqa xmm0, [ebx])\
1490  AS2( movdqa xmm2, xmm0) \
1491  AS2( pand xmm0, xmm1) \
1492  AS2( psrld xmm1, 16) \
1493  AS2( paddd xmm6, xmm0) \
1494  AS2( paddd xmm7, xmm1) \
1495  AS2( pand xmm2, xmm3) \
1496  AS2( psrld xmm3, 16) \
1497  AS2( paddd xmm6, xmm6) \
1498  AS2( paddd xmm7, xmm7) \
1499  AS2( paddd xmm4, xmm2) \
1500  AS2( paddd xmm5, xmm3) \
1501  AS2( movq xmm0, QWORD PTR [esp+4])\
1502  AS2( movq xmm1, QWORD PTR [esp+12])\
1503  AS2( paddd xmm4, xmm0)\
1504  AS2( paddd xmm5, xmm1)\
1505 
1506 #define Squ_Column0(k, i) \
1507  Squ_SSE2_SaveShift(k) \
1508  AS2( add edi, 16) \
1509  AS2( add edx, 16) \
1510  SSE2_FirstMultiply(1)\
1511  Squ_Acc##i(i) \
1512  AS2( paddd xmm6, xmm6) \
1513  AS2( paddd xmm7, xmm7) \
1514  AS2( paddd xmm4, xmm4) \
1515  AS2( paddd xmm5, xmm5) \
1516  AS2( movq xmm0, QWORD PTR [esp+4])\
1517  AS2( movq xmm1, QWORD PTR [esp+12])\
1518  AS2( paddd xmm4, xmm0)\
1519  AS2( paddd xmm5, xmm1)\
1520 
1521 #define SSE2_MulAdd45 \
1522  AS2( movdqa xmm7, [esi]) \
1523  AS2( movdqa xmm0, [edi]) \
1524  AS2( pmuludq xmm0, xmm7) \
1525  AS2( movdqa xmm2, [ebx]) \
1526  AS2( pmuludq xmm7, [edx]) \
1527  AS2( movdqa xmm6, xmm2) \
1528  AS2( pand xmm2, xmm0) \
1529  AS2( psrld xmm0, 16) \
1530  AS2( paddd xmm4, xmm2) \
1531  AS2( paddd xmm5, xmm0) \
1532  AS2( pand xmm6, xmm7) \
1533  AS2( psrld xmm7, 16)
1534 
1535 #define Mul_Begin(n) \
1536  MulPrologue \
1537  AS2( mov esi, esp)\
1538  AS2( and esp, 0xfffffff0)\
1539  AS2( sub esp, 48*n+16)\
1540  AS1( push esi)\
1541  AS2( xor edx, edx) \
1542  ASL(1) \
1543  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1544  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1545  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1546  AS2( movdqa [esp+20+2*edx], xmm0) \
1547  AS2( psrlq xmm0, 32) \
1548  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1549  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1550  AS2( psrlq xmm1, 32) \
1551  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1552  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1553  AS2( psrlq xmm2, 32) \
1554  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1555  AS2( add edx, 16) \
1556  AS2( cmp edx, 8*(n)) \
1557  ASJ( jne, 1, b) \
1558  AS2( lea edi, [esp+20])\
1559  AS2( lea edx, [esp+20+16*n])\
1560  AS2( lea esi, [esp+20+32*n])\
1561  SSE2_FirstMultiply(0) \
1562 
1563 #define Mul_Acc(i) \
1564  ASL(LMul##i) \
1565  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1566  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1567  AS2( movdqa xmm2, [ebx]) \
1568  AS2( pmuludq xmm0, xmm1) \
1569  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1570  AS2( movdqa xmm3, xmm2) \
1571  AS2( pand xmm2, xmm0) \
1572  AS2( psrld xmm0, 16) \
1573  AS2( paddd xmm4, xmm2) \
1574  AS2( paddd xmm5, xmm0) \
1575  AS2( pand xmm3, xmm1) \
1576  AS2( psrld xmm1, 16) \
1577  AS2( paddd xmm6, xmm3) \
1578  AS2( paddd xmm7, xmm1) \
1579 
1580 #define Mul_Acc1(i)
1581 #define Mul_Acc2(i) ASC(call, LMul##i)
1582 #define Mul_Acc3(i) Mul_Acc2(i)
1583 #define Mul_Acc4(i) Mul_Acc2(i)
1584 #define Mul_Acc5(i) Mul_Acc2(i)
1585 #define Mul_Acc6(i) Mul_Acc2(i)
1586 #define Mul_Acc7(i) Mul_Acc2(i)
1587 #define Mul_Acc8(i) Mul_Acc2(i)
1588 #define Mul_Acc9(i) Mul_Acc2(i)
1589 #define Mul_Acc10(i) Mul_Acc2(i)
1590 #define Mul_Acc11(i) Mul_Acc2(i)
1591 #define Mul_Acc12(i) Mul_Acc2(i)
1592 #define Mul_Acc13(i) Mul_Acc2(i)
1593 #define Mul_Acc14(i) Mul_Acc2(i)
1594 #define Mul_Acc15(i) Mul_Acc2(i)
1595 #define Mul_Acc16(i) Mul_Acc2(i)
1596 
1597 #define Mul_Column1(k, i) \
1598  SSE2_SaveShift(k) \
1599  AS2( add esi, 16) \
1600  SSE2_MulAdd45\
1601  Mul_Acc##i(i) \
1602 
1603 #define Mul_Column0(k, i) \
1604  SSE2_SaveShift(k) \
1605  AS2( add edi, 16) \
1606  AS2( add edx, 16) \
1607  SSE2_MulAdd45\
1608  Mul_Acc##i(i) \
1609 
1610 #define Bot_Acc(i) \
1611  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1612  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1613  AS2( pmuludq xmm0, xmm1) \
1614  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1615  AS2( paddq xmm4, xmm0) \
1616  AS2( paddd xmm6, xmm1)
1617 
1618 #define Bot_SaveAcc(k) \
1619  SSE2_SaveShift(k) \
1620  AS2( add edi, 16) \
1621  AS2( add edx, 16) \
1622  AS2( movdqa xmm6, [esi]) \
1623  AS2( movdqa xmm0, [edi]) \
1624  AS2( pmuludq xmm0, xmm6) \
1625  AS2( paddq xmm4, xmm0) \
1626  AS2( psllq xmm5, 16) \
1627  AS2( paddq xmm4, xmm5) \
1628  AS2( pmuludq xmm6, [edx])
1629 
1630 #define Bot_End(n) \
1631  AS2( movhlps xmm7, xmm6) \
1632  AS2( paddd xmm6, xmm7) \
1633  AS2( psllq xmm6, 32) \
1634  AS2( paddd xmm4, xmm6) \
1635  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1636  AS1( pop esp)\
1637  MulEpilogue
1638 
1639 #define Top_Begin(n) \
1640  TopPrologue \
1641  AS2( mov edx, esp)\
1642  AS2( and esp, 0xfffffff0)\
1643  AS2( sub esp, 48*n+16)\
1644  AS1( push edx)\
1645  AS2( xor edx, edx) \
1646  ASL(1) \
1647  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1648  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1649  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1650  AS2( movdqa [esp+20+2*edx], xmm0) \
1651  AS2( psrlq xmm0, 32) \
1652  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1653  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1654  AS2( psrlq xmm1, 32) \
1655  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1656  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1657  AS2( psrlq xmm2, 32) \
1658  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1659  AS2( add edx, 16) \
1660  AS2( cmp edx, 8*(n)) \
1661  ASJ( jne, 1, b) \
1662  AS2( mov eax, esi) \
1663  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1664  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1665  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1666  AS2( pxor xmm4, xmm4)\
1667  AS2( pxor xmm5, xmm5)
1668 
1669 #define Top_Acc(i) \
1670  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1671  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1672  AS2( psrlq xmm0, 48) \
1673  AS2( paddd xmm5, xmm0)\
1674 
1675 #define Top_Column0(i) \
1676  AS2( psllq xmm5, 32) \
1677  AS2( add edi, 16) \
1678  AS2( add edx, 16) \
1679  SSE2_MulAdd45\
1680  Mul_Acc##i(i) \
1681 
1682 #define Top_Column1(i) \
1683  SSE2_SaveShift(0) \
1684  AS2( add esi, 16) \
1685  SSE2_MulAdd45\
1686  Mul_Acc##i(i) \
1687  AS2( shr eax, 16) \
1688  AS2( movd xmm0, eax)\
1689  AS2( movd xmm1, [ecx+4])\
1690  AS2( psrld xmm1, 16)\
1691  AS2( pcmpgtd xmm1, xmm0)\
1692  AS2( psrld xmm1, 31)\
1693  AS2( paddd xmm4, xmm1)\
1694 
1695 void SSE2_Square4(word *C, const word *A)
1696 {
1697  Squ_Begin(2)
1698  Squ_Column0(0, 1)
1699  Squ_End(2)
1700 }
1701 
1702 void SSE2_Square8(word *C, const word *A)
1703 {
1704  Squ_Begin(4)
1705 #ifndef __GNUC__
1706  ASJ( jmp, 0, f)
1707  Squ_Acc(2)
1708  AS1( ret) ASL(0)
1709 #endif
1710  Squ_Column0(0, 1)
1711  Squ_Column1(1, 1)
1712  Squ_Column0(2, 2)
1713  Squ_Column1(3, 1)
1714  Squ_Column0(4, 1)
1715  Squ_End(4)
1716 }
1717 
1718 void SSE2_Square16(word *C, const word *A)
1719 {
1720  Squ_Begin(8)
1721 #ifndef __GNUC__
1722  ASJ( jmp, 0, f)
1723  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1724  AS1( ret) ASL(0)
1725 #endif
1726  Squ_Column0(0, 1)
1727  Squ_Column1(1, 1)
1728  Squ_Column0(2, 2)
1729  Squ_Column1(3, 2)
1730  Squ_Column0(4, 3)
1731  Squ_Column1(5, 3)
1732  Squ_Column0(6, 4)
1733  Squ_Column1(7, 3)
1734  Squ_Column0(8, 3)
1735  Squ_Column1(9, 2)
1736  Squ_Column0(10, 2)
1737  Squ_Column1(11, 1)
1738  Squ_Column0(12, 1)
1739  Squ_End(8)
1740 }
1741 
1742 void SSE2_Square32(word *C, const word *A)
1743 {
1744  Squ_Begin(16)
1745  ASJ( jmp, 0, f)
1746  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1747  AS1( ret) ASL(0)
1748  Squ_Column0(0, 1)
1749  Squ_Column1(1, 1)
1750  Squ_Column0(2, 2)
1751  Squ_Column1(3, 2)
1752  Squ_Column0(4, 3)
1753  Squ_Column1(5, 3)
1754  Squ_Column0(6, 4)
1755  Squ_Column1(7, 4)
1756  Squ_Column0(8, 5)
1757  Squ_Column1(9, 5)
1758  Squ_Column0(10, 6)
1759  Squ_Column1(11, 6)
1760  Squ_Column0(12, 7)
1761  Squ_Column1(13, 7)
1762  Squ_Column0(14, 8)
1763  Squ_Column1(15, 7)
1764  Squ_Column0(16, 7)
1765  Squ_Column1(17, 6)
1766  Squ_Column0(18, 6)
1767  Squ_Column1(19, 5)
1768  Squ_Column0(20, 5)
1769  Squ_Column1(21, 4)
1770  Squ_Column0(22, 4)
1771  Squ_Column1(23, 3)
1772  Squ_Column0(24, 3)
1773  Squ_Column1(25, 2)
1774  Squ_Column0(26, 2)
1775  Squ_Column1(27, 1)
1776  Squ_Column0(28, 1)
1777  Squ_End(16)
1778 }
1779 
1780 void SSE2_Multiply4(word *C, const word *A, const word *B)
1781 {
1782  Mul_Begin(2)
1783 #ifndef __GNUC__
1784  ASJ( jmp, 0, f)
1785  Mul_Acc(2)
1786  AS1( ret) ASL(0)
1787 #endif
1788  Mul_Column0(0, 2)
1789  Mul_End(2)
1790 }
1791 
1792 void SSE2_Multiply8(word *C, const word *A, const word *B)
1793 {
1794  Mul_Begin(4)
1795 #ifndef __GNUC__
1796  ASJ( jmp, 0, f)
1797  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1798  AS1( ret) ASL(0)
1799 #endif
1800  Mul_Column0(0, 2)
1801  Mul_Column1(1, 3)
1802  Mul_Column0(2, 4)
1803  Mul_Column1(3, 3)
1804  Mul_Column0(4, 2)
1805  Mul_End(4)
1806 }
1807 
1808 void SSE2_Multiply16(word *C, const word *A, const word *B)
1809 {
1810  Mul_Begin(8)
1811 #ifndef __GNUC__
1812  ASJ( jmp, 0, f)
1813  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1814  AS1( ret) ASL(0)
1815 #endif
1816  Mul_Column0(0, 2)
1817  Mul_Column1(1, 3)
1818  Mul_Column0(2, 4)
1819  Mul_Column1(3, 5)
1820  Mul_Column0(4, 6)
1821  Mul_Column1(5, 7)
1822  Mul_Column0(6, 8)
1823  Mul_Column1(7, 7)
1824  Mul_Column0(8, 6)
1825  Mul_Column1(9, 5)
1826  Mul_Column0(10, 4)
1827  Mul_Column1(11, 3)
1828  Mul_Column0(12, 2)
1829  Mul_End(8)
1830 }
1831 
1832 void SSE2_Multiply32(word *C, const word *A, const word *B)
1833 {
1834  Mul_Begin(16)
1835  ASJ( jmp, 0, f)
1836  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1837  AS1( ret) ASL(0)
1838  Mul_Column0(0, 2)
1839  Mul_Column1(1, 3)
1840  Mul_Column0(2, 4)
1841  Mul_Column1(3, 5)
1842  Mul_Column0(4, 6)
1843  Mul_Column1(5, 7)
1844  Mul_Column0(6, 8)
1845  Mul_Column1(7, 9)
1846  Mul_Column0(8, 10)
1847  Mul_Column1(9, 11)
1848  Mul_Column0(10, 12)
1849  Mul_Column1(11, 13)
1850  Mul_Column0(12, 14)
1851  Mul_Column1(13, 15)
1852  Mul_Column0(14, 16)
1853  Mul_Column1(15, 15)
1854  Mul_Column0(16, 14)
1855  Mul_Column1(17, 13)
1856  Mul_Column0(18, 12)
1857  Mul_Column1(19, 11)
1858  Mul_Column0(20, 10)
1859  Mul_Column1(21, 9)
1860  Mul_Column0(22, 8)
1861  Mul_Column1(23, 7)
1862  Mul_Column0(24, 6)
1863  Mul_Column1(25, 5)
1864  Mul_Column0(26, 4)
1865  Mul_Column1(27, 3)
1866  Mul_Column0(28, 2)
1867  Mul_End(16)
1868 }
1869 
1870 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1871 {
1872  Mul_Begin(2)
1873  Bot_SaveAcc(0) Bot_Acc(2)
1874  Bot_End(2)
1875 }
1876 
1877 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1878 {
1879  Mul_Begin(4)
1880 #ifndef __GNUC__
1881  ASJ( jmp, 0, f)
1882  Mul_Acc(3) Mul_Acc(2)
1883  AS1( ret) ASL(0)
1884 #endif
1885  Mul_Column0(0, 2)
1886  Mul_Column1(1, 3)
1887  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1888  Bot_End(4)
1889 }
1890 
1891 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1892 {
1893  Mul_Begin(8)
1894 #ifndef __GNUC__
1895  ASJ( jmp, 0, f)
1896  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1897  AS1( ret) ASL(0)
1898 #endif
1899  Mul_Column0(0, 2)
1900  Mul_Column1(1, 3)
1901  Mul_Column0(2, 4)
1902  Mul_Column1(3, 5)
1903  Mul_Column0(4, 6)
1904  Mul_Column1(5, 7)
1905  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1906  Bot_End(8)
1907 }
1908 
1909 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1910 {
1911  Mul_Begin(16)
1912 #ifndef __GNUC__
1913  ASJ( jmp, 0, f)
1914  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1915  AS1( ret) ASL(0)
1916 #endif
1917  Mul_Column0(0, 2)
1918  Mul_Column1(1, 3)
1919  Mul_Column0(2, 4)
1920  Mul_Column1(3, 5)
1921  Mul_Column0(4, 6)
1922  Mul_Column1(5, 7)
1923  Mul_Column0(6, 8)
1924  Mul_Column1(7, 9)
1925  Mul_Column0(8, 10)
1926  Mul_Column1(9, 11)
1927  Mul_Column0(10, 12)
1928  Mul_Column1(11, 13)
1929  Mul_Column0(12, 14)
1930  Mul_Column1(13, 15)
1931  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1932  Bot_End(16)
1933 }
1934 
1935 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1936 {
1937  Top_Begin(4)
1938  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1939 #ifndef __GNUC__
1940  ASJ( jmp, 0, f)
1941  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1942  AS1( ret) ASL(0)
1943 #endif
1944  Top_Column0(4)
1945  Top_Column1(3)
1946  Mul_Column0(0, 2)
1947  Top_End(2)
1948 }
1949 
1950 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
1951 {
1952  Top_Begin(8)
1953  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1954 #ifndef __GNUC__
1955  ASJ( jmp, 0, f)
1956  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1957  AS1( ret) ASL(0)
1958 #endif
1959  Top_Column0(8)
1960  Top_Column1(7)
1961  Mul_Column0(0, 6)
1962  Mul_Column1(1, 5)
1963  Mul_Column0(2, 4)
1964  Mul_Column1(3, 3)
1965  Mul_Column0(4, 2)
1966  Top_End(4)
1967 }
1968 
1969 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
1970 {
1971  Top_Begin(16)
1972  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1973 #ifndef __GNUC__
1974  ASJ( jmp, 0, f)
1975  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1976  AS1( ret) ASL(0)
1977 #endif
1978  Top_Column0(16)
1979  Top_Column1(15)
1980  Mul_Column0(0, 14)
1981  Mul_Column1(1, 13)
1982  Mul_Column0(2, 12)
1983  Mul_Column1(3, 11)
1984  Mul_Column0(4, 10)
1985  Mul_Column1(5, 9)
1986  Mul_Column0(6, 8)
1987  Mul_Column1(7, 7)
1988  Mul_Column0(8, 6)
1989  Mul_Column1(9, 5)
1990  Mul_Column0(10, 4)
1991  Mul_Column1(11, 3)
1992  Mul_Column0(12, 2)
1993  Top_End(8)
1994 }
1995 
1996 #endif // #if CRYPTOPP_INTEGER_SSE2
1997 
1998 // ********************************************************
1999 
2000 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2001 typedef void (* PMul)(word *C, const word *A, const word *B);
2002 typedef void (* PSqu)(word *C, const word *A);
2003 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2004 
2005 #if CRYPTOPP_INTEGER_SSE2
2006 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2007 static size_t s_recursionLimit = 8;
2008 #else
2009 static const size_t s_recursionLimit = 16;
2010 #endif
2011 
2012 static PMul s_pMul[9], s_pBot[9];
2013 static PSqu s_pSqu[9];
2014 static PMulTop s_pTop[9];
2015 
2016 static void SetFunctionPointers()
2017 {
2018  s_pMul[0] = &Baseline_Multiply2;
2019  s_pBot[0] = &Baseline_MultiplyBottom2;
2020  s_pSqu[0] = &Baseline_Square2;
2021  s_pTop[0] = &Baseline_MultiplyTop2;
2022  s_pTop[1] = &Baseline_MultiplyTop4;
2023 
2024 #if CRYPTOPP_INTEGER_SSE2
2025  if (HasSSE2())
2026  {
2027 #if _MSC_VER != 1200 || defined(NDEBUG)
2028  if (IsP4())
2029  {
2030  s_pAdd = &SSE2_Add;
2031  s_pSub = &SSE2_Sub;
2032  }
2033 #endif
2034 
2035  s_recursionLimit = 32;
2036 
2037  s_pMul[1] = &SSE2_Multiply4;
2038  s_pMul[2] = &SSE2_Multiply8;
2039  s_pMul[4] = &SSE2_Multiply16;
2040  s_pMul[8] = &SSE2_Multiply32;
2041 
2042  s_pBot[1] = &SSE2_MultiplyBottom4;
2043  s_pBot[2] = &SSE2_MultiplyBottom8;
2044  s_pBot[4] = &SSE2_MultiplyBottom16;
2045  s_pBot[8] = &SSE2_MultiplyBottom32;
2046 
2047  s_pSqu[1] = &SSE2_Square4;
2048  s_pSqu[2] = &SSE2_Square8;
2049  s_pSqu[4] = &SSE2_Square16;
2050  s_pSqu[8] = &SSE2_Square32;
2051 
2052  s_pTop[2] = &SSE2_MultiplyTop8;
2053  s_pTop[4] = &SSE2_MultiplyTop16;
2054  s_pTop[8] = &SSE2_MultiplyTop32;
2055  }
2056  else
2057 #endif
2058  {
2059  s_pMul[1] = &Baseline_Multiply4;
2060  s_pMul[2] = &Baseline_Multiply8;
2061 
2062  s_pBot[1] = &Baseline_MultiplyBottom4;
2063  s_pBot[2] = &Baseline_MultiplyBottom8;
2064 
2065  s_pSqu[1] = &Baseline_Square4;
2066  s_pSqu[2] = &Baseline_Square8;
2067 
2068  s_pTop[2] = &Baseline_MultiplyTop8;
2069 
2070 #if !CRYPTOPP_INTEGER_SSE2
2071  s_pMul[4] = &Baseline_Multiply16;
2072  s_pBot[4] = &Baseline_MultiplyBottom16;
2073  s_pSqu[4] = &Baseline_Square16;
2074  s_pTop[4] = &Baseline_MultiplyTop16;
2075 #endif
2076  }
2077 }
2078 
2079 inline int Add(word *C, const word *A, const word *B, size_t N)
2080 {
2081 #if CRYPTOPP_INTEGER_SSE2
2082  return s_pAdd(N, C, A, B);
2083 #else
2084  return Baseline_Add(N, C, A, B);
2085 #endif
2086 }
2087 
2088 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2089 {
2090 #if CRYPTOPP_INTEGER_SSE2
2091  return s_pSub(N, C, A, B);
2092 #else
2093  return Baseline_Sub(N, C, A, B);
2094 #endif
2095 }
2096 
2097 // ********************************************************
2098 
2099 
2100 #define A0 A
2101 #define A1 (A+N2)
2102 #define B0 B
2103 #define B1 (B+N2)
2104 
2105 #define T0 T
2106 #define T1 (T+N2)
2107 #define T2 (T+N)
2108 #define T3 (T+N+N2)
2109 
2110 #define R0 R
2111 #define R1 (R+N2)
2112 #define R2 (R+N)
2113 #define R3 (R+N+N2)
2114 
2115 // R[2*N] - result = A*B
2116 // T[2*N] - temporary work space
2117 // A[N] --- multiplier
2118 // B[N] --- multiplicant
2119 
2120 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2121 {
2122  assert(N>=2 && N%2==0);
2123 
2124  if (N <= s_recursionLimit)
2125  s_pMul[N/4](R, A, B);
2126  else
2127  {
2128  const size_t N2 = N/2;
2129 
2130  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2131  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2132 
2133  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2134  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2135 
2136  RecursiveMultiply(R2, T2, A1, B1, N2);
2137  RecursiveMultiply(T0, T2, R0, R1, N2);
2138  RecursiveMultiply(R0, T2, A0, B0, N2);
2139 
2140  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2141 
2142  int c2 = Add(R2, R2, R1, N2);
2143  int c3 = c2;
2144  c2 += Add(R1, R2, R0, N2);
2145  c3 += Add(R2, R2, R3, N2);
2146 
2147  if (AN2 == BN2)
2148  c3 -= Subtract(R1, R1, T0, N);
2149  else
2150  c3 += Add(R1, R1, T0, N);
2151 
2152  c3 += Increment(R2, N2, c2);
2153  assert (c3 >= 0 && c3 <= 2);
2154  Increment(R3, N2, c3);
2155  }
2156 }
2157 
2158 // R[2*N] - result = A*A
2159 // T[2*N] - temporary work space
2160 // A[N] --- number to be squared
2161 
2162 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2163 {
2164  assert(N && N%2==0);
2165 
2166  if (N <= s_recursionLimit)
2167  s_pSqu[N/4](R, A);
2168  else
2169  {
2170  const size_t N2 = N/2;
2171 
2172  RecursiveSquare(R0, T2, A0, N2);
2173  RecursiveSquare(R2, T2, A1, N2);
2174  RecursiveMultiply(T0, T2, A0, A1, N2);
2175 
2176  int carry = Add(R1, R1, T0, N);
2177  carry += Add(R1, R1, T0, N);
2178  Increment(R3, N2, carry);
2179  }
2180 }
2181 
2182 // R[N] - bottom half of A*B
2183 // T[3*N/2] - temporary work space
2184 // A[N] - multiplier
2185 // B[N] - multiplicant
2186 
2187 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2188 {
2189  assert(N>=2 && N%2==0);
2190 
2191  if (N <= s_recursionLimit)
2192  s_pBot[N/4](R, A, B);
2193  else
2194  {
2195  const size_t N2 = N/2;
2196 
2197  RecursiveMultiply(R, T, A0, B0, N2);
2198  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2199  Add(R1, R1, T0, N2);
2200  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2201  Add(R1, R1, T0, N2);
2202  }
2203 }
2204 
2205 // R[N] --- upper half of A*B
2206 // T[2*N] - temporary work space
2207 // L[N] --- lower half of A*B
2208 // A[N] --- multiplier
2209 // B[N] --- multiplicant
2210 
2211 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2212 {
2213  assert(N>=2 && N%2==0);
2214 
2215  if (N <= s_recursionLimit)
2216  s_pTop[N/4](R, A, B, L[N-1]);
2217  else
2218  {
2219  const size_t N2 = N/2;
2220 
2221  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2222  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2223 
2224  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2225  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2226 
2227  RecursiveMultiply(T0, T2, R0, R1, N2);
2228  RecursiveMultiply(R0, T2, A1, B1, N2);
2229 
2230  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2231 
2232  int t, c3;
2233  int c2 = Subtract(T2, L+N2, L, N2);
2234 
2235  if (AN2 == BN2)
2236  {
2237  c2 -= Add(T2, T2, T0, N2);
2238  t = (Compare(T2, R0, N2) == -1);
2239  c3 = t - Subtract(T2, T2, T1, N2);
2240  }
2241  else
2242  {
2243  c2 += Subtract(T2, T2, T0, N2);
2244  t = (Compare(T2, R0, N2) == -1);
2245  c3 = t + Add(T2, T2, T1, N2);
2246  }
2247 
2248  c2 += t;
2249  if (c2 >= 0)
2250  c3 += Increment(T2, N2, c2);
2251  else
2252  c3 -= Decrement(T2, N2, -c2);
2253  c3 += Add(R0, T2, R1, N2);
2254 
2255  assert (c3 >= 0 && c3 <= 2);
2256  Increment(R1, N2, c3);
2257  }
2258 }
2259 
2260 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2261 {
2262  RecursiveMultiply(R, T, A, B, N);
2263 }
2264 
2265 inline void Square(word *R, word *T, const word *A, size_t N)
2266 {
2267  RecursiveSquare(R, T, A, N);
2268 }
2269 
2270 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2271 {
2272  RecursiveMultiplyBottom(R, T, A, B, N);
2273 }
2274 
2275 // R[NA+NB] - result = A*B
2276 // T[NA+NB] - temporary work space
2277 // A[NA] ---- multiplier
2278 // B[NB] ---- multiplicant
2279 
2280 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2281 {
2282  if (NA == NB)
2283  {
2284  if (A == B)
2285  Square(R, T, A, NA);
2286  else
2287  Multiply(R, T, A, B, NA);
2288 
2289  return;
2290  }
2291 
2292  if (NA > NB)
2293  {
2294  std::swap(A, B);
2295  std::swap(NA, NB);
2296  }
2297 
2298  assert(NB % NA == 0);
2299 
2300  if (NA==2 && !A[1])
2301  {
2302  switch (A[0])
2303  {
2304  case 0:
2305  SetWords(R, 0, NB+2);
2306  return;
2307  case 1:
2308  CopyWords(R, B, NB);
2309  R[NB] = R[NB+1] = 0;
2310  return;
2311  default:
2312  R[NB] = LinearMultiply(R, B, A[0], NB);
2313  R[NB+1] = 0;
2314  return;
2315  }
2316  }
2317 
2318  size_t i;
2319  if ((NB/NA)%2 == 0)
2320  {
2321  Multiply(R, T, A, B, NA);
2322  CopyWords(T+2*NA, R+NA, NA);
2323 
2324  for (i=2*NA; i<NB; i+=2*NA)
2325  Multiply(T+NA+i, T, A, B+i, NA);
2326  for (i=NA; i<NB; i+=2*NA)
2327  Multiply(R+i, T, A, B+i, NA);
2328  }
2329  else
2330  {
2331  for (i=0; i<NB; i+=2*NA)
2332  Multiply(R+i, T, A, B+i, NA);
2333  for (i=NA; i<NB; i+=2*NA)
2334  Multiply(T+NA+i, T, A, B+i, NA);
2335  }
2336 
2337  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2338  Increment(R+NB, NA);
2339 }
2340 
2341 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2342 // T[3*N/2] - temporary work space
2343 // A[N] ----- an odd number as input
2344 
2345 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2346 {
2347  if (N==2)
2348  {
2349  T[0] = AtomicInverseModPower2(A[0]);
2350  T[1] = 0;
2351  s_pBot[0](T+2, T, A);
2352  TwosComplement(T+2, 2);
2353  Increment(T+2, 2, 2);
2354  s_pBot[0](R, T, T+2);
2355  }
2356  else
2357  {
2358  const size_t N2 = N/2;
2359  RecursiveInverseModPower2(R0, T0, A0, N2);
2360  T0[0] = 1;
2361  SetWords(T0+1, 0, N2-1);
2362  MultiplyTop(R1, T1, T0, R0, A0, N2);
2363  MultiplyBottom(T0, T1, R0, A1, N2);
2364  Add(T0, R1, T0, N2);
2365  TwosComplement(T0, N2);
2366  MultiplyBottom(R1, T1, R0, T0, N2);
2367  }
2368 }
2369 
2370 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2371 // T[3*N] - temporary work space
2372 // X[2*N] - number to be reduced
2373 // M[N] --- modulus
2374 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2375 
2376 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2377 {
2378 #if 1
2379  MultiplyBottom(R, T, X, U, N);
2380  MultiplyTop(T, T+N, X, R, M, N);
2381  word borrow = Subtract(T, X+N, T, N);
2382  // defend against timing attack by doing this Add even when not needed
2383  word carry = Add(T+N, T, M, N);
2384  assert(carry | !borrow);
2385  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2386  CopyWords(R, T + ((0-borrow) & N), N);
2387 #elif 0
2388  const word u = 0-U[0];
2389  Declare2Words(p)
2390  for (size_t i=0; i<N; i++)
2391  {
2392  const word t = u * X[i];
2393  word c = 0;
2394  for (size_t j=0; j<N; j+=2)
2395  {
2396  MultiplyWords(p, t, M[j]);
2397  Acc2WordsBy1(p, X[i+j]);
2398  Acc2WordsBy1(p, c);
2399  X[i+j] = LowWord(p);
2400  c = HighWord(p);
2401  MultiplyWords(p, t, M[j+1]);
2402  Acc2WordsBy1(p, X[i+j+1]);
2403  Acc2WordsBy1(p, c);
2404  X[i+j+1] = LowWord(p);
2405  c = HighWord(p);
2406  }
2407 
2408  if (Increment(X+N+i, N-i, c))
2409  while (!Subtract(X+N, X+N, M, N)) {}
2410  }
2411 
2412  memcpy(R, X+N, N*WORD_SIZE);
2413 #else
2414  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2415  for (size_t i=0; i<N; i++)
2416  {
2417  __m64 t = _mm_cvtsi32_si64(X[i]);
2418  t = _mm_mul_su32(t, u);
2419  __m64 c = _mm_setzero_si64();
2420  for (size_t j=0; j<N; j+=2)
2421  {
2422  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2423  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2424  c = _mm_add_si64(c, p);
2425  X[i+j] = _mm_cvtsi64_si32(c);
2426  c = _mm_srli_si64(c, 32);
2427  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2428  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2429  c = _mm_add_si64(c, p);
2430  X[i+j+1] = _mm_cvtsi64_si32(c);
2431  c = _mm_srli_si64(c, 32);
2432  }
2433 
2434  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2435  while (!Subtract(X+N, X+N, M, N)) {}
2436  }
2437 
2438  memcpy(R, X+N, N*WORD_SIZE);
2439  _mm_empty();
2440 #endif
2441 }
2442 
2443 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2444 // T[2*N] - temporary work space
2445 // X[2*N] - number to be reduced
2446 // M[N] --- modulus
2447 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2448 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2449 
2450 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2451 {
2452  assert(N%2==0 && N>=4);
2453 
2454 #define M0 M
2455 #define M1 (M+N2)
2456 #define V0 V
2457 #define V1 (V+N2)
2458 
2459 #define X0 X
2460 #define X1 (X+N2)
2461 #define X2 (X+N)
2462 #define X3 (X+N+N2)
2463 
2464  const size_t N2 = N/2;
2465  Multiply(T0, T2, V0, X3, N2);
2466  int c2 = Add(T0, T0, X0, N);
2467  MultiplyBottom(T3, T2, T0, U, N2);
2468  MultiplyTop(T2, R, T0, T3, M0, N2);
2469  c2 -= Subtract(T2, T1, T2, N2);
2470  Multiply(T0, R, T3, M1, N2);
2471  c2 -= Subtract(T0, T2, T0, N2);
2472  int c3 = -(int)Subtract(T1, X2, T1, N2);
2473  Multiply(R0, T2, V1, X3, N2);
2474  c3 += Add(R, R, T, N);
2475 
2476  if (c2>0)
2477  c3 += Increment(R1, N2);
2478  else if (c2<0)
2479  c3 -= Decrement(R1, N2, -c2);
2480 
2481  assert(c3>=-1 && c3<=1);
2482  if (c3>0)
2483  Subtract(R, R, M, N);
2484  else if (c3<0)
2485  Add(R, R, M, N);
2486 
2487 #undef M0
2488 #undef M1
2489 #undef V0
2490 #undef V1
2491 
2492 #undef X0
2493 #undef X1
2494 #undef X2
2495 #undef X3
2496 }
2497 
2498 #undef A0
2499 #undef A1
2500 #undef B0
2501 #undef B1
2502 
2503 #undef T0
2504 #undef T1
2505 #undef T2
2506 #undef T3
2507 
2508 #undef R0
2509 #undef R1
2510 #undef R2
2511 #undef R3
2512 
2513 /*
2514 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2515 static word SubatomicDivide(word *A, word B0, word B1)
2516 {
2517  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2518  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2519 
2520  // estimate the quotient: do a 2 word by 1 word divide
2521  word Q;
2522  if (B1+1 == 0)
2523  Q = A[2];
2524  else
2525  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2526 
2527  // now subtract Q*B from A
2528  DWord p = DWord::Multiply(B0, Q);
2529  DWord u = (DWord) A[0] - p.GetLowHalf();
2530  A[0] = u.GetLowHalf();
2531  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2532  A[1] = u.GetLowHalf();
2533  A[2] += u.GetHighHalf();
2534 
2535  // Q <= actual quotient, so fix it
2536  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2537  {
2538  u = (DWord) A[0] - B0;
2539  A[0] = u.GetLowHalf();
2540  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2541  A[1] = u.GetLowHalf();
2542  A[2] += u.GetHighHalf();
2543  Q++;
2544  assert(Q); // shouldn't overflow
2545  }
2546 
2547  return Q;
2548 }
2549 
2550 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2551 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2552 {
2553  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2554  {
2555  Q[0] = A[2];
2556  Q[1] = A[3];
2557  }
2558  else
2559  {
2560  word T[4];
2561  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2562  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2563  Q[0] = SubatomicDivide(T, B[0], B[1]);
2564 
2565 #ifndef NDEBUG
2566  // multiply quotient and divisor and add remainder, make sure it equals dividend
2567  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2568  word P[4];
2569  LowLevel::Multiply2(P, Q, B);
2570  Add(P, P, T, 4);
2571  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2572 #endif
2573  }
2574 }
2575 */
2576 
2577 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2578 {
2579  word T[4];
2580  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2581  Q[0] = q.GetLowHalf();
2582  Q[1] = q.GetHighHalf();
2583 
2584 #ifndef NDEBUG
2585  if (B[0] || B[1])
2586  {
2587  // multiply quotient and divisor and add remainder, make sure it equals dividend
2588  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2589  word P[4];
2590  s_pMul[0](P, Q, B);
2591  Add(P, P, T, 4);
2592  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2593  }
2594 #endif
2595 }
2596 
2597 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2598 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2599 {
2600  assert(N && N%2==0);
2601 
2602  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2603 
2604  word borrow = Subtract(R, R, T, N+2);
2605  assert(!borrow && !R[N+1]);
2606  CRYPTOPP_UNUSED(borrow);
2607 
2608  while (R[N] || Compare(R, B, N) >= 0)
2609  {
2610  R[N] -= Subtract(R, R, B, N);
2611  Q[1] += (++Q[0]==0);
2612  assert(Q[0] || Q[1]); // no overflow
2613  }
2614 }
2615 
2616 // R[NB] -------- remainder = A%B
2617 // Q[NA-NB+2] --- quotient = A/B
2618 // T[NA+3*(NB+2)] - temp work space
2619 // A[NA] -------- dividend
2620 // B[NB] -------- divisor
2621 
2622 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2623 {
2624  assert(NA && NB && NA%2==0 && NB%2==0);
2625  assert(B[NB-1] || B[NB-2]);
2626  assert(NB <= NA);
2627 
2628  // set up temporary work space
2629  word *const TA=T;
2630  word *const TB=T+NA+2;
2631  word *const TP=T+NA+2+NB;
2632 
2633  // copy B into TB and normalize it so that TB has highest bit set to 1
2634  unsigned shiftWords = (B[NB-1]==0);
2635  TB[0] = TB[NB-1] = 0;
2636  CopyWords(TB+shiftWords, B, NB-shiftWords);
2637  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2638  assert(shiftBits < WORD_BITS);
2639  ShiftWordsLeftByBits(TB, NB, shiftBits);
2640 
2641  // copy A into TA and normalize it
2642  TA[0] = TA[NA] = TA[NA+1] = 0;
2643  CopyWords(TA+shiftWords, A, NA);
2644  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2645 
2646  if (TA[NA+1]==0 && TA[NA] <= 1)
2647  {
2648  Q[NA-NB+1] = Q[NA-NB] = 0;
2649  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2650  {
2651  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2652  ++Q[NA-NB];
2653  }
2654  }
2655  else
2656  {
2657  NA+=2;
2658  assert(Compare(TA+NA-NB, TB, NB) < 0);
2659  }
2660 
2661  word BT[2];
2662  BT[0] = TB[NB-2] + 1;
2663  BT[1] = TB[NB-1] + (BT[0]==0);
2664 
2665  // start reducing TA mod TB, 2 words at a time
2666  for (size_t i=NA-2; i>=NB; i-=2)
2667  {
2668  AtomicDivide(Q+i-NB, TA+i-2, BT);
2669  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2670  }
2671 
2672  // copy TA into R, and denormalize it
2673  CopyWords(R, TA+shiftWords, NB);
2674  ShiftWordsRightByBits(R, NB, shiftBits);
2675 }
2676 
2677 static inline size_t EvenWordCount(const word *X, size_t N)
2678 {
2679  while (N && X[N-2]==0 && X[N-1]==0)
2680  N-=2;
2681  return N;
2682 }
2683 
2684 // return k
2685 // R[N] --- result = A^(-1) * 2^k mod M
2686 // T[4*N] - temporary work space
2687 // A[NA] -- number to take inverse of
2688 // M[N] --- modulus
2689 
2690 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2691 {
2692  assert(NA<=N && N && N%2==0);
2693 
2694  word *b = T;
2695  word *c = T+N;
2696  word *f = T+2*N;
2697  word *g = T+3*N;
2698  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2699  unsigned int k=0;
2700  bool s=false;
2701 
2702  SetWords(T, 0, 3*N);
2703  b[0]=1;
2704  CopyWords(f, A, NA);
2705  CopyWords(g, M, N);
2706 
2707  while (1)
2708  {
2709  word t=f[0];
2710  while (!t)
2711  {
2712  if (EvenWordCount(f, fgLen)==0)
2713  {
2714  SetWords(R, 0, N);
2715  return 0;
2716  }
2717 
2718  ShiftWordsRightByWords(f, fgLen, 1);
2719  bcLen += 2 * (c[bcLen-1] != 0);
2720  assert(bcLen <= N);
2721  ShiftWordsLeftByWords(c, bcLen, 1);
2722  k+=WORD_BITS;
2723  t=f[0];
2724  }
2725 
2726  // t must be non-0; otherwise undefined.
2727  unsigned int i = TrailingZeros(t);
2728  t >>= i;
2729  k += i;
2730 
2731  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2732  {
2733  if (s)
2734  Subtract(R, M, b, N);
2735  else
2736  CopyWords(R, b, N);
2737  return k;
2738  }
2739 
2740  ShiftWordsRightByBits(f, fgLen, i);
2741  t = ShiftWordsLeftByBits(c, bcLen, i);
2742  c[bcLen] += t;
2743  bcLen += 2 * (t!=0);
2744  assert(bcLen <= N);
2745 
2746  bool swap = Compare(f, g, fgLen)==-1;
2747  ConditionalSwapPointers(swap, f, g);
2748  ConditionalSwapPointers(swap, b, c);
2749  s ^= swap;
2750 
2751  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2752 
2753  Subtract(f, f, g, fgLen);
2754  t = Add(b, b, c, bcLen);
2755  b[bcLen] += t;
2756  bcLen += 2*t;
2757  assert(bcLen <= N);
2758  }
2759 }
2760 
2761 // R[N] - result = A/(2^k) mod M
2762 // A[N] - input
2763 // M[N] - modulus
2764 
2765 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2766 {
2767  CopyWords(R, A, N);
2768 
2769  while (k--)
2770  {
2771  if (R[0]%2==0)
2772  ShiftWordsRightByBits(R, N, 1);
2773  else
2774  {
2775  word carry = Add(R, R, M, N);
2776  ShiftWordsRightByBits(R, N, 1);
2777  R[N-1] += carry<<(WORD_BITS-1);
2778  }
2779  }
2780 }
2781 
2782 // R[N] - result = A*(2^k) mod M
2783 // A[N] - input
2784 // M[N] - modulus
2785 
2786 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2787 {
2788  CopyWords(R, A, N);
2789 
2790  while (k--)
2791  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2792  Subtract(R, R, M, N);
2793 }
2794 
2795 // ******************************************************************
2796 
2797 InitializeInteger::InitializeInteger()
2798 {
2799  if (!g_pAssignIntToInteger)
2800  {
2801  SetFunctionPointers();
2802  g_pAssignIntToInteger = AssignIntToInteger;
2803  }
2804 }
2805 
2806 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2807 
2808 static inline size_t RoundupSize(size_t n)
2809 {
2810  if (n<=8)
2811  return RoundupSizeTable[n];
2812  else if (n<=16)
2813  return 16;
2814  else if (n<=32)
2815  return 32;
2816  else if (n<=64)
2817  return 64;
2818  else return size_t(1) << BitPrecision(n-1);
2819 }
2820 
2822  : reg(2), sign(POSITIVE)
2823 {
2824  reg[0] = reg[1] = 0;
2825 }
2826 
2828  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2829 {
2830  CopyWords(reg, t.reg, reg.size());
2831 }
2832 
2833 Integer::Integer(Sign s, lword value)
2834  : reg(2), sign(s)
2835 {
2836  reg[0] = word(value);
2837  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2838 }
2839 
2840 Integer::Integer(signed long value)
2841  : reg(2)
2842 {
2843  if (value >= 0)
2844  sign = POSITIVE;
2845  else
2846  {
2847  sign = NEGATIVE;
2848  value = -value;
2849  }
2850  reg[0] = word(value);
2851  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2852 }
2853 
2854 Integer::Integer(Sign s, word high, word low)
2855  : reg(2), sign(s)
2856 {
2857  reg[0] = low;
2858  reg[1] = high;
2859 }
2860 
2862 {
2863  if (ByteCount() > sizeof(long))
2864  return false;
2865 
2866  unsigned long value = (unsigned long)reg[0];
2867  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2868 
2869  if (sign==POSITIVE)
2870  return (signed long)value >= 0;
2871  else
2872  return -(signed long)value < 0;
2873 }
2874 
2875 signed long Integer::ConvertToLong() const
2876 {
2877  assert(IsConvertableToLong());
2878 
2879  unsigned long value = (unsigned long)reg[0];
2880  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2881  return sign==POSITIVE ? value : -(signed long)value;
2882 }
2883 
2884 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
2885 {
2886  Decode(encodedInteger, byteCount, s);
2887 }
2888 
2889 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
2890 {
2891  Decode(encodedInteger, byteCount, s);
2892 }
2893 
2895 {
2896  BERDecode(bt);
2897 }
2898 
2900 {
2901  Randomize(rng, bitcount);
2902 }
2903 
2904 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2905 {
2906  if (!Randomize(rng, min, max, rnType, equiv, mod))
2908 }
2909 
2911 {
2912  Integer r((word)0, BitsToWords(e+1));
2913  r.SetBit(e);
2914  return r;
2915 }
2916 
2917 template <long i>
2919 {
2920  Integer * operator()() const
2921  {
2922  return new Integer(i);
2923  }
2924 };
2925 
2927 {
2928  return Singleton<Integer>().Ref();
2929 }
2930 
2932 {
2933  return Singleton<Integer, NewInteger<1> >().Ref();
2934 }
2935 
2937 {
2938  return Singleton<Integer, NewInteger<2> >().Ref();
2939 }
2940 
2941 bool Integer::operator!() const
2942 {
2943  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
2944 }
2945 
2946 Integer& Integer::operator=(const Integer& t)
2947 {
2948  if (this != &t)
2949  {
2950  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
2951  reg.New(RoundupSize(t.WordCount()));
2952  CopyWords(reg, t.reg, reg.size());
2953  sign = t.sign;
2954  }
2955  return *this;
2956 }
2957 
2958 bool Integer::GetBit(size_t n) const
2959 {
2960  if (n/WORD_BITS >= reg.size())
2961  return 0;
2962  else
2963  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
2964 }
2965 
2966 void Integer::SetBit(size_t n, bool value)
2967 {
2968  if (value)
2969  {
2970  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
2971  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
2972  }
2973  else
2974  {
2975  if (n/WORD_BITS < reg.size())
2976  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
2977  }
2978 }
2979 
2980 byte Integer::GetByte(size_t n) const
2981 {
2982  if (n/WORD_SIZE >= reg.size())
2983  return 0;
2984  else
2985  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
2986 }
2987 
2988 void Integer::SetByte(size_t n, byte value)
2989 {
2990  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
2991  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
2992  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
2993 }
2994 
2995 lword Integer::GetBits(size_t i, size_t n) const
2996 {
2997  lword v = 0;
2998  assert(n <= sizeof(v)*8);
2999  for (unsigned int j=0; j<n; j++)
3000  v |= lword(GetBit(i+j)) << j;
3001  return v;
3002 }
3003 
3004 Integer Integer::operator-() const
3005 {
3006  Integer result(*this);
3007  result.Negate();
3008  return result;
3009 }
3010 
3011 Integer Integer::AbsoluteValue() const
3012 {
3013  Integer result(*this);
3014  result.sign = POSITIVE;
3015  return result;
3016 }
3017 
3019 {
3020  reg.swap(a.reg);
3021  std::swap(sign, a.sign);
3022 }
3023 
3024 Integer::Integer(word value, size_t length)
3025  : reg(RoundupSize(length)), sign(POSITIVE)
3026 {
3027  reg[0] = value;
3028  SetWords(reg+1, 0, reg.size()-1);
3029 }
3030 
3031 template <class T>
3032 static Integer StringToInteger(const T *str)
3033 {
3034  int radix;
3035  // GCC workaround
3036  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3037  unsigned int length;
3038  for (length = 0; str[length] != 0; length++) {}
3039 
3040  Integer v;
3041 
3042  if (length == 0)
3043  return v;
3044 
3045  switch (str[length-1])
3046  {
3047  case 'h':
3048  case 'H':
3049  radix=16;
3050  break;
3051  case 'o':
3052  case 'O':
3053  radix=8;
3054  break;
3055  case 'b':
3056  case 'B':
3057  radix=2;
3058  break;
3059  default:
3060  radix=10;
3061  }
3062 
3063  if (length > 2 && str[0] == '0' && str[1] == 'x')
3064  radix = 16;
3065 
3066  for (unsigned i=0; i<length; i++)
3067  {
3068  int digit;
3069 
3070  if (str[i] >= '0' && str[i] <= '9')
3071  digit = str[i] - '0';
3072  else if (str[i] >= 'A' && str[i] <= 'F')
3073  digit = str[i] - 'A' + 10;
3074  else if (str[i] >= 'a' && str[i] <= 'f')
3075  digit = str[i] - 'a' + 10;
3076  else
3077  digit = radix;
3078 
3079  if (digit < radix)
3080  {
3081  v *= radix;
3082  v += digit;
3083  }
3084  }
3085 
3086  if (str[0] == '-')
3087  v.Negate();
3088 
3089  return v;
3090 }
3091 
3092 Integer::Integer(const char *str)
3093  : reg(2), sign(POSITIVE)
3094 {
3095  *this = StringToInteger(str);
3096 }
3097 
3098 Integer::Integer(const wchar_t *str)
3099  : reg(2), sign(POSITIVE)
3100 {
3101  *this = StringToInteger(str);
3102 }
3103 
3104 unsigned int Integer::WordCount() const
3105 {
3106  return (unsigned int)CountWords(reg, reg.size());
3107 }
3108 
3109 unsigned int Integer::ByteCount() const
3110 {
3111  unsigned wordCount = WordCount();
3112  if (wordCount)
3113  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3114  else
3115  return 0;
3116 }
3117 
3118 unsigned int Integer::BitCount() const
3119 {
3120  unsigned wordCount = WordCount();
3121  if (wordCount)
3122  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3123  else
3124  return 0;
3125 }
3126 
3127 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3128 {
3129  StringStore store(input, inputLen);
3130  Decode(store, inputLen, s);
3131 }
3132 
3134 {
3135  assert(bt.MaxRetrievable() >= inputLen);
3136 
3137  byte b;
3138  bt.Peek(b);
3139  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3140 
3141  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3142  {
3143  bt.Skip(1);
3144  inputLen--;
3145  bt.Peek(b);
3146  }
3147 
3148  // The call to CleanNew is optimized away above -O0/-Og.
3149  const size_t size = RoundupSize(BytesToWords(inputLen));
3150  reg.CleanNew(size);
3151 
3152  assert(reg.SizeInBytes() >= inputLen);
3153  for (size_t i=inputLen; i > 0; i--)
3154  {
3155  bt.Get(b);
3156  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3157  }
3158 
3159  if (sign == NEGATIVE)
3160  {
3161  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3162  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3163  TwosComplement(reg, reg.size());
3164  }
3165 }
3166 
3167 size_t Integer::MinEncodedSize(Signedness signedness) const
3168 {
3169  unsigned int outputLen = STDMAX(1U, ByteCount());
3170  if (signedness == UNSIGNED)
3171  return outputLen;
3172  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3173  outputLen++;
3174  if (IsNegative() && *this < -Power2(outputLen*8-1))
3175  outputLen++;
3176  return outputLen;
3177 }
3178 
3179 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3180 {
3181  assert(output && outputLen);
3182  ArraySink sink(output, outputLen);
3183  Encode(sink, outputLen, signedness);
3184 }
3185 
3186 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3187 {
3188  if (signedness == UNSIGNED || NotNegative())
3189  {
3190  for (size_t i=outputLen; i > 0; i--)
3191  bt.Put(GetByte(i-1));
3192  }
3193  else
3194  {
3195  // take two's complement of *this
3196  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3197  temp.Encode(bt, outputLen, UNSIGNED);
3198  }
3199 }
3200 
3202 {
3203  DERGeneralEncoder enc(bt, INTEGER);
3205  enc.MessageEnd();
3206 }
3207 
3208 void Integer::BERDecode(const byte *input, size_t len)
3209 {
3210  StringStore store(input, len);
3211  BERDecode(store);
3212 }
3213 
3215 {
3216  BERGeneralDecoder dec(bt, INTEGER);
3217  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3218  BERDecodeError();
3219  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3220  dec.MessageEnd();
3221 }
3222 
3224 {
3225  DERGeneralEncoder enc(bt, OCTET_STRING);
3226  Encode(enc, length);
3227  enc.MessageEnd();
3228 }
3229 
3231 {
3232  BERGeneralDecoder dec(bt, OCTET_STRING);
3233  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3234  BERDecodeError();
3235  Decode(dec, length);
3236  dec.MessageEnd();
3237 }
3238 
3239 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3240 {
3241  ArraySink sink(output, len);
3242  return OpenPGPEncode(sink);
3243 }
3244 
3246 {
3247  word16 bitCount = word16(BitCount());
3248  bt.PutWord16(bitCount);
3249  size_t byteCount = BitsToBytes(bitCount);
3250  Encode(bt, byteCount);
3251  return 2 + byteCount;
3252 }
3253 
3254 void Integer::OpenPGPDecode(const byte *input, size_t len)
3255 {
3256  StringStore store(input, len);
3257  OpenPGPDecode(store);
3258 }
3259 
3261 {
3262  word16 bitCount;
3263  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3264  throw OpenPGPDecodeErr();
3265  Decode(bt, BitsToBytes(bitCount));
3266 }
3267 
3269 {
3270  const size_t nbytes = nbits/8 + 1;
3271  SecByteBlock buf(nbytes);
3272  rng.GenerateBlock(buf, nbytes);
3273  if (nbytes)
3274  buf[0] = (byte)Crop(buf[0], nbits % 8);
3275  Decode(buf, nbytes, UNSIGNED);
3276 }
3277 
3278 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3279 {
3280  if (min > max)
3281  throw InvalidArgument("Integer: Min must be no greater than Max");
3282 
3283  Integer range = max - min;
3284  const unsigned int nbits = range.BitCount();
3285 
3286  do
3287  {
3288  Randomize(rng, nbits);
3289  }
3290  while (*this > range);
3291 
3292  *this += min;
3293 }
3294 
3295 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3296 {
3297  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3298 }
3299 
3301 {
3302 public:
3303  KDF2_RNG(const byte *seed, size_t seedSize)
3304  : m_counter(0), m_counterAndSeed(seedSize + 4)
3305  {
3306  memcpy(m_counterAndSeed + 4, seed, seedSize);
3307  }
3308 
3309  void GenerateBlock(byte *output, size_t size)
3310  {
3311  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3312  ++m_counter;
3313  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3314  }
3315 
3316 private:
3317  word32 m_counter;
3318  SecByteBlock m_counterAndSeed;
3319 };
3320 
3321 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3322 {
3323  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3324  Integer max;
3325  if (!params.GetValue("Max", max))
3326  {
3327  int bitLength;
3328  if (params.GetIntValue("BitLength", bitLength))
3329  max = Integer::Power2(bitLength);
3330  else
3331  throw InvalidArgument("Integer: missing Max argument");
3332  }
3333  if (min > max)
3334  throw InvalidArgument("Integer: Min must be no greater than Max");
3335 
3336  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3337  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3338 
3339  if (equiv.IsNegative() || equiv >= mod)
3340  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3341 
3342  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3343 
3344  member_ptr<KDF2_RNG> kdf2Rng;
3346  if (params.GetValue(Name::Seed(), seed))
3347  {
3348  ByteQueue bq;
3349  DERSequenceEncoder seq(bq);
3350  min.DEREncode(seq);
3351  max.DEREncode(seq);
3352  equiv.DEREncode(seq);
3353  mod.DEREncode(seq);
3354  DEREncodeUnsigned(seq, rnType);
3355  DEREncodeOctetString(seq, seed.begin(), seed.size());
3356  seq.MessageEnd();
3357 
3358  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3359  bq.Get(finalSeed, finalSeed.size());
3360  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3361  }
3362  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3363 
3364  switch (rnType)
3365  {
3366  case ANY:
3367  if (mod == One())
3368  Randomize(rng, min, max);
3369  else
3370  {
3371  Integer min1 = min + (equiv-min)%mod;
3372  if (max < min1)
3373  return false;
3374  Randomize(rng, Zero(), (max - min1) / mod);
3375  *this *= mod;
3376  *this += min1;
3377  }
3378  return true;
3379 
3380  case PRIME:
3381  {
3382  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3383 
3384  int i;
3385  i = 0;
3386  while (1)
3387  {
3388  if (++i==16)
3389  {
3390  // check if there are any suitable primes in [min, max]
3391  Integer first = min;
3392  if (FirstPrime(first, max, equiv, mod, pSelector))
3393  {
3394  // if there is only one suitable prime, we're done
3395  *this = first;
3396  if (!FirstPrime(first, max, equiv, mod, pSelector))
3397  return true;
3398  }
3399  else
3400  return false;
3401  }
3402 
3403  Randomize(rng, min, max);
3404  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3405  return true;
3406  }
3407  }
3408 
3409  default:
3410  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3411  }
3412 }
3413 
3414 std::istream& operator>>(std::istream& in, Integer &a)
3415 {
3416  char c;
3417  unsigned int length = 0;
3418  SecBlock<char> str(length + 16);
3419 
3420  std::ws(in);
3421 
3422  do
3423  {
3424  in.read(&c, 1);
3425  str[length++] = c;
3426  if (length >= str.size())
3427  str.Grow(length + 16);
3428  }
3429  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3430 
3431  if (in.gcount())
3432  in.putback(c);
3433  str[length-1] = '\0';
3434  a = Integer(str);
3435 
3436  return in;
3437 }
3438 
3439 std::ostream& operator<<(std::ostream& out, const Integer &a)
3440 {
3441  // Get relevant conversion specifications from ostream.
3442  const long f = out.flags() & std::ios::basefield; // Get base digits.
3443  int base, block;
3444  char suffix;
3445  switch(f)
3446  {
3447  case std::ios::oct :
3448  base = 8;
3449  block = 8;
3450  suffix = 'o';
3451  break;
3452  case std::ios::hex :
3453  base = 16;
3454  block = 4;
3455  suffix = 'h';
3456  break;
3457  default :
3458  base = 10;
3459  block = 3;
3460  suffix = '.';
3461  }
3462 
3463  Integer temp1=a, temp2;
3464 
3465  if (a.IsNegative())
3466  {
3467  out << '-';
3468  temp1.Negate();
3469  }
3470 
3471  if (!a)
3472  out << '0';
3473 
3474  static const char upper[]="0123456789ABCDEF";
3475  static const char lower[]="0123456789abcdef";
3476 
3477  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3478  unsigned int i=0;
3479  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3480 
3481  while (!!temp1)
3482  {
3483  word digit;
3484  Integer::Divide(digit, temp2, temp1, base);
3485  s[i++]=vec[digit];
3486  temp1.swap(temp2);
3487  }
3488 
3489  while (i--)
3490  {
3491  out << s[i];
3492 // if (i && !(i%block))
3493 // out << ",";
3494  }
3495 
3496  return out << suffix;
3497 }
3498 
3499 Integer& Integer::operator++()
3500 {
3501  if (NotNegative())
3502  {
3503  if (Increment(reg, reg.size()))
3504  {
3505  reg.CleanGrow(2*reg.size());
3506  reg[reg.size()/2]=1;
3507  }
3508  }
3509  else
3510  {
3511  word borrow = Decrement(reg, reg.size());
3512  assert(!borrow);
3513  CRYPTOPP_UNUSED(borrow);
3514 
3515  if (WordCount()==0)
3516  *this = Zero();
3517  }
3518  return *this;
3519 }
3520 
3521 Integer& Integer::operator--()
3522 {
3523  if (IsNegative())
3524  {
3525  if (Increment(reg, reg.size()))
3526  {
3527  reg.CleanGrow(2*reg.size());
3528  reg[reg.size()/2]=1;
3529  }
3530  }
3531  else
3532  {
3533  if (Decrement(reg, reg.size()))
3534  *this = -One();
3535  }
3536  return *this;
3537 }
3538 
3539 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3540 {
3541  int carry;
3542  if (a.reg.size() == b.reg.size())
3543  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3544  else if (a.reg.size() > b.reg.size())
3545  {
3546  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3547  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3548  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3549  }
3550  else
3551  {
3552  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3553  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3554  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3555  }
3556 
3557  if (carry)
3558  {
3559  sum.reg.CleanGrow(2*sum.reg.size());
3560  sum.reg[sum.reg.size()/2] = 1;
3561  }
3562  sum.sign = Integer::POSITIVE;
3563 }
3564 
3565 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3566 {
3567  unsigned aSize = a.WordCount();
3568  aSize += aSize%2;
3569  unsigned bSize = b.WordCount();
3570  bSize += bSize%2;
3571 
3572  if (aSize == bSize)
3573  {
3574  if (Compare(a.reg, b.reg, aSize) >= 0)
3575  {
3576  Subtract(diff.reg, a.reg, b.reg, aSize);
3577  diff.sign = Integer::POSITIVE;
3578  }
3579  else
3580  {
3581  Subtract(diff.reg, b.reg, a.reg, aSize);
3582  diff.sign = Integer::NEGATIVE;
3583  }
3584  }
3585  else if (aSize > bSize)
3586  {
3587  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3588  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3589  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3590  assert(!borrow);
3591  diff.sign = Integer::POSITIVE;
3592  }
3593  else
3594  {
3595  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3596  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3597  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3598  assert(!borrow);
3599  diff.sign = Integer::NEGATIVE;
3600  }
3601 }
3602 
3603 // MSVC .NET 2003 workaround
3604 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3605 {
3606  return a < b ? b : a;
3607 }
3608 
3609 Integer Integer::Plus(const Integer& b) const
3610 {
3611  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3612  if (NotNegative())
3613  {
3614  if (b.NotNegative())
3615  PositiveAdd(sum, *this, b);
3616  else
3617  PositiveSubtract(sum, *this, b);
3618  }
3619  else
3620  {
3621  if (b.NotNegative())
3622  PositiveSubtract(sum, b, *this);
3623  else
3624  {
3625  PositiveAdd(sum, *this, b);
3626  sum.sign = Integer::NEGATIVE;
3627  }
3628  }
3629  return sum;
3630 }
3631 
3632 Integer& Integer::operator+=(const Integer& t)
3633 {
3634  reg.CleanGrow(t.reg.size());
3635  if (NotNegative())
3636  {
3637  if (t.NotNegative())
3638  PositiveAdd(*this, *this, t);
3639  else
3640  PositiveSubtract(*this, *this, t);
3641  }
3642  else
3643  {
3644  if (t.NotNegative())
3645  PositiveSubtract(*this, t, *this);
3646  else
3647  {
3648  PositiveAdd(*this, *this, t);
3649  sign = Integer::NEGATIVE;
3650  }
3651  }
3652  return *this;
3653 }
3654 
3655 Integer Integer::Minus(const Integer& b) const
3656 {
3657  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3658  if (NotNegative())
3659  {
3660  if (b.NotNegative())
3661  PositiveSubtract(diff, *this, b);
3662  else
3663  PositiveAdd(diff, *this, b);
3664  }
3665  else
3666  {
3667  if (b.NotNegative())
3668  {
3669  PositiveAdd(diff, *this, b);
3670  diff.sign = Integer::NEGATIVE;
3671  }
3672  else
3673  PositiveSubtract(diff, b, *this);
3674  }
3675  return diff;
3676 }
3677 
3678 Integer& Integer::operator-=(const Integer& t)
3679 {
3680  reg.CleanGrow(t.reg.size());
3681  if (NotNegative())
3682  {
3683  if (t.NotNegative())
3684  PositiveSubtract(*this, *this, t);
3685  else
3686  PositiveAdd(*this, *this, t);
3687  }
3688  else
3689  {
3690  if (t.NotNegative())
3691  {
3692  PositiveAdd(*this, *this, t);
3693  sign = Integer::NEGATIVE;
3694  }
3695  else
3696  PositiveSubtract(*this, t, *this);
3697  }
3698  return *this;
3699 }
3700 
3701 Integer& Integer::operator<<=(size_t n)
3702 {
3703  const size_t wordCount = WordCount();
3704  const size_t shiftWords = n / WORD_BITS;
3705  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3706 
3707  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3708  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3709  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3710  return *this;
3711 }
3712 
3713 Integer& Integer::operator>>=(size_t n)
3714 {
3715  const size_t wordCount = WordCount();
3716  const size_t shiftWords = n / WORD_BITS;
3717  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3718 
3719  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3720  if (wordCount > shiftWords)
3721  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3722  if (IsNegative() && WordCount()==0) // avoid -0
3723  *this = Zero();
3724  return *this;
3725 }
3726 
3727 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3728 {
3729  size_t aSize = RoundupSize(a.WordCount());
3730  size_t bSize = RoundupSize(b.WordCount());
3731 
3732  product.reg.CleanNew(RoundupSize(aSize+bSize));
3733  product.sign = Integer::POSITIVE;
3734 
3735  IntegerSecBlock workspace(aSize + bSize);
3736  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3737 }
3738 
3739 void Multiply(Integer &product, const Integer &a, const Integer &b)
3740 {
3741  PositiveMultiply(product, a, b);
3742 
3743  if (a.NotNegative() != b.NotNegative())
3744  product.Negate();
3745 }
3746 
3748 {
3749  Integer product;
3750  Multiply(product, *this, b);
3751  return product;
3752 }
3753 
3754 /*
3755 void PositiveDivide(Integer &remainder, Integer &quotient,
3756  const Integer &dividend, const Integer &divisor)
3757 {
3758  remainder.reg.CleanNew(divisor.reg.size());
3759  remainder.sign = Integer::POSITIVE;
3760  quotient.reg.New(0);
3761  quotient.sign = Integer::POSITIVE;
3762  unsigned i=dividend.BitCount();
3763  while (i--)
3764  {
3765  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3766  remainder.reg[0] |= dividend[i];
3767  if (overflow || remainder >= divisor)
3768  {
3769  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3770  quotient.SetBit(i);
3771  }
3772  }
3773 }
3774 */
3775 
3776 void PositiveDivide(Integer &remainder, Integer &quotient,
3777  const Integer &a, const Integer &b)
3778 {
3779  unsigned aSize = a.WordCount();
3780  unsigned bSize = b.WordCount();
3781 
3782  if (!bSize)
3783  throw Integer::DivideByZero();
3784 
3785  if (aSize < bSize)
3786  {
3787  remainder = a;
3788  remainder.sign = Integer::POSITIVE;
3789  quotient = Integer::Zero();
3790  return;
3791  }
3792 
3793  aSize += aSize%2; // round up to next even number
3794  bSize += bSize%2;
3795 
3796  remainder.reg.CleanNew(RoundupSize(bSize));
3797  remainder.sign = Integer::POSITIVE;
3798  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3799  quotient.sign = Integer::POSITIVE;
3800 
3801  IntegerSecBlock T(aSize+3*(bSize+2));
3802  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3803 }
3804 
3805 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3806 {
3807  PositiveDivide(remainder, quotient, dividend, divisor);
3808 
3809  if (dividend.IsNegative())
3810  {
3811  quotient.Negate();
3812  if (remainder.NotZero())
3813  {
3814  --quotient;
3815  remainder = divisor.AbsoluteValue() - remainder;
3816  }
3817  }
3818 
3819  if (divisor.IsNegative())
3820  quotient.Negate();
3821 }
3822 
3823 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3824 {
3825  q = a;
3826  q >>= n;
3827 
3828  const size_t wordCount = BitsToWords(n);
3829  if (wordCount <= a.WordCount())
3830  {
3831  r.reg.resize(RoundupSize(wordCount));
3832  CopyWords(r.reg, a.reg, wordCount);
3833  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
3834  if (n % WORD_BITS != 0)
3835  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
3836  }
3837  else
3838  {
3839  r.reg.resize(RoundupSize(a.WordCount()));
3840  CopyWords(r.reg, a.reg, r.reg.size());
3841  }
3842  r.sign = POSITIVE;
3843 
3844  if (a.IsNegative() && r.NotZero())
3845  {
3846  --q;
3847  r = Power2(n) - r;
3848  }
3849 }
3850 
3851 Integer Integer::DividedBy(const Integer &b) const
3852 {
3853  Integer remainder, quotient;
3854  Integer::Divide(remainder, quotient, *this, b);
3855  return quotient;
3856 }
3857 
3859 {
3860  Integer remainder, quotient;
3861  Integer::Divide(remainder, quotient, *this, b);
3862  return remainder;
3863 }
3864 
3865 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
3866 {
3867  if (!divisor)
3868  throw Integer::DivideByZero();
3869 
3870  assert(divisor);
3871 
3872  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
3873  {
3874  quotient = dividend >> (BitPrecision(divisor)-1);
3875  remainder = dividend.reg[0] & (divisor-1);
3876  return;
3877  }
3878 
3879  unsigned int i = dividend.WordCount();
3880  quotient.reg.CleanNew(RoundupSize(i));
3881  remainder = 0;
3882  while (i--)
3883  {
3884  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
3885  remainder = DWord(dividend.reg[i], remainder) % divisor;
3886  }
3887 
3888  if (dividend.NotNegative())
3889  quotient.sign = POSITIVE;
3890  else
3891  {
3892  quotient.sign = NEGATIVE;
3893  if (remainder)
3894  {
3895  --quotient;
3896  remainder = divisor - remainder;
3897  }
3898  }
3899 }
3900 
3901 Integer Integer::DividedBy(word b) const
3902 {
3903  word remainder;
3904  Integer quotient;
3905  Integer::Divide(remainder, quotient, *this, b);
3906  return quotient;
3907 }
3908 
3909 word Integer::Modulo(word divisor) const
3910 {
3911  if (!divisor)
3912  throw Integer::DivideByZero();
3913 
3914  assert(divisor);
3915 
3916  word remainder;
3917 
3918  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
3919  remainder = reg[0] & (divisor-1);
3920  else
3921  {
3922  unsigned int i = WordCount();
3923 
3924  if (divisor <= 5)
3925  {
3926  DWord sum(0, 0);
3927  while (i--)
3928  sum += reg[i];
3929  remainder = sum % divisor;
3930  }
3931  else
3932  {
3933  remainder = 0;
3934  while (i--)
3935  remainder = DWord(reg[i], remainder) % divisor;
3936  }
3937  }
3938 
3939  if (IsNegative() && remainder)
3940  remainder = divisor - remainder;
3941 
3942  return remainder;
3943 }
3944 
3946 {
3947  if (!!(*this)) // don't flip sign if *this==0
3948  sign = Sign(1-sign);
3949 }
3950 
3951 int Integer::PositiveCompare(const Integer& t) const
3952 {
3953  unsigned size = WordCount(), tSize = t.WordCount();
3954 
3955  if (size == tSize)
3956  return CryptoPP::Compare(reg, t.reg, size);
3957  else
3958  return size > tSize ? 1 : -1;
3959 }
3960 
3961 int Integer::Compare(const Integer& t) const
3962 {
3963  if (NotNegative())
3964  {
3965  if (t.NotNegative())
3966  return PositiveCompare(t);
3967  else
3968  return 1;
3969  }
3970  else
3971  {
3972  if (t.NotNegative())
3973  return -1;
3974  else
3975  return -PositiveCompare(t);
3976  }
3977 }
3978 
3980 {
3981  if (!IsPositive())
3982  return Zero();
3983 
3984  // overestimate square root
3985  Integer x, y = Power2((BitCount()+1)/2);
3986  assert(y*y >= *this);
3987 
3988  do
3989  {
3990  x = y;
3991  y = (x + *this/x) >> 1;
3992  } while (y<x);
3993 
3994  return x;
3995 }
3996 
3997 bool Integer::IsSquare() const
3998 {
3999  Integer r = SquareRoot();
4000  return *this == r.Squared();
4001 }
4002 
4003 bool Integer::IsUnit() const
4004 {
4005  return (WordCount() == 1) && (reg[0] == 1);
4006 }
4007 
4009 {
4010  return IsUnit() ? *this : Zero();
4011 }
4012 
4013 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4014 {
4015  return x*y%m;
4016 }
4017 
4018 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4019 {
4020  ModularArithmetic mr(m);
4021  return mr.Exponentiate(x, e);
4022 }
4023 
4025 {
4026  return EuclideanDomainOf<Integer>().Gcd(a, b);
4027 }
4028 
4030 {
4031  assert(m.NotNegative());
4032 
4033  if (IsNegative())
4034  return Modulo(m).InverseMod(m);
4035 
4036  if (m.IsEven())
4037  {
4038  if (!m || IsEven())
4039  return Zero(); // no inverse
4040  if (*this == One())
4041  return One();
4042 
4043  Integer u = m.Modulo(*this).InverseMod(*this);
4044  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4045  }
4046 
4047  SecBlock<word> T(m.reg.size() * 4);
4048  Integer r((word)0, m.reg.size());
4049  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4050  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4051  return r;
4052 }
4053 
4054 word Integer::InverseMod(word mod) const
4055 {
4056  word g0 = mod, g1 = *this % mod;
4057  word v0 = 0, v1 = 1;
4058  word y;
4059 
4060  while (g1)
4061  {
4062  if (g1 == 1)
4063  return v1;
4064  y = g0 / g1;
4065  g0 = g0 % g1;
4066  v0 += y * v1;
4067 
4068  if (!g0)
4069  break;
4070  if (g0 == 1)
4071  return mod-v0;
4072  y = g1 / g0;
4073  g1 = g1 % g0;
4074  v1 += y * v0;
4075  }
4076  return 0;
4077 }
4078 
4079 // ********************************************************
4080 
4081 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
4082 {
4083  BERSequenceDecoder seq(bt);
4084  OID oid(seq);
4085  if (oid != ASN1::prime_field())
4086  BERDecodeError();
4087  m_modulus.BERDecode(seq);
4088  seq.MessageEnd();
4089  m_result.reg.resize(m_modulus.reg.size());
4090 }
4091 
4092 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
4093 {
4094  DERSequenceEncoder seq(bt);
4095  ASN1::prime_field().DEREncode(seq);
4096  m_modulus.DEREncode(seq);
4097  seq.MessageEnd();
4098 }
4099 
4100 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
4101 {
4102  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4103 }
4104 
4105 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
4106 {
4107  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4108 }
4109 
4110 const Integer& ModularArithmetic::Half(const Integer &a) const
4111 {
4112  if (a.reg.size()==m_modulus.reg.size())
4113  {
4114  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4115  return m_result;
4116  }
4117  else
4118  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4119 }
4120 
4121 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4122 {
4123  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4124  {
4125  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4126  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4127  {
4128  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4129  }
4130  return m_result;
4131  }
4132  else
4133  {
4134  m_result1 = a+b;
4135  if (m_result1 >= m_modulus)
4136  m_result1 -= m_modulus;
4137  return m_result1;
4138  }
4139 }
4140 
4141 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
4142 {
4143  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4144  {
4145  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4146  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4147  {
4148  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4149  }
4150  }
4151  else
4152  {
4153  a+=b;
4154  if (a>=m_modulus)
4155  a-=m_modulus;
4156  }
4157 
4158  return a;
4159 }
4160 
4161 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4162 {
4163  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4164  {
4165  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4166  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4167  return m_result;
4168  }
4169  else
4170  {
4171  m_result1 = a-b;
4172  if (m_result1.IsNegative())
4173  m_result1 += m_modulus;
4174  return m_result1;
4175  }
4176 }
4177 
4178 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
4179 {
4180  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4181  {
4182  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4183  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4184  }
4185  else
4186  {
4187  a-=b;
4188  if (a.IsNegative())
4189  a+=m_modulus;
4190  }
4191 
4192  return a;
4193 }
4194 
4195 const Integer& ModularArithmetic::Inverse(const Integer &a) const
4196 {
4197  if (!a)
4198  return a;
4199 
4200  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4201  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4202  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4203 
4204  return m_result;
4205 }
4206 
4207 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4208 {
4209  if (m_modulus.IsOdd())
4210  {
4211  MontgomeryRepresentation dr(m_modulus);
4212  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4213  }
4214  else
4215  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4216 }
4217 
4218 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4219 {
4220  if (m_modulus.IsOdd())
4221  {
4222  MontgomeryRepresentation dr(m_modulus);
4223  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4224  for (unsigned int i=0; i<exponentsCount; i++)
4225  results[i] = dr.ConvertOut(results[i]);
4226  }
4227  else
4228  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4229 }
4230 
4231 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
4232  : ModularArithmetic(m),
4233  m_u((word)0, m_modulus.reg.size()),
4234  m_workspace(5*m_modulus.reg.size())
4235 {
4236  if (!m_modulus.IsOdd())
4237  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4238 
4239  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4240 }
4241 
4242 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
4243 {
4244  word *const T = m_workspace.begin();
4245  word *const R = m_result.reg.begin();
4246  const size_t N = m_modulus.reg.size();
4247  assert(a.reg.size()<=N && b.reg.size()<=N);
4248 
4249  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4250  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4251  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4252  return m_result;
4253 }
4254 
4255 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
4256 {
4257  word *const T = m_workspace.begin();
4258  word *const R = m_result.reg.begin();
4259  const size_t N = m_modulus.reg.size();
4260  assert(a.reg.size()<=N);
4261 
4262  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4263  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4264  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4265  return m_result;
4266 }
4267 
4268 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
4269 {
4270  word *const T = m_workspace.begin();
4271  word *const R = m_result.reg.begin();
4272  const size_t N = m_modulus.reg.size();
4273  assert(a.reg.size()<=N);
4274 
4275  CopyWords(T, a.reg, a.reg.size());
4276  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4277  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4278  return m_result;
4279 }
4280 
4281 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
4282 {
4283 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4284  word *const T = m_workspace.begin();
4285  word *const R = m_result.reg.begin();
4286  const size_t N = m_modulus.reg.size();
4287  assert(a.reg.size()<=N);
4288 
4289  CopyWords(T, a.reg, a.reg.size());
4290  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4291  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4292  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4293 
4294 // cout << "k=" << k << " N*32=" << 32*N << endl;
4295 
4296  if (k>N*WORD_BITS)
4297  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4298  else
4299  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4300 
4301  return m_result;
4302 }
4303 
4304 // Specialization declared in misc.h to allow us to print integers
4305 // with additional control options, like arbirary bases and uppercase.
4306 template <> CRYPTOPP_DLL
4307 std::string IntToString<Integer>(Integer value, unsigned int base)
4308 {
4309  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4310  static const unsigned int BIT_32 = (1U << 31);
4311  const bool UPPER = !!(base & BIT_32);
4312  static const unsigned int BIT_31 = (1U << 30);
4313  const bool BASE = !!(base & BIT_31);
4314 
4315  const char CH = UPPER ? 'A' : 'a';
4316  base &= ~(BIT_32|BIT_31);
4317  assert(base >= 2 && base <= 32);
4318 
4319  if (value == 0)
4320  return "0";
4321 
4322  bool negative = false, zero = false;
4323  if (value.IsNegative())
4324  {
4325  negative = true;
4326  value.Negate();
4327  }
4328 
4329  if (!value)
4330  zero = true;
4331 
4332  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4333  Integer temp;
4334 
4335  unsigned int i=0;
4336  while (!!value)
4337  {
4338  word digit;
4339  Integer::Divide(digit, temp, value, word(base));
4340  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4341  value.swap(temp);
4342  }
4343 
4344  std::string result;
4345  result.reserve(i+2);
4346 
4347  if (negative)
4348  result += '-';
4349 
4350  if (zero)
4351  result += '0';
4352 
4353  while (i--)
4354  result += s[i];
4355 
4356  if (BASE)
4357  {
4358  if (base == 10)
4359  result += '.';
4360  else if (base == 16)
4361  result += 'h';
4362  else if (base == 8)
4363  result += 'o';
4364  else if (base == 2)
4365  result += 'b';
4366  }
4367 
4368  return result;
4369 }
4370 
4371 // Specialization declared in misc.h to avoid Coverity findings.
4372 template <> CRYPTOPP_DLL
4373 std::string IntToString<unsigned long long>(unsigned long long value, unsigned int base)
4374 {
4375  // Hack... set the high bit for uppercase.
4376  static const unsigned int HIGH_BIT = (1U << 31);
4377  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4378  base &= ~HIGH_BIT;
4379 
4380  assert(base >= 2);
4381  if (value == 0)
4382  return "0";
4383 
4384  std::string result;
4385  while (value > 0)
4386  {
4387  unsigned long long digit = value % base;
4388  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4389  value /= base;
4390  }
4391  return result;
4392 }
4393 
4394 NAMESPACE_END
4395 
4396 #if WORKAROUND_ARMEL_BUG
4397 # pragma GCC pop_options
4398 #endif
4399 
4400 #if WORKAROUND_ARM64_BUG
4401 # pragma GCC pop_options
4402 #endif
4403 
4404 #endif
used to pass byte array input as part of a NameValuePairs object
Definition: algparam.h:30
An invalid argument was detected.
Definition: cryptlib.h:166
Classes for working with NameValuePairs.
a number which is probabilistically prime
Definition: integer.h:77
Utility functions for the Crypto++ library.
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:233
bool GetBit(size_t i) const
return the i-th bit, i=0 being the least significant bit
Definition: integer.cpp:2958
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:640
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3179
an unsigned value
Definition: integer.h:67
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:537
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:332
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode Unsigned.
Definition: asn.h:316
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:329
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:650
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4024
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:693
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:670
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:543
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:674
Secure memory block with allocator and cleanup.
Definition: secblock.h:422
unsigned int WordCount() const
number of significant words = ceiling(ByteCount()/sizeof(word))
Definition: integer.cpp:3104
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3254
Signedness
Used when importing and exporting integers.
Definition: integer.h:65
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:509
Object identifiers for algorthms and schemes.
Square
Definition: square.h:21
Classes for automatic resource management.
byte GetByte(size_t i) const
return the i-th byte
Definition: integer.cpp:2980
Library configuration file.
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3823
Ring of congruence classes modulo n.
Definition: modarith.h:27
Interface for random number generators.
Definition: cryptlib.h:1085
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:660
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3268
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3167
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:2988
the value is negative
Definition: integer.h:59
SecByteBlock is a SecBlock<byte> typedef.
Definition: secblock.h:719
BER Sequence Decoder.
Definition: asn.h:197
Interface for buffered transformations.
Definition: cryptlib.h:1247
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3223
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:27
bool IsConvertableToLong() const
return true if *this can be represented as a signed long
Definition: integer.cpp:2861
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4008
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:2931
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:355
Abstract Ring.
Definition: algebra.h:52
Sign
Used internally to represent the integer.
Definition: integer.h:55
Pointer that overloads operator→
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:3997
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3239
Classes and functions for secure memory allocations.
unsigned int BitCount() const
number of significant bits = floor(log2(abs(*this))) + 1
Definition: integer.cpp:3118
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4003
Copy input to a memory buffer.
Definition: filters.h:775
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:3979
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:319
a number with no special properties
Definition: integer.h:75
Integer Squared() const
Definition: integer.h:452
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1268
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:487
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3018
Integer()
Creates the zero integer.
Definition: integer.cpp:2821
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:590
signed long ConvertToLong() const
return equivalent signed long if possible, otherwise undefined
Definition: integer.cpp:2875
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:252
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:3945
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:638
Integer Times(const Integer &b) const
Definition: integer.cpp:3747
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3230
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:946
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2910
Multiple precision integer with arithmetic operations.
Definition: integer.h:31
a signed value
Definition: integer.h:69
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:2936
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:556
RandomNumberType
Properties of a random integer.
Definition: integer.h:73
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:386
string-based implementation of Store interface
Definition: filters.h:808
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3805
Byte Queue.
Definition: queue.h:20
Classes, functions, intrinsics and features for X86, X32 nd X64 assembly.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:2966
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:753
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation&#39;s attachment interface in cryptlib.h.
Classes and functions for number theoretic operations.
std::string IntToString< unsigned long long >(unsigned long long value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4373
size_t DEREncodeOctetString(BufferedTransformation &out, const byte *str, size_t strLen)
ASN Strings.
Definition: asn.cpp:104
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3201
Exception thrown when division by 0 is encountered.
Definition: integer.h:37
DER Sequence Encoder.
Definition: asn.h:207
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:869
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:45
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:137
DER General Encoder.
Definition: asn.h:174
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3309
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4029
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3127
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:717
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:499
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:2926
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:396
EuclideanDomainOf.
Definition: algebra.h:165
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:655
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3208
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4307
lword GetBits(size_t i, size_t n) const
return n lowest bits of *this >> i
Definition: integer.cpp:2995
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:518
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:523
BER General Decoder.
Definition: asn.h:137
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:3961
Object Identifier.
Definition: asn.h:91
Integer Modulo(const Integer &b) const
Definition: integer.cpp:3858
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:565
unsigned int ByteCount() const
number of significant bytes = ceiling(BitCount()/8)
Definition: integer.cpp:3109
the value is positive or 0
Definition: integer.h:57
Interface for retrieving values given their names.
Definition: cryptlib.h:261