Crypto++  5.6.3
Free C++ class library of cryptographic schemes
rdrand.S
1 ;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
2 ;; Copyright assigned to the Crypto++ project.
3 
4 ;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool chains.
5 ;; Additionally, the inline assembly code produced by GCC and Clang is not that
6 ;; impressive. However, using this code requires NASM and an edit to the GNUmakefile.
7 
8 ;; nasm -f elf32 rdrand.S -DX86 -g -o rdrand-x86.o
9 ;; nasm -f elfx32 rdrand.S -DX32 -g -o rdrand-x32.o
10 ;; nasm -f elf64 rdrand.S -DX64 -g -o rdrand-x64.o
11 
12 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14 
15 ;; Naming convention used in rdrand.{h|cpp|asm|S}
16 ;; MSC = Microsoft Compiler (and compatibles)
17 ;; GCC = GNU Compiler (and compatibles)
18 ;; ALL = MSC and GCC (and compatibles)
19 ;; RRA = RDRAND, Assembly
20 ;; RSA = RDSEED, Assembly
21 ;; RRI = RDRAND, Intrinsic
22 ;; RSI = RDSEED, Intrinsic
23 
24 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 
27 ;; C/C++ Function prototypes
28 ;; X86, X32 and X64:
29 ;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety);
30 
31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 
34 ;; Return values
35 %define RDRAND_SUCCESS 1
36 %define RDRAND_FAILURE 0
37 
38 %define RDSEED_SUCCESS 1
39 %define RDSEED_FAILURE 0
40 
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43 
%ifdef X86 or X32                       ;; Set via the command line

;;---------------------------------------------------------------------
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes starting at `ptr` with output from RDRAND.
;;
;; In:    X86: arguments are read from the stack (cdecl layout assumed)
;;        X32: arguments are expected in EDI, ESI and EDX
;;          Arg1, byte* buffer
;;          Arg2, size_t bsize
;;          Arg3, unsigned int safety - number of failed RDRAND
;;                attempts tolerated before the call gives up
;; Out:   EAX = RDRAND_SUCCESS (1) or RDRAND_FAILURE (0)
;; Clobb: EAX, EDX, flags. X32 also modifies EDI, ESI and all of RAX.
;;        X86 saves and restores EDI and ESI.
;;
;; A NULL buffer succeeds only when bsize is 0.
;;---------------------------------------------------------------------

global  NASM_RRA_GenerateBlock
section .text

%ifdef X86
align   8
cpu     486
%else
align   16
%endif

NASM_RRA_GenerateBlock:

%ifdef X86
  ;; Offsets are relative to ESP *after* the two pushes below:
  ;; [esp] = saved EDI, [esp+4] = saved ESI, [esp+8] = return address.
  %define arg1   [esp+0Ch]
  %define arg2   [esp+10h]
  %define arg3   [esp+14h]
  %define MWSIZE 04h                    ;; machine word size
%else
  %define MWSIZE 08h                    ;; machine word size
%endif

  %define buffer edi
  %define bsize  esi
  %define safety edx

%ifdef X86
.Load_Arguments:

        ;; EDI and ESI are used as working registers, so preserve the
        ;; caller's values; they are restored at .GenerateBlock_Return.
        push    esi
        push    edi

        mov     buffer, arg1
        mov     bsize, arg2
        mov     safety, arg3
%endif

.Validate_Pointer:

        test    buffer, buffer
        jz      .GenerateBlock_PreRet

        ;; Top of While loop
.GenerateBlock_Top:

        ;; Check remaining size
        test    bsize, bsize
        jz      .GenerateBlock_Success

%ifdef X86
.Call_RDRAND_EAX:
%else
.Call_RDRAND_RAX:
        DB      48h                     ;; REX.W prefix: turns the bytes below into `rdrand rax`
%endif
        ;; The instruction is emitted as raw bytes so that assemblers
        ;; without RDRAND support can still build this file.
        ;; 0F C7 F0 is `rdrand eax`.
        DB      0Fh, 0C7h, 0F0h

        ;; If CF=1, the number returned by RDRAND is valid.
        ;; If CF=0, a random number was not available.
        jc      .RDRAND_succeeded

.RDRAND_failed:

        ;; Exit if the retry budget is exhausted
        test    safety, safety
        jz      .GenerateBlock_Failure

        dec     safety
        jmp     .GenerateBlock_Top

.RDRAND_succeeded:

        cmp     bsize, MWSIZE
        jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
        ;; Only 32-bit stores are used here, so write the word in two halves.
        mov     [buffer+4], eax
        shr     rax, 32
%endif

        mov     [buffer], eax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE

        ;; Continue
        jmp     .GenerateBlock_Top

        ;; 1,2,3 bytes remain for X86
        ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
        ;; Bit 2 set: at least 4 bytes remain
        test    bsize, 4
        jz      .Bit_2_Not_Set

        mov     [buffer], eax
        add     buffer, 4
        shr     rax, 32                 ;; bring the unused upper half down

.Bit_2_Not_Set:
%endif

        ;; Bit 1 set: at least 2 bytes remain
        test    bsize, 2
        jz      .Bit_1_Not_Set

        mov     [buffer], ax
        shr     eax, 16                 ;; bring the next unused bytes down
        add     buffer, 2

.Bit_1_Not_Set:

        ;; Bit 0 set: one last byte remains
        test    bsize, 1
        jz      .GenerateBlock_Success

        mov     [buffer], al
        jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

        ;; NULL buffer: only an empty request counts as fulfilled
        test    bsize, bsize
        jz      .GenerateBlock_Success

.GenerateBlock_Failure:

        mov     eax, RDRAND_FAILURE
        jmp     .GenerateBlock_Return

.GenerateBlock_Success:

        mov     eax, RDRAND_SUCCESS

.GenerateBlock_Return:

%ifdef X86
        pop     edi
        pop     esi
%endif
        ret

%endif  ;; X86 and X32
197 
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
200 
%ifdef X64                              ;; Set via the command line

global  NASM_RRA_GenerateBlock
section .text
align   16

;;---------------------------------------------------------------------
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes starting at `ptr` with output from RDRAND.
;;
;; In:    RDI = byte* buffer
;;        RSI = size_t bsize
;;        EDX = unsigned int safety - number of failed RDRAND attempts
;;              tolerated before the call gives up
;; Out:   RAX = RDRAND_SUCCESS (1) or RDRAND_FAILURE (0)
;; Clobb: RAX, RDI, RSI, EDX, flags
;;
;; A NULL buffer succeeds only when bsize is 0.
;;---------------------------------------------------------------------

NASM_RRA_GenerateBlock:

%define MWSIZE 08h                      ;; machine word size
%define buffer rdi
%define bsize  rsi
%define safety edx

        ;; Arguments are already in registers; nothing to load.

        test    buffer, buffer
        jnz     .fill

        ;; NULL buffer: only an empty request counts as fulfilled
        test    bsize, bsize
        jz      .ok
        jmp     .fail

.fill:
        ;; Loop head: done once nothing is left to write
        test    bsize, bsize
        jz      .ok

        ;; `rdrand rax`, emitted as raw bytes
        DB      048h, 0Fh, 0C7h, 0F0h

        ;; CF=1: RAX holds a valid random number. CF=0: none was available.
        jnc     .retry

        cmp     bsize, MWSIZE
        jb      .tail

        ;; A whole machine word fits
        mov     [buffer], rax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        jmp     .fill

.retry:
        ;; RDRAND failed: give up when the retry budget is exhausted
        test    safety, safety
        jz      .fail

        dec     safety
        jmp     .fill

.tail:
        ;; 1..7 bytes remain; the low three bits of bsize select the stores.

        test    bsize, 4
        jz      .no_dword

        mov     [buffer], eax
        shr     rax, 32                 ;; bring the unused upper half down
        add     buffer, 4

.no_dword:
        test    bsize, 2
        jz      .no_word

        mov     [buffer], ax
        shr     eax, 16                 ;; bring the next unused bytes down
        add     buffer, 2

.no_word:
        test    bsize, 1
        jz      .ok

        mov     [buffer], al

.ok:
        mov     eax, RDRAND_SUCCESS     ;; 32-bit write also clears the top of RAX
        ret

.fail:
        mov     eax, RDRAND_FAILURE
        ret

%endif  ;; X64
319 
320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
322 
%ifdef X86 or X32                       ;; Set via the command line

;;---------------------------------------------------------------------
;; extern "C" int NASM_RSA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes starting at `ptr` with output from RDSEED.
;;
;; In:    X86: arguments are read from the stack (cdecl layout assumed)
;;        X32: arguments are expected in EDI, ESI and EDX
;;          Arg1, byte* buffer
;;          Arg2, size_t bsize
;;          Arg3, unsigned int safety - number of failed RDSEED
;;                attempts tolerated before the call gives up
;; Out:   EAX = RDSEED_SUCCESS (1) or RDSEED_FAILURE (0)
;; Clobb: EAX, EDX, flags. X32 also modifies EDI, ESI and all of RAX.
;;        X86 saves and restores EDI and ESI.
;;
;; A NULL buffer succeeds only when bsize is 0.
;;---------------------------------------------------------------------

global  NASM_RSA_GenerateBlock
section .text

%ifdef X86
align   8
cpu     486
%else
align   16
%endif

NASM_RSA_GenerateBlock:

%ifdef X86
  ;; Offsets are relative to ESP *after* the two pushes below:
  ;; [esp] = saved EDI, [esp+4] = saved ESI, [esp+8] = return address.
  %define arg1   [esp+0Ch]
  %define arg2   [esp+10h]
  %define arg3   [esp+14h]
  %define MWSIZE 04h                    ;; machine word size
%else
  %define MWSIZE 08h                    ;; machine word size
%endif

  %define buffer edi
  %define bsize  esi
  %define safety edx

%ifdef X86
.Load_Arguments:

        ;; EDI and ESI are used as working registers, so preserve the
        ;; caller's values; they are restored at .GenerateBlock_Return.
        push    esi
        push    edi

        mov     buffer, arg1
        mov     bsize, arg2
        mov     safety, arg3
%endif

.Validate_Pointer:

        test    buffer, buffer
        jz      .GenerateBlock_PreRet

        ;; Top of While loop
.GenerateBlock_Top:

        ;; Check remaining size
        test    bsize, bsize
        jz      .GenerateBlock_Success

%ifdef X86
.Call_RDSEED_EAX:
%else
.Call_RDSEED_RAX:
        DB      48h                     ;; REX.W prefix: turns the bytes below into `rdseed rax`
%endif
        ;; The instruction is emitted as raw bytes so that assemblers
        ;; without RDSEED support can still build this file.
        ;; 0F C7 F8 is `rdseed eax`.
        DB      0Fh, 0C7h, 0F8h

        ;; If CF=1, the number returned by RDSEED is valid.
        ;; If CF=0, a random number was not available.
        jc      .RDSEED_succeeded

.RDSEED_failed:

        ;; Exit if the retry budget is exhausted
        test    safety, safety
        jz      .GenerateBlock_Failure

        dec     safety
        jmp     .GenerateBlock_Top

.RDSEED_succeeded:

        cmp     bsize, MWSIZE
        jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
        ;; MWSIZE is 8 on X32, so both halves of RAX must be stored;
        ;; only 32-bit stores are used here, hence the two writes.
        mov     [buffer+4], eax
        shr     rax, 32
%endif

        mov     [buffer], eax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE

        ;; Continue
        jmp     .GenerateBlock_Top

        ;; 1,2,3 bytes remain for X86
        ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
        ;; Bit 2 set: at least 4 bytes remain
        test    bsize, 4
        jz      .Bit_2_Not_Set

        mov     [buffer], eax
        add     buffer, 4
        shr     rax, 32                 ;; bring the unused upper half down

.Bit_2_Not_Set:
%endif

        ;; Bit 1 set: at least 2 bytes remain
        test    bsize, 2
        jz      .Bit_1_Not_Set

        mov     [buffer], ax
        shr     eax, 16                 ;; bring the next unused bytes down
        add     buffer, 2

.Bit_1_Not_Set:

        ;; Bit 0 set: one last byte remains
        test    bsize, 1
        jz      .GenerateBlock_Success

        mov     [buffer], al
        jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

        ;; NULL buffer: only an empty request counts as fulfilled
        test    bsize, bsize
        jz      .GenerateBlock_Success

.GenerateBlock_Failure:

        mov     eax, RDSEED_FAILURE
        jmp     .GenerateBlock_Return

.GenerateBlock_Success:

        mov     eax, RDSEED_SUCCESS

.GenerateBlock_Return:

%ifdef X86
        pop     edi
        pop     esi
%endif
        ret

%endif  ;; X86 and X32
471 
472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
474 
%ifdef X64                              ;; Set via the command line

global  NASM_RSA_GenerateBlock
section .text
align   16

;;---------------------------------------------------------------------
;; extern "C" int NASM_RSA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes starting at `ptr` with output from RDSEED.
;;
;; In:    RDI = byte* buffer
;;        RSI = size_t bsize
;;        EDX = unsigned int safety - number of failed RDSEED attempts
;;              tolerated before the call gives up
;; Out:   RAX = RDSEED_SUCCESS (1) or RDSEED_FAILURE (0)
;; Clobb: RAX, RDI, RSI, EDX, flags
;;
;; A NULL buffer succeeds only when bsize is 0.
;;---------------------------------------------------------------------

NASM_RSA_GenerateBlock:

%define MWSIZE 08h                      ;; machine word size
%define buffer rdi
%define bsize  rsi
%define safety edx

        ;; Arguments are already in registers; nothing to load.

        test    buffer, buffer
        jnz     .fill

        ;; NULL buffer: only an empty request counts as fulfilled
        test    bsize, bsize
        jz      .ok
        jmp     .fail

.fill:
        ;; Loop head: done once nothing is left to write
        test    bsize, bsize
        jz      .ok

        ;; `rdseed rax`, emitted as raw bytes
        DB      048h, 0Fh, 0C7h, 0F8h

        ;; CF=1: RAX holds a valid random number. CF=0: none was available.
        jnc     .retry

        cmp     bsize, MWSIZE
        jb      .tail

        ;; A whole machine word fits
        mov     [buffer], rax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        jmp     .fill

.retry:
        ;; RDSEED failed: give up when the retry budget is exhausted
        test    safety, safety
        jz      .fail

        dec     safety
        jmp     .fill

.tail:
        ;; 1..7 bytes remain; the low three bits of bsize select the stores.

        test    bsize, 4
        jz      .no_dword

        mov     [buffer], eax
        shr     rax, 32                 ;; bring the unused upper half down
        add     buffer, 4

.no_dword:
        test    bsize, 2
        jz      .no_word

        mov     [buffer], ax
        shr     eax, 16                 ;; bring the next unused bytes down
        add     buffer, 2

.no_word:
        test    bsize, 1
        jz      .ok

        mov     [buffer], al

.ok:
        mov     eax, RDSEED_SUCCESS     ;; 32-bit write also clears the top of RAX
        ret

.fail:
        mov     eax, RDSEED_FAILURE
        ret

%endif  ;; X64
593 
594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
595 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
596