-rw-r--r-- 24544 high-ctidh-20210523/fp2048.S
/* DO NOT EDIT! generated by ./autogen */

/*
 * Field arithmetic on 32-limb (256-byte) values, least-significant limb at
 * offset 0, reduced modulo the modulus stored in the external uintbig_p.
 *
 * Syntax : GNU as, Intel operand order (.intel_syntax noprefix); the file is
 *          run through the C preprocessor.
 * ABI    : arguments are read from rdi, rsi, rdx (System V AMD64 order).
 * Needs  : BMI2 (mulx) and ADX (adcx/adox) for fp_mul3.
 * Extern : uintbig_p, uintbig_add3, uintbig_sub3 (not defined in this file).
 *
 * NOTE(review): this file is generated; the two hand changes below
 * (movzx in fp_sub3, .note.GNU-stack at the end) and the .rept loops in
 * fp_mul3 should be mirrored in ./autogen - confirm.
 */

.intel_syntax noprefix

#include "uintbig_namespace.h"
#include "fp_namespace.h"

.section .rodata

.set pbits,2047
.set pbytes,256
.set plimbs,32

.inv_min_p_mod_r: /* -p^-1 mod 2^64 */
    .quad 0x22249689c54c7495

.global fp_0
fp_0:
    .zero 256

.global fp_1
fp_1: /* 2^2048 mod p */
    .quad 0x994d7dbe41f62437, 0x6aaf42d975b174b6, 0x3f037f5ba7c4a965, 0x5ccaed897fd53a00
    .quad 0xd2973e879030fb33, 0x08c3a6b0fcf19681, 0x33301470a926eefd, 0x33e715b0a4a9b9e9
    .quad 0x8737cc516cf9ace5, 0xf5464238325eccd4, 0x393cd9de4f760e82, 0x059880446fb9a315
    .quad 0x8b19e3b333b22e4a, 0x65ac4ae7830805fa, 0xd71b975ca89c8fcd, 0x37314ebe2cf1f23b
    .quad 0x565f6b8c9e61cfb9, 0x87712cf7de06573f, 0x6d8736050fb35ad2, 0xe3efa60224957edb
    .quad 0x444a4fc8b855012d, 0xac7f2394665a0905, 0xcff83c43b74af366, 0x167df91c271503fd
    .quad 0xd70947c16f7fc287, 0x65069931a3a5d5b7, 0xf713ec84671a7fce, 0x6c8a0b9c659af905
    .quad 0x6600692af35042c7, 0x17670145e45b2b04, 0x38030a4d47b3b374, 0x355309fecf901ad2

.global fp_2
fp_2: /* 2^2049 mod p */
    .quad 0xbb0a256699e8ff2b, 0xf8ee46a6129e1054, 0xe85d7e8087758b41, 0xd8842a40d4f18755
    .quad 0x40b63c91a5c79f77, 0x69c884f24e33b484, 0x221ada5c355ad84e, 0x23c5dd46d58c0720
    .quad 0xe62cdcbdfe46936c, 0x91a3efd87587ddef, 0xdae351b164137731, 0xb7b92b4a5a067c86
    .quad 0x99e7134ccdf516ac, 0x4292041c31bd6348, 0xa095b682dec2a4df, 0xd61db7bbbe348a8b
    .quad 0xc98950481c398f5a, 0x915d68ed060ecb93, 0xaa3b7e0bcf4d2940, 0xbe84835a555cd2aa
    .quad 0xf4ad64d458c65815, 0x927dfdaf997cbfb6, 0x3a988c9e010437ef, 0xdf25efec5b310950
    .quad 0xf5c05218aed4c5e5, 0x9664bac92882f2ac, 0x95d927df9b3dd4e1, 0xfd421b1797beefb8
    .quad 0xee00f56437bb467b, 0xe145ada314d4b9b4, 0xd8071809a74df80e, 0x271717528efae93f

.r_squared_mod_p: /* (2^2048)^2 mod p */
    .quad 0x6e940162ecb00f8c, 0x24c744036302b024, 0xc892a6ce3f16637b, 0x37733ccc6ac611b0
    .quad 0xe5361a2cfb50bcf7, 0xe06f5b5f3f269a1f, 0x67d1d25e92181152, 0xab5a12bd6902fcfc
    .quad 0x82c060ea7daaebd0, 0x22029cf3781c2b9e, 0x0cfdf6fb51053d83, 0xd0af69954ce04fb3
    .quad 0x015a5254e7ef0c6d, 0x834839d5541a461a, 0x6768d972de4e269a, 0x71a06dcb2a0a7a7f
    .quad 0x0e11b7d51a8ef22f, 0x9d655dcdeb3d4934, 0x28dfab822c934ea8, 0x60a2ba69a0862d99
    .quad 0xf1286b89b68ee540, 0xb16c9ae335599258, 0x4a30cb66a64be15f, 0xd021ee65d62b98ce
    .quad 0xae101149f5e60533, 0x2ad923e56bee4dc3, 0x5ccb61e26b6f93b5, 0xa4ee09d81cc595af
    .quad 0xdc3497d89520ca1f, 0x2ab20fe509190878, 0xcb8fab09772c236e, 0x2dc5652f8085f6a5

/*
 * Operation counters. They are bumped with a plain read-modify-write add
 * (no lock prefix):
 *   fp_mulsq_count  +1 on every entry to fp_mul3 (squarings land there too)
 *   fp_sq_count     +1 on every entry to fp_sq2
 *   fp_addsub_count +1 on every entry to fp_add3 / fp_sub3
 */
.section .data

.global fp_mulsq_count
fp_mulsq_count:
    .quad 0

.global fp_sq_count
fp_sq_count:
    .quad 0

.global fp_addsub_count
fp_addsub_count:
    .quad 0

.section .text
.p2align 4,,15

/*
 * fp_copy: copy plimbs quadwords from [rsi] to [rdi].
 * In:    rdi = destination, rsi = source
 * Clobb: rcx, rsi, rdi (advanced by rep movsq); DF is cleared
 */
.global fp_copy
fp_copy:
    cld
    mov rcx, plimbs
    rep movsq
    ret

/*
 * fp_cmov: branch-free conditional move, [rdi] <- [rsi] when dl == 1.
 * In:    rdi = destination, rsi = source, dl = flag (expected 0 or 1)
 * Clobb: rax, rcx, rdx, flags
 * rax = -(zero-extended dl): all ones for 1, zero for 0. Every limb is
 * read and written regardless of the flag.
 */
.global fp_cmov
fp_cmov:
    movzx rax, dl
    neg rax
    .set k, 0
    .rept plimbs
        mov rcx, [rdi + 8*k]
        mov rdx, [rsi + 8*k]
        xor rdx, rcx
        and rdx, rax            /* difference, or 0 when the flag is 0 */
        xor rcx, rdx
        mov [rdi + 8*k], rcx
        .set k, k+1
    .endr
    ret

/*
 * fp_cswap: branch-free conditional swap of [rdi] and [rsi] when dl == 1.
 * In:    rdi, rsi = the two operands, dl = flag (expected 0 or 1)
 * Clobb: rax, rcx, rdx, r8, flags
 */
.global fp_cswap
fp_cswap:
    movzx rax, dl
    neg rax
    .set k, 0
    .rept plimbs
        mov rcx, [rdi + 8*k]
        mov rdx, [rsi + 8*k]
        mov r8, rcx
        xor r8, rdx
        and r8, rax             /* a^b when swapping, else 0 */
        xor rcx, r8
        xor rdx, r8
        mov [rdi + 8*k], rcx
        mov [rsi + 8*k], rdx
        .set k, k+1
    .endr
    ret

/*
 * .reduce_once: replace the value at [rdi] by (value - uintbig_p) when that
 * subtraction does not borrow; otherwise leave it unchanged. No branch
 * depends on the data.
 * In:    rdi = pointer to the 32-limb value (updated in place)
 * Clobb: rax, rcx, rdx, rsi, rdi, r8-r11, flags (rbp is saved/restored)
 * Stack: 192 bytes of scratch hold the low 24 limbs of the difference; the
 *        top 8 limbs stay in rdi, rsi, rdx, rcx, r8, r9, r10, r11.
 * Reached by tail jump from fp_add3 and fp_mul3, so the ret below returns to
 * their caller. The mov/movq stores between the sbb instructions do not
 * touch the carry flag, which keeps the borrow chain intact.
 */
.reduce_once:
    push rbp
    sub rsp, 192
    mov rbp, rdi

    mov rdi, [rbp + 0]
    sub rdi, [rip + uintbig_p + 0]
    movq [rsp + 0], rdi
    mov rsi, [rbp + 8]
    sbb rsi, [rip + uintbig_p + 8]
    movq [rsp + 8], rsi
    mov rdx, [rbp + 16]
    sbb rdx, [rip + uintbig_p + 16]
    movq [rsp + 16], rdx
    mov rcx, [rbp + 24]
    sbb rcx, [rip + uintbig_p + 24]
    movq [rsp + 24], rcx
    mov r8, [rbp + 32]
    sbb r8, [rip + uintbig_p + 32]
    movq [rsp + 32], r8
    mov r9, [rbp + 40]
    sbb r9, [rip + uintbig_p + 40]
    movq [rsp + 40], r9
    mov r10, [rbp + 48]
    sbb r10, [rip + uintbig_p + 48]
    movq [rsp + 48], r10
    mov r11, [rbp + 56]
    sbb r11, [rip + uintbig_p + 56]
    movq [rsp + 56], r11
    mov rdi, [rbp + 64]
    sbb rdi, [rip + uintbig_p + 64]
    movq [rsp + 64], rdi
    mov rsi, [rbp + 72]
    sbb rsi, [rip + uintbig_p + 72]
    movq [rsp + 72], rsi
    mov rdx, [rbp + 80]
    sbb rdx, [rip + uintbig_p + 80]
    movq [rsp + 80], rdx
    mov rcx, [rbp + 88]
    sbb rcx, [rip + uintbig_p + 88]
    movq [rsp + 88], rcx
    mov r8, [rbp + 96]
    sbb r8, [rip + uintbig_p + 96]
    movq [rsp + 96], r8
    mov r9, [rbp + 104]
    sbb r9, [rip + uintbig_p + 104]
    movq [rsp + 104], r9
    mov r10, [rbp + 112]
    sbb r10, [rip + uintbig_p + 112]
    movq [rsp + 112], r10
    mov r11, [rbp + 120]
    sbb r11, [rip + uintbig_p + 120]
    movq [rsp + 120], r11
    mov rdi, [rbp + 128]
    sbb rdi, [rip + uintbig_p + 128]
    movq [rsp + 128], rdi
    mov rsi, [rbp + 136]
    sbb rsi, [rip + uintbig_p + 136]
    movq [rsp + 136], rsi
    mov rdx, [rbp + 144]
    sbb rdx, [rip + uintbig_p + 144]
    movq [rsp + 144], rdx
    mov rcx, [rbp + 152]
    sbb rcx, [rip + uintbig_p + 152]
    movq [rsp + 152], rcx
    mov r8, [rbp + 160]
    sbb r8, [rip + uintbig_p + 160]
    movq [rsp + 160], r8
    mov r9, [rbp + 168]
    sbb r9, [rip + uintbig_p + 168]
    movq [rsp + 168], r9
    mov r10, [rbp + 176]
    sbb r10, [rip + uintbig_p + 176]
    movq [rsp + 176], r10
    mov r11, [rbp + 184]
    sbb r11, [rip + uintbig_p + 184]
    movq [rsp + 184], r11

    /* Top 8 limbs of the difference are kept in registers. */
    mov rdi, [rbp + 192]
    sbb rdi, [rip + uintbig_p + 192]
    mov rsi, [rbp + 200]
    sbb rsi, [rip + uintbig_p + 200]
    mov rdx, [rbp + 208]
    sbb rdx, [rip + uintbig_p + 208]
    mov rcx, [rbp + 216]
    sbb rcx, [rip + uintbig_p + 216]
    mov r8, [rbp + 224]
    sbb r8, [rip + uintbig_p + 224]
    mov r9, [rbp + 232]
    sbb r9, [rip + uintbig_p + 232]
    mov r10, [rbp + 240]
    sbb r10, [rip + uintbig_p + 240]
    mov r11, [rbp + 248]
    sbb r11, [rip + uintbig_p + 248]

    /* rax = all ones when the subtraction did not borrow, else 0. */
    setnc al
    movzx rax, al
    neg rax

/*
 * cswap2 r, m: when rax is all ones, memory operand m receives the value
 * that was in register r; when rax is 0, m keeps its value. r is clobbered.
 */
.macro cswap2, r, m
    xor \r, \m
    and \r, rax
    xor \m, \r
.endm

    cswap2 rdi, [rbp + 192]
    cswap2 rsi, [rbp + 200]
    cswap2 rdx, [rbp + 208]
    cswap2 rcx, [rbp + 216]
    cswap2 r8, [rbp + 224]
    cswap2 r9, [rbp + 232]
    cswap2 r10, [rbp + 240]
    cswap2 r11, [rbp + 248]

    /* Low 24 limbs: reload the saved difference and select likewise. */
    movq rdi, [rsp + 0]
    cswap2 rdi, [rbp + 0]
    movq rsi, [rsp + 8]
    cswap2 rsi, [rbp + 8]
    movq rdx, [rsp + 16]
    cswap2 rdx, [rbp + 16]
    movq rcx, [rsp + 24]
    cswap2 rcx, [rbp + 24]
    movq r8, [rsp + 32]
    cswap2 r8, [rbp + 32]
    movq r9, [rsp + 40]
    cswap2 r9, [rbp + 40]
    movq r10, [rsp + 48]
    cswap2 r10, [rbp + 48]
    movq r11, [rsp + 56]
    cswap2 r11, [rbp + 56]
    movq rdi, [rsp + 64]
    cswap2 rdi, [rbp + 64]
    movq rsi, [rsp + 72]
    cswap2 rsi, [rbp + 72]
    movq rdx, [rsp + 80]
    cswap2 rdx, [rbp + 80]
    movq rcx, [rsp + 88]
    cswap2 rcx, [rbp + 88]
    movq r8, [rsp + 96]
    cswap2 r8, [rbp + 96]
    movq r9, [rsp + 104]
    cswap2 r9, [rbp + 104]
    movq r10, [rsp + 112]
    cswap2 r10, [rbp + 112]
    movq r11, [rsp + 120]
    cswap2 r11, [rbp + 120]
    movq rdi, [rsp + 128]
    cswap2 rdi, [rbp + 128]
    movq rsi, [rsp + 136]
    cswap2 rsi, [rbp + 136]
    movq rdx, [rsp + 144]
    cswap2 rdx, [rbp + 144]
    movq rcx, [rsp + 152]
    cswap2 rcx, [rbp + 152]
    movq r8, [rsp + 160]
    cswap2 r8, [rbp + 160]
    movq r9, [rsp + 168]
    cswap2 r9, [rbp + 168]
    movq r10, [rsp + 176]
    cswap2 r10, [rbp + 176]
    movq r11, [rsp + 184]
    cswap2 r11, [rbp + 184]

    add rsp, 192
    pop rbp
    ret

/*
 * fp_add2: two-operand form; sets rdx = rdi and falls through to fp_add3,
 *          so the destination is also the second source.
 * fp_add3: bumps fp_addsub_count, calls uintbig_add3 with rdi, rsi, rdx as
 *          received, then tail-jumps to .reduce_once on the saved rdi.
 * In:    rdi = destination, rsi / rdx = sources (fp_add3)
 * The value uintbig_add3 leaves in rax is not used here.
 * The push of rdi both preserves the destination pointer across the call
 * and brings rsp to a 16-byte boundary at the call site.
 */
.global fp_add2
fp_add2:
    mov rdx, rdi

.global fp_add3
fp_add3:
    addq [fp_addsub_count+rip],1
    push rdi
    call uintbig_add3
    pop rdi
    jmp .reduce_once

/*
 * fp_sub2: two-operand form; rearranges (rdi, rsi) into (rdi, rdi, rsi) and
 *          falls through to fp_sub3.
 * fp_sub3: bumps fp_addsub_count, calls uintbig_sub3, then adds uintbig_p
 *          back into [rdi] when the call reported a borrow. The add-back is
 *          done unconditionally with a masked copy of uintbig_p, so no
 *          branch depends on the data.
 * In:    rdi = destination, rsi / rdx = sources (fp_sub3)
 * Clobb: rax, rcx, flags, plus whatever uintbig_sub3 clobbers
 * Stack: pbytes of scratch for the masked modulus
 * Presumably uintbig_sub3 returns the borrow as 0 or 1 - confirm against its
 * definition.
 */
.global fp_sub2
fp_sub2:
    mov rdx, rdi
    xchg rsi, rdx

.global fp_sub3
fp_sub3:
    addq [fp_addsub_count+rip],1
    push rdi
    call uintbig_sub3
    pop rdi
    /*
     * Use only the low byte of the return value, as fp_cmov and fp_cswap do
     * for their flag: if the callee returns a C bool, only al is defined.
     * No effect when rax already holds exactly 0 or 1.
     */
    movzx eax, al
    neg rax                     /* all ones on borrow, else 0 */
    sub rsp, pbytes
    mov rcx, [rip + uintbig_p + 0]
    and rcx, rax
    mov [rsp + 0],rcx
    .set k, 1
    .rept plimbs-1
        mov rcx, [rip + uintbig_p + 8*k]
        and rcx, rax
        mov [rsp + 8*k], rcx
        .set k, k+1
    .endr
    mov rcx, [rsp + 0]
    add rcx, [rdi + 0]
    mov [rdi + 0], rcx
    .set k, 1
    .rept plimbs-1
        mov rcx, [rsp + 8*k]
        adc rcx, [rdi + 8*k]
        mov [rdi + 8*k], rcx
        .set k, k+1
    .endr
    add rsp, pbytes
    ret

/* Montgomery arithmetic */

/*
 * fp_mul2: two-operand form; sets rdx = rdi and falls through to fp_mul3.
 * fp_mul3: word-by-word multiply with interleaved reduction (one MULSTEP per
 *          limb of the first source), followed by .reduce_once.
 * In:    rdi = destination, rsi / rdx = sources
 * Clobb: rax, rcx, rdx, rsi, rdi, r8-r11, flags (rbx, rbp saved/restored)
 * Stack: 272 bytes: [rsp+0 .. rsp+256] is a 33-limb accumulator used as a
 *        ring, [rsp+264] holds the destination pointer.
 * After the argument shuffle rdi points at the first source and rsi at the
 * second. The result is copied out only at the end, so the destination may
 * alias either source. Bumps fp_mulsq_count.
 */
.global fp_mul2
fp_mul2:
    mov rdx, rdi

.global fp_mul3
fp_mul3:
    push rbp
    push rbx
    addq [fp_mulsq_count+rip],1
    sub rsp,272
    mov [rsp+264],rdi
    mov rdi,rsi
    mov rsi,rdx

    /* XXX: put directly into output */
    /* Zero all 33 accumulator limbs. */
    xor rax,rax
    .set k, 0
    .rept plimbs+1
        mov [rsp + 8*k], rax
        .set k, k+1
    .endr

/*
 * MULSTEP k, I0..I32
 *   k       index of the limb of [rdi] consumed by this step
 *   I0..I32 byte offsets from rsp of the 33 accumulator limbs, least
 *           significant first; successive invocations rotate the list by
 *           one slot instead of shifting the accumulator.
 * Pass 1 computes q = (acc[I0] + low64([rsi+0] * [rdi+8k])) * inv mod 2^64
 * and adds q * uintbig_p into the accumulator. Pass 2 adds
 * [rdi+8k] * [rsi] into the accumulator.
 * Each pass runs two independent carry chains: adox (OF) adds the low
 * product halves, adcx (CF) adds the high halves one limb later, with the
 * high half alternating between rbx and rcx. "xor rax, rax" clears both
 * flags at the start of a pass; the closing "mov rax, 0" must stay a mov
 * because xor would destroy the carries still in flight.
 */
.macro MULSTEP, k, I0, I1, I2, I3, I4, I5, I6, I7, I8, I9, I10, I11, I12, I13, I14, I15, I16, I17, I18, I19, I20, I21, I22, I23, I24, I25, I26, I27, I28, I29, I30, I31, I32
    mov r11,[rsp+\I0]
    mov rdx, [rsi + 0]
    mulx rcx, rdx, [rdi + 8*\k]
    add rdx, r11
    mulx rcx, rdx, [rip + .inv_min_p_mod_r]
    xor rax, rax /* clear flags */
    mulx rbx, rax, [rip + uintbig_p + 0]
    adox r11, rax
    mov [rsp+\I0],r11
    mov r11,[rsp+\I1]
    mulx rcx, rax, [rip + uintbig_p + 8]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I1],r11
    mov r11,[rsp+\I2]
    mulx rbx, rax, [rip + uintbig_p + 16]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I2],r11
    mov r11,[rsp+\I3]
    mulx rcx, rax, [rip + uintbig_p + 24]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I3],r11
    mov r11,[rsp+\I4]
    mulx rbx, rax, [rip + uintbig_p + 32]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I4],r11
    mov r11,[rsp+\I5]
    mulx rcx, rax, [rip + uintbig_p + 40]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I5],r11
    mov r11,[rsp+\I6]
    mulx rbx, rax, [rip + uintbig_p + 48]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I6],r11
    mov r11,[rsp+\I7]
    mulx rcx, rax, [rip + uintbig_p + 56]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I7],r11
    mov r11,[rsp+\I8]
    mulx rbx, rax, [rip + uintbig_p + 64]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I8],r11
    mov r11,[rsp+\I9]
    mulx rcx, rax, [rip + uintbig_p + 72]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I9],r11
    mov r11,[rsp+\I10]
    mulx rbx, rax, [rip + uintbig_p + 80]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I10],r11
    mov r11,[rsp+\I11]
    mulx rcx, rax, [rip + uintbig_p + 88]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I11],r11
    mov r11,[rsp+\I12]
    mulx rbx, rax, [rip + uintbig_p + 96]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I12],r11
    mov r11,[rsp+\I13]
    mulx rcx, rax, [rip + uintbig_p + 104]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I13],r11
    mov r11,[rsp+\I14]
    mulx rbx, rax, [rip + uintbig_p + 112]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I14],r11
    mov r11,[rsp+\I15]
    mulx rcx, rax, [rip + uintbig_p + 120]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I15],r11
    mov r11,[rsp+\I16]
    mulx rbx, rax, [rip + uintbig_p + 128]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I16],r11
    mov r11,[rsp+\I17]
    mulx rcx, rax, [rip + uintbig_p + 136]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I17],r11
    mov r11,[rsp+\I18]
    mulx rbx, rax, [rip + uintbig_p + 144]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I18],r11
    mov r11,[rsp+\I19]
    mulx rcx, rax, [rip + uintbig_p + 152]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I19],r11
    mov r11,[rsp+\I20]
    mulx rbx, rax, [rip + uintbig_p + 160]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I20],r11
    mov r11,[rsp+\I21]
    mulx rcx, rax, [rip + uintbig_p + 168]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I21],r11
    mov r11,[rsp+\I22]
    mulx rbx, rax, [rip + uintbig_p + 176]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I22],r11
    mov r11,[rsp+\I23]
    mulx rcx, rax, [rip + uintbig_p + 184]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I23],r11
    mov r11,[rsp+\I24]
    mulx rbx, rax, [rip + uintbig_p + 192]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I24],r11
    mov r11,[rsp+\I25]
    mulx rcx, rax, [rip + uintbig_p + 200]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I25],r11
    mov r11,[rsp+\I26]
    mulx rbx, rax, [rip + uintbig_p + 208]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I26],r11
    mov r11,[rsp+\I27]
    mulx rcx, rax, [rip + uintbig_p + 216]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I27],r11
    mov r11,[rsp+\I28]
    mulx rbx, rax, [rip + uintbig_p + 224]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I28],r11
    mov r11,[rsp+\I29]
    mulx rcx, rax, [rip + uintbig_p + 232]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I29],r11
    mov r11,[rsp+\I30]
    mulx rbx, rax, [rip + uintbig_p + 240]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I30],r11
    mov r11,[rsp+\I31]
    mulx rcx, rax, [rip + uintbig_p + 248]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I31],r11
    mov r11,[rsp+\I32]
    mov rax, 0
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I32],r11

    mov rdx, [rdi + 8*\k]
    xor rax, rax /* clear flags */
    mov r11,[rsp+\I0]
    mulx rbx, rax, [rsi + 0]
    adox r11, rax
    mov [rsp+\I0],r11
    mov r11,[rsp+\I1]
    mulx rcx, rax, [rsi + 8]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I1],r11
    mov r11,[rsp+\I2]
    mulx rbx, rax, [rsi + 16]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I2],r11
    mov r11,[rsp+\I3]
    mulx rcx, rax, [rsi + 24]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I3],r11
    mov r11,[rsp+\I4]
    mulx rbx, rax, [rsi + 32]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I4],r11
    mov r11,[rsp+\I5]
    mulx rcx, rax, [rsi + 40]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I5],r11
    mov r11,[rsp+\I6]
    mulx rbx, rax, [rsi + 48]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I6],r11
    mov r11,[rsp+\I7]
    mulx rcx, rax, [rsi + 56]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I7],r11
    mov r11,[rsp+\I8]
    mulx rbx, rax, [rsi + 64]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I8],r11
    mov r11,[rsp+\I9]
    mulx rcx, rax, [rsi + 72]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I9],r11
    mov r11,[rsp+\I10]
    mulx rbx, rax, [rsi + 80]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I10],r11
    mov r11,[rsp+\I11]
    mulx rcx, rax, [rsi + 88]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I11],r11
    mov r11,[rsp+\I12]
    mulx rbx, rax, [rsi + 96]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I12],r11
    mov r11,[rsp+\I13]
    mulx rcx, rax, [rsi + 104]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I13],r11
    mov r11,[rsp+\I14]
    mulx rbx, rax, [rsi + 112]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I14],r11
    mov r11,[rsp+\I15]
    mulx rcx, rax, [rsi + 120]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I15],r11
    mov r11,[rsp+\I16]
    mulx rbx, rax, [rsi + 128]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I16],r11
    mov r11,[rsp+\I17]
    mulx rcx, rax, [rsi + 136]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I17],r11
    mov r11,[rsp+\I18]
    mulx rbx, rax, [rsi + 144]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I18],r11
    mov r11,[rsp+\I19]
    mulx rcx, rax, [rsi + 152]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I19],r11
    mov r11,[rsp+\I20]
    mulx rbx, rax, [rsi + 160]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I20],r11
    mov r11,[rsp+\I21]
    mulx rcx, rax, [rsi + 168]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I21],r11
    mov r11,[rsp+\I22]
    mulx rbx, rax, [rsi + 176]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I22],r11
    mov r11,[rsp+\I23]
    mulx rcx, rax, [rsi + 184]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I23],r11
    mov r11,[rsp+\I24]
    mulx rbx, rax, [rsi + 192]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I24],r11
    mov r11,[rsp+\I25]
    mulx rcx, rax, [rsi + 200]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I25],r11
    mov r11,[rsp+\I26]
    mulx rbx, rax, [rsi + 208]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I26],r11
    mov r11,[rsp+\I27]
    mulx rcx, rax, [rsi + 216]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I27],r11
    mov r11,[rsp+\I28]
    mulx rbx, rax, [rsi + 224]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I28],r11
    mov r11,[rsp+\I29]
    mulx rcx, rax, [rsi + 232]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I29],r11
    mov r11,[rsp+\I30]
    mulx rbx, rax, [rsi + 240]
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I30],r11
    mov r11,[rsp+\I31]
    mulx rcx, rax, [rsi + 248]
    adcx r11, rbx
    adox r11, rax
    mov [rsp+\I31],r11
    mov r11,[rsp+\I32]
    mov rax, 0
    adcx r11, rcx
    adox r11, rax
    mov [rsp+\I32],r11
.endm

    /* Step k starts its slot list at offset 8*(k+1), wrapping after 256. */
    MULSTEP 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0
    MULSTEP 1, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8
    MULSTEP 2, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16
    MULSTEP 3, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24
    MULSTEP 4, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32
    MULSTEP 5, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40
    MULSTEP 6, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48
    MULSTEP 7, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56
    MULSTEP 8, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64
    MULSTEP 9, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72
    MULSTEP 10, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80
    MULSTEP 11, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88
    MULSTEP 12, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96
    MULSTEP 13, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104
    MULSTEP 14, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112
    MULSTEP 15, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
    MULSTEP 16, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128
    MULSTEP 17, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136
    MULSTEP 18, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144
    MULSTEP 19, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152
    MULSTEP 20, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160
    MULSTEP 21, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168
    MULSTEP 22, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176
    MULSTEP 23, 192, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184
    MULSTEP 24, 200, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192
    MULSTEP 25, 208, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200
    MULSTEP 26, 216, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208
    MULSTEP 27, 224, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216
    MULSTEP 28, 232, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224
    MULSTEP 29, 240, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232
    MULSTEP 30, 248, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240
    MULSTEP 31, 256, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248

    /*
     * After the last step the 32 result limbs sit at [rsp+0 .. rsp+248];
     * copy them to the saved destination and finish with .reduce_once.
     */
    mov rdi,[rsp+264]
    .set k, 0
    .rept plimbs
        mov r11, [rsp + 8*k]
        mov [rdi + 8*k], r11
        .set k, k+1
    .endr
    add rsp,272
    pop rbx
    pop rbp
    jmp .reduce_once

/*
 * fp_sq1: in-place form; sets rsi = rdi and falls through to fp_sq2.
 * fp_sq2: bumps fp_sq_count and tail-jumps to fp_mul3 with both sources
 *         equal to rsi (fp_mul3 then bumps fp_mulsq_count as well).
 * In:    rdi = destination, rsi = source (fp_sq2)
 */
.global fp_sq1
fp_sq1:
    mov rsi, rdi

.global fp_sq2
fp_sq2:
    /* TODO implement optimized Montgomery squaring */
    mov rdx, rsi
    addq [fp_sq_count+rip],1
    jmp fp_mul3

/*
 * Tell ELF linkers that this object does not need an executable stack.
 * Without this note GNU ld treats the object as requesting one.
 */
#if defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif