init: v1.0.0

This commit is contained in:
yaole
2026-05-27 23:03:00 +08:00
commit 8d97f750eb
466 changed files with 80067 additions and 0 deletions
+187
View File
@@ -0,0 +1,187 @@
// +build amd64,!generic
#define storeBlock(a0,a1,a2,a3, r) \
MOVQ a0, 0+r \
MOVQ a1, 8+r \
MOVQ a2, 16+r \
MOVQ a3, 24+r
#define loadBlock(r, a0,a1,a2,a3) \
MOVQ 0+r, a0 \
MOVQ 8+r, a1 \
MOVQ 16+r, a2 \
MOVQ 24+r, a3
#define loadModulus(p0,p1,p2,p3) \
MOVD ·p2+0(SB), p0 \
MOVD ·p2+8(SB), p1 \
MOVD ·p2+16(SB), p2 \
MOVD ·p2+24(SB), p3
#define loadR(p0,p1,p2,p3) \
MOVD ·r+0(SB), p0 \
MOVD ·r+8(SB), p1 \
MOVD ·r+16(SB), p2 \
MOVD ·r+24(SB), p3
#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \
\ // b = a-p
MOVQ a0, b0 \
MOVQ a1, b1 \
MOVQ a2, b2 \
MOVQ a3, b3 \
MOVQ a4, b4 \
\
SUBQ ·p2+0(SB), b0 \
SBBQ ·p2+8(SB), b1 \
SBBQ ·p2+16(SB), b2 \
SBBQ ·p2+24(SB), b3 \
SBBQ $0, b4 \
\
\ // if b is negative then return a
\ // else return b
\ // CMOVQCC = CMOVAEQ (AT&T), i.e., a >= p then move a-p to a.
CMOVQCC b0, a0 \
CMOVQCC b1, a1 \
CMOVQCC b2, a2 \
CMOVQCC b3, a3
#include "mul_amd64.h"
#include "mul_bmi2_amd64.h"
// a > p
TEXT ·gfpNeg(SB),0,$0-16
loadModulus(R8,R9,R10,R11)
loadR(R12,R13,R14,R15)
// Let R = 2^{256} and r = R % p
// /- p - a if p >= a
// R11:R8 = -|
// \- R + p - a if p < a
MOVQ a+8(FP), DI
SUBQ 0(DI), R8
SBBQ 8(DI), R9
SBBQ 16(DI), R10
SBBQ 24(DI), R11
// /- 0 if p >= a
// R15:R12 = -|
// \- r if p < a
MOVQ $0, AX
CMOVQCC AX, R12
CMOVQCC AX, R13
CMOVQCC AX, R14
CMOVQCC AX, R15
// /- p - a if p >= a
// R11:R8 = -|
// \- R + p - a - r if p < a
// Note R + p - a - r = p - a mod p and
// 0 < R + p - a - r < R
SUBQ R12, R8
SBBQ R13, R9
SBBQ R14, R10
SBBQ R15, R11
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
// cloudflare
// a > p mod r
// TEXT ·gfpNeg(SB),0,$0-16
// MOVQ ·p2+0(SB), R8
// MOVQ ·p2+8(SB), R9
// MOVQ ·p2+16(SB), R10
// MOVQ ·p2+24(SB), R11
// MOVQ a+8(FP), DI
// SUBQ 0(DI), R8 // p - a
// SBBQ 8(DI), R9
// SBBQ 16(DI), R10
// SBBQ 24(DI), R11
// MOVQ $0, AX
// gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)
// MOVQ c+0(FP), DI
// storeBlock(R8,R9,R10,R11, 0(DI))
// RET
TEXT ·gfpAdd(SB),0,$0-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
loadBlock(0(DI), R8,R9,R10,R11)
MOVQ $0, R12
ADDQ 0(SI), R8
ADCQ 8(SI), R9
ADCQ 16(SI), R10
ADCQ 24(SI), R11
ADCQ $0, R12
gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
TEXT ·gfpSub(SB),0,$0-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
loadBlock(0(DI), R8,R9,R10,R11)
MOVQ ·p2+0(SB), R12
MOVQ ·p2+8(SB), R13
MOVQ ·p2+16(SB), R14
MOVQ ·p2+24(SB), R15
MOVQ $0, AX
// a - b or R + a - b
SUBQ 0(SI), R8
SBBQ 8(SI), R9
SBBQ 16(SI), R10
SBBQ 24(SI), R11
CMOVQCC AX, R12
CMOVQCC AX, R13
CMOVQCC AX, R14
CMOVQCC AX, R15
// sub 0 or sub r=R%p = R-p.
// sub r equals add p.
ADDQ R12, R8
ADCQ R13, R9
ADCQ R14, R10
ADCQ R15, R11
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
TEXT ·gfpMul(SB),0,$160-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
// Jump to a slightly different implementation if MULX isn't supported.
CMPB ·hasBMI2(SB), $0
JE nobmi2Mul
mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
storeBlock( R8, R9,R10,R11, 0(SP))
storeBlock(R12,R13,R14,R15, 32(SP))
gfpReduceBMI2()
JMP end
nobmi2Mul:
mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
gfpReduce(0(SP))
end:
MOVQ c+0(FP), DI
storeBlock(R12,R13,R14,R15, 0(DI))
RET