init: v1.0.0

This commit is contained in:
yaole
2026-05-27 23:03:00 +08:00
commit 8d97f750eb
466 changed files with 80067 additions and 0 deletions
+140
View File
@@ -0,0 +1,140 @@
// +build arm64,!generic
// storeBlock writes the four 64-bit registers w0..w3 to four consecutive
// 8-byte slots starting at the memory operand dst (for example 0(R0)),
// lowest offset first.
#define storeBlock(w0,w1,w2,w3, dst) \
	MOVD w0,  0+dst \
	MOVD w1,  8+dst \
	MOVD w2, 16+dst \
	MOVD w3, 24+dst
// loadBlock reads four consecutive 8-byte slots starting at the memory
// operand src (for example 0(R0)) into the registers w0..w3, lowest offset
// first.
#define loadBlock(src, w0,w1,w2,w3) \
	MOVD  0+src, w0 \
	MOVD  8+src, w1 \
	MOVD 16+src, w2 \
	MOVD 24+src, w3
// loadModulus copies the four 64-bit words of the package-level symbol ·p2
// into the registers m0..m3, lowest offset first. Judging by the macro name
// and the callers' comments, ·p2 holds the field modulus p.
// The parameters are not named p0..p3 so that none of them can be mistaken
// for the symbol ·p2.
#define loadModulus(m0,m1,m2,m3) \
	MOVD ·p2+0(SB),  m0 \
	MOVD ·p2+8(SB),  m1 \
	MOVD ·p2+16(SB), m2 \
	MOVD ·p2+24(SB), m3
// loadR copies the four 64-bit words of the package-level symbol ·r into the
// registers k0..k3, lowest offset first.
// NOTE(review): ·r is defined outside this file; it is presumably
// 2^256 - p — confirm against its definition.
#define loadR(k0,k1,k2,k3) \
	MOVD ·r+0(SB),  k0 \
	MOVD ·r+8(SB),  k1 \
	MOVD ·r+16(SB), k2 \
	MOVD ·r+24(SB), k3
#include "mul_arm64.h"
// gfpNeg stores p - a at *c, where p is the value loaded by loadModulus.
// If that subtraction borrows (a > p), the wrapped result is corrected by
// subtracting the value loaded by loadR.
//
// Stack arguments (frame $0-16, no locals):
//   c+0(FP)  pointer to the 4-word destination
//   a+8(FP)  pointer to the 4-word operand
//
// Registers: R0 pointer; R4:R1 operand a, later the correction term;
// R8:R5 p, later the result.
//
// ARM64 carry convention: after SUBS/SBCS the C flag is SET when the
// subtraction did NOT borrow, so CS means "minuend >= subtrahend".
//
// NOTE(review): for a == 0 the first subtraction yields p with C set, no
// correction is applied, and p itself (not 0) is stored — confirm callers
// accept that representative.
TEXT ·gfpNeg(SB),0,$0-16
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	loadModulus(R5,R6,R7,R8)

	// R8:R5 = p - a (mod 2^256); C = 1 iff p >= a.
	SUBS R1, R5, R5
	SBCS R2, R6, R6
	SBCS R3, R7, R7
	SBCS R4, R8, R8

	// Build the correction term in R4:R1 without disturbing the flags
	// (plain loads and CSEL do not write them):
	//   C = 1 (p >= a): R8:R5 already holds p - a, correction = 0.
	//   C = 0 (p <  a): R8:R5 wrapped to 2^256 + p - a, correction = r.
	loadR(R1,R2,R3,R4)
	CSEL CS, ZR, R1, R1
	CSEL CS, ZR, R2, R2
	CSEL CS, ZR, R3, R3
	CSEL CS, ZR, R4, R4

	// R8:R5 -= correction.
	SUBS R1, R5, R5
	SBCS R2, R6, R6
	SBCS R3, R7, R7
	SBCS R4, R8, R8

	MOVD c+0(FP), R0
	storeBlock(R5,R6,R7,R8, 0(R0))
	RET
// gfpAdd stores a + b at *c, after subtracting p once when a + b >= p.
// p is the value loaded by loadModulus.
//
// Stack arguments (frame $0-24, no locals):
//   c+0(FP)   pointer to the 4-word destination
//   a+8(FP)   pointer to the first 4-word operand
//   b+16(FP)  pointer to the second 4-word operand
//
// Registers: R0 pointer, then the fifth (carry) word; R4:R1 a, then the sum;
// R8:R5 b, then sum - p; R12:R9 p.
TEXT ·gfpAdd(SB),0,$0-24
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	MOVD b+16(FP), R0
	loadBlock(0(R0), R5,R6,R7,R8)
	loadModulus(R9,R10,R11,R12)
	MOVD ZR, R0

	// (R0, R4:R1) = a + b; R0 receives the carry out of the top word.
	ADDS R5, R1, R1
	ADCS R6, R2, R2
	ADCS R7, R3, R3
	ADCS R8, R4, R4
	ADCS ZR, R0, R0

	// (R0, R8:R5) = a + b - p over five words. On ARM64 the C flag is set
	// when a subtraction does not borrow, so C = 1 iff a + b >= p.
	SUBS R9,  R1, R5
	SBCS R10, R2, R6
	SBCS R11, R3, R7
	SBCS R12, R4, R8
	SBCS ZR,  R0, R0

	// C = 1: take the reduced value R8:R5. C = 0: keep the plain sum R4:R1.
	CSEL CS, R5, R1, R1
	CSEL CS, R6, R2, R2
	CSEL CS, R7, R3, R3
	CSEL CS, R8, R4, R4

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET
// gfpSub stores a - b at *c, adding p back once when the subtraction
// borrows. p is the value loaded by loadModulus.
//
// Stack arguments (frame $0-24, no locals):
//   c+0(FP)   pointer to the 4-word destination
//   a+8(FP)   pointer to the 4-word minuend
//   b+16(FP)  pointer to the 4-word subtrahend
//
// Registers: R0 pointer; R4:R1 a, then the result; R8:R5 b;
// R12:R9 p, then the correction term (0 or p).
TEXT ·gfpSub(SB),0,$0-24
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	MOVD b+16(FP), R0
	loadBlock(0(R0), R5,R6,R7,R8)
	loadModulus(R9,R10,R11,R12)

	// R4:R1 = a - b (mod 2^256). On ARM64 the C flag is set when there is
	// no borrow, so C = 1 iff a >= b; otherwise R4:R1 holds 2^256 + a - b.
	SUBS R5, R1, R1
	SBCS R6, R2, R2
	SBCS R7, R3, R3
	SBCS R8, R4, R4

	// R12:R9 = 0 when C = 1 (a >= b), p when C = 0 (the difference wrapped).
	CSEL CS, ZR, R9,  R9
	CSEL CS, ZR, R10, R10
	CSEL CS, ZR, R11, R11
	CSEL CS, ZR, R12, R12

	// Add the correction. In the wrapped case the low 256 bits of
	// (2^256 + a - b) + p are a - b + p; the carry out of the top word is
	// discarded. Subtracting 2^256 - p from the wrapped difference would be
	// the same operation, which is why adding p is sufficient.
	ADDS R9,  R1, R1
	ADCS R10, R2, R2
	ADCS R11, R3, R3
	ADCS R12, R4, R4

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET
// gfpMul loads the 4-word operands at *a and *b, runs the mul and gfpReduce
// macros on them, and stores R4:R1 at *c.
//
// Stack arguments (frame $0-24, no locals): c+0(FP) destination pointer,
// a+8(FP) and b+16(FP) operand pointers; each points at four 64-bit words.
//
// NOTE(review): mul and gfpReduce are not defined in this file (presumably
// they come from mul_arm64.h). Their register contract is implicit: mul is
// passed only the eight product registers, so it presumably reads a from
// R4:R1 and b from R8:R5, and gfpReduce presumably consumes R16:R9 and
// leaves its result in R4:R1 — confirm before renaming any register here.
TEXT ·gfpMul(SB),0,$0-24
MOVD a+8(FP), R0
loadBlock(0(R0), R1,R2,R3,R4)
MOVD b+16(FP), R0
loadBlock(0(R0), R5,R6,R7,R8)
// R16:R9 = R4:R1 * R8:R5 = a * b
mul(R9,R10,R11,R12,R13,R14,R15,R16)
// gfpReduce takes no arguments; the value stored below is R4:R1 as it
// stands after this macro.
gfpReduce()
MOVD c+0(FP), R0
storeBlock(R1,R2,R3,R4, 0(R0))
RET