246 lines
5.5 KiB
Go
246 lines
5.5 KiB
Go
//go:build (!amd64 && !arm64) || generic
|
||
// +build !amd64,!arm64 generic
|
||
|
||
package bn256
|
||
|
||
/*
|
||
判断进位的规则
|
||
设B=2^w, w = 64 or 32.
|
||
|
||
假设a op b = c + carry·B.
|
||
注:若carry =0 or 1, 则a + b + carry是否产生进位和carry无关。
|
||
因为若a+b+carry当carry=0不产生进位,当carry=1产生进位,当且仅当
|
||
a+b = B-1, 则a,b中必然有一个数(不妨设为a)的某个非最高比特为0,则
|
||
a'=a+carry, a'与a的最高比特相同。因此a'+b 与a+b的进位相同。因此
|
||
我们只需要考虑carry为0的情况。
|
||
|
||
# Add
|
||
carry如下:
|
||
|
||
a[w-1] 0 0 0 1 1 1
|
||
b[w-1] 0 1 1 0 0 1
|
||
c[w-1] x 0 1 0 1 x
|
||
carry 0 1 0 1 0 1
|
||
|
||
carry>0 <=> a或b都为1(第w-1bit) 或 a,b其中一个为1,但c为0.
|
||
|
||
# Sub
|
||
carry如下:
|
||
|
||
a[w-1] 0 0 0 1 1 1
|
||
b[w-1] 0 0 1 0 1 1
|
||
c[w-1] 1 0 x x 0 1
|
||
carry 1 0 1 0 0 1
|
||
|
||
carry = (b&^a | (b|^a)&c) >> 63
|
||
carry>0 <=> a为0,b为1 或 (a,b)!=(1,0)并且c=1.
|
||
*/
|
||
|
||
// gfpCarry compute (a, head) mod p, input (a,head) < 2p
|
||
//
|
||
// 先计算 (b,carry) = a - p
|
||
//
|
||
// carry head ret
|
||
// 0(a>p) 0 b
|
||
// 0 1 b(此情形下,(a,head) > 2p, 不应出现。此时,应再调用一次gfpCarry)
|
||
// 1(a<p) 0 a
|
||
// 1 1 b
|
||
//
|
||
// so, carry &^ head = 1, return a, otherwise return b
|
||
func gfpCarry(a *gfP, head uint64) {
|
||
b := &gfP{}
|
||
|
||
var carry uint64
|
||
for i, pi := range p2 {
|
||
ai := a[i]
|
||
bi := ai - pi - carry
|
||
b[i] = bi
|
||
carry = (pi&^ai | (pi|^ai)&bi) >> 63
|
||
}
|
||
carry = carry &^ head
|
||
|
||
// If b is negative, then return a.
|
||
// Else return b.
|
||
carry = -carry
|
||
ncarry := ^carry
|
||
for i := 0; i < 4; i++ {
|
||
a[i] = (a[i] & carry) | (b[i] & ncarry)
|
||
}
|
||
}
|
||
|
||
// gfpNeg set c = -a, input a < p
|
||
func gfpNeg(c, a *gfP) {
|
||
var carry uint64
|
||
for i, pi := range p2 {
|
||
ai := a[i]
|
||
ci := pi - ai - carry
|
||
c[i] = ci
|
||
carry = (ai&^pi | (ai|^pi)&ci) >> 63
|
||
}
|
||
// FIXME: carry?
|
||
gfpCarry(c, 0)
|
||
}
|
||
|
||
// gfpAdd set c = a+b
|
||
func gfpAdd(c, a, b *gfP) {
|
||
var carry uint64
|
||
for i, ai := range a {
|
||
bi := b[i]
|
||
ci := ai + bi + carry
|
||
c[i] = ci
|
||
carry = (ai&bi | (ai|bi)&^ci) >> 63
|
||
}
|
||
gfpCarry(c, carry)
|
||
}
|
||
|
||
func gfpSub(c, a, b *gfP) {
|
||
t := &gfP{}
|
||
|
||
// t = p-b
|
||
var carry uint64
|
||
for i, pi := range p2 {
|
||
bi := b[i]
|
||
ti := pi - bi - carry
|
||
t[i] = ti
|
||
carry = (bi&^pi | (bi|^pi)&ti) >> 63
|
||
}
|
||
|
||
// c = a+t
|
||
carry = 0
|
||
for i, ai := range a {
|
||
ti := t[i]
|
||
ci := ai + ti + carry
|
||
c[i] = ci
|
||
carry = (ai&ti | (ai|ti)&^ci) >> 63
|
||
}
|
||
gfpCarry(c, carry)
|
||
}
|
||
|
||
// mul returns the multiplication of a*b. a,b are no restrictions.
|
||
func mul(a, b [4]uint64) [8]uint64 {
|
||
const (
|
||
mask16 uint64 = 0x0000ffff
|
||
mask32 uint64 = 0xffffffff
|
||
)
|
||
|
||
// Let B = 2^16, then
|
||
// buff = buff[0] + buff[1]*B + ... + buff[31]*B^31
|
||
var buff [32]uint64
|
||
for i, ai := range a {
|
||
a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48
|
||
for j, bj := range b {
|
||
// compute ai * bj and save to buff[4*(i+j):]
|
||
// (a0 + a1*B + a2*B^2 + a3*B^3) * (b0 + b2*B^2)
|
||
// = a0*b0 + a1*b0*B + (a2*b0 + a0*b2)*B^2 + (a1*b2 + a3*b0)*B^3 + a2*b2*B^4 + a3*b2*B^5
|
||
b0, b2 := bj&mask32, bj>>32
|
||
|
||
off := 4 * (i + j)
|
||
buff[off+0] += a0 * b0
|
||
buff[off+1] += a1 * b0
|
||
buff[off+2] += a2*b0 + a0*b2
|
||
buff[off+3] += a3*b0 + a1*b2
|
||
buff[off+4] += a2 * b2
|
||
buff[off+5] += a3 * b2
|
||
}
|
||
}
|
||
|
||
// buff:
|
||
// 0 1 2 3 | 4 5 6 7 | 8 9 10 11 | 12 13 14 15
|
||
// 外循环对将1,2,3加到0上
|
||
// 内循环处理0,4,8,12...
|
||
for i := uint(1); i < 4; i++ {
|
||
shift := 16 * i
|
||
|
||
var head, carry uint64
|
||
for j := uint(0); j < 8; j++ {
|
||
block := 4 * j
|
||
|
||
xi := buff[block]
|
||
yi := (buff[block+i] << shift) + head
|
||
zi := xi + yi + carry
|
||
buff[block] = zi
|
||
carry = (xi&yi | (xi|yi)&^zi) >> 63
|
||
|
||
head = buff[block+i] >> (64 - shift)
|
||
}
|
||
}
|
||
|
||
return [8]uint64{buff[0], buff[4], buff[8], buff[12], buff[16], buff[20], buff[24], buff[28]}
|
||
}
|
||
|
||
// halfMul returns a*b mod R, where R = 2^256.
|
||
func halfMul(a, b [4]uint64) [4]uint64 {
|
||
const (
|
||
mask16 uint64 = 0x0000ffff
|
||
mask32 uint64 = 0xffffffff
|
||
)
|
||
|
||
var buff [18]uint64
|
||
for i, ai := range a {
|
||
a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48
|
||
|
||
for j, bj := range b {
|
||
if i+j > 3 {
|
||
break
|
||
}
|
||
b0, b2 := bj&mask32, bj>>32
|
||
|
||
off := 4 * (i + j)
|
||
buff[off+0] += a0 * b0
|
||
buff[off+1] += a1 * b0
|
||
buff[off+2] += a2*b0 + a0*b2
|
||
buff[off+3] += a3*b0 + a1*b2
|
||
buff[off+4] += a2 * b2
|
||
buff[off+5] += a3 * b2
|
||
}
|
||
}
|
||
|
||
for i := uint(1); i < 4; i++ {
|
||
shift := 16 * i
|
||
|
||
var head, carry uint64
|
||
for j := uint(0); j < 4; j++ {
|
||
block := 4 * j
|
||
|
||
xi := buff[block]
|
||
yi := (buff[block+i] << shift) + head
|
||
zi := xi + yi + carry
|
||
buff[block] = zi
|
||
carry = (xi&yi | (xi|yi)&^zi) >> 63
|
||
|
||
head = buff[block+i] >> (64 - shift)
|
||
}
|
||
}
|
||
|
||
return [4]uint64{buff[0], buff[4], buff[8], buff[12]}
|
||
}
|
||
|
||
// gfpMul implements the Montgomery multiplication of a*b, i.e.,
|
||
// c = a*b*R^{-1} mod p
|
||
//
|
||
// Let T = a*b = T_h*R + T_l, then
|
||
//
|
||
// a*b = T_h*R + T_l
|
||
// = T_h*R + T_l + (T_l*np mod R)*P mod P
|
||
// (For np*P = -1 mod R, so T_l + (T_l*np mod R)*P = 0 mod R.)
|
||
// = higher parts of T + (T_l*np mod R)*P
|
||
func gfpMul(c, a, b *gfP) {
|
||
T := mul(*a, *b)
|
||
m := halfMul([4]uint64{T[0], T[1], T[2], T[3]}, np) // m = T_l *np mod R
|
||
t := mul([4]uint64{m[0], m[1], m[2], m[3]}, p2) // t = (T_l*np mod R)*P
|
||
|
||
// (T, carry) = a*b and (c, carry) = a*b/R
|
||
// T[0:4] must be 0.
|
||
var carry uint64
|
||
for i, Ti := range T {
|
||
ti := t[i]
|
||
zi := Ti + ti + carry
|
||
T[i] = zi
|
||
carry = (Ti&ti | (Ti|ti)&^zi) >> 63
|
||
}
|
||
|
||
*c = gfP{T[4], T[5], T[6], T[7]}
|
||
// TODO: can c >= p?
|
||
gfpCarry(c, carry)
|
||
}
|