//go:build (!amd64 && !arm64) || generic // +build !amd64,!arm64 generic package bn256 /* 判断进位的规则 设B=2^w, w = 64 or 32. 假设a op b = c + carry·B. 注:若carry =0 or 1, 则a + b + carry是否产生进位和carry无关。 因为若a+b+carry当carry=0不产生进位,当carry=1产生进位,当且仅当 a+b = B-1, 则a,b中必然有一个数(不妨设为a)的某个非最高比特为0,则 a'=a+carry, a'与a的最高比特相同。因此a'+b 与a+b的进位相同。因此 我们只需要考虑carry为0的情况。 # Add carry如下: a[w-1] 0 0 0 1 1 1 b[w-1] 0 1 1 0 0 1 c[w-1] x 0 1 0 1 x carry 0 1 0 1 0 1 carry>0 <=> a或b都为1(第w-1bit) 或 a,b其中一个为1,但c为0. # Sub carry如下: a[w-1] 0 0 0 1 1 1 b[w-1] 0 0 1 0 1 1 c[w-1] 1 0 x x 0 1 carry 1 0 1 0 0 1 carry = (b&^a | (b|^a)&c) >> 63 carry>0 <=> a为0,b为1 或 (a,b)!=(1,0)并且c=1. */ // gfpCarry compute (a, head) mod p, input (a,head) < 2p // // 先计算 (b,carry) = a - p // // carry head ret // 0(a>p) 0 b // 0 1 b(此情形下,(a,head) > 2p, 不应出现。此时,应再调用一次gfpCarry) // 1(a> 63 } carry = carry &^ head // If b is negative, then return a. // Else return b. carry = -carry ncarry := ^carry for i := 0; i < 4; i++ { a[i] = (a[i] & carry) | (b[i] & ncarry) } } // gfpNeg set c = -a, input a < p func gfpNeg(c, a *gfP) { var carry uint64 for i, pi := range p2 { ai := a[i] ci := pi - ai - carry c[i] = ci carry = (ai&^pi | (ai|^pi)&ci) >> 63 } // FIXME: carry? gfpCarry(c, 0) } // gfpAdd set c = a+b func gfpAdd(c, a, b *gfP) { var carry uint64 for i, ai := range a { bi := b[i] ci := ai + bi + carry c[i] = ci carry = (ai&bi | (ai|bi)&^ci) >> 63 } gfpCarry(c, carry) } func gfpSub(c, a, b *gfP) { t := &gfP{} // t = p-b var carry uint64 for i, pi := range p2 { bi := b[i] ti := pi - bi - carry t[i] = ti carry = (bi&^pi | (bi|^pi)&ti) >> 63 } // c = a+t carry = 0 for i, ai := range a { ti := t[i] ci := ai + ti + carry c[i] = ci carry = (ai&ti | (ai|ti)&^ci) >> 63 } gfpCarry(c, carry) } // mul returns the multiplication of a*b. a,b are no restrictions. func mul(a, b [4]uint64) [8]uint64 { const ( mask16 uint64 = 0x0000ffff mask32 uint64 = 0xffffffff ) // Let B = 2^16, then // buff = buff[0] + buff[1]*B + ... + buff[31]*B^31 var buff [32]uint64 for i, ai := range a { a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48 for j, bj := range b { // compute ai * bj and save to buff[4*(i+j):] // (a0 + a1*B + a2*B^2 + a3*B^3) * (b0 + b2*B^2) // = a0*b0 + a1*b0*B + (a2*b0 + a0*b2)*B^2 + (a1*b2 + a3*b0)*B^3 + a2*b2*B^4 + a3*b2*B^5 b0, b2 := bj&mask32, bj>>32 off := 4 * (i + j) buff[off+0] += a0 * b0 buff[off+1] += a1 * b0 buff[off+2] += a2*b0 + a0*b2 buff[off+3] += a3*b0 + a1*b2 buff[off+4] += a2 * b2 buff[off+5] += a3 * b2 } } // buff: // 0 1 2 3 | 4 5 6 7 | 8 9 10 11 | 12 13 14 15 // 外循环对将1,2,3加到0上 // 内循环处理0,4,8,12... for i := uint(1); i < 4; i++ { shift := 16 * i var head, carry uint64 for j := uint(0); j < 8; j++ { block := 4 * j xi := buff[block] yi := (buff[block+i] << shift) + head zi := xi + yi + carry buff[block] = zi carry = (xi&yi | (xi|yi)&^zi) >> 63 head = buff[block+i] >> (64 - shift) } } return [8]uint64{buff[0], buff[4], buff[8], buff[12], buff[16], buff[20], buff[24], buff[28]} } // halfMul returns a*b mod R, where R = 2^256. func halfMul(a, b [4]uint64) [4]uint64 { const ( mask16 uint64 = 0x0000ffff mask32 uint64 = 0xffffffff ) var buff [18]uint64 for i, ai := range a { a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48 for j, bj := range b { if i+j > 3 { break } b0, b2 := bj&mask32, bj>>32 off := 4 * (i + j) buff[off+0] += a0 * b0 buff[off+1] += a1 * b0 buff[off+2] += a2*b0 + a0*b2 buff[off+3] += a3*b0 + a1*b2 buff[off+4] += a2 * b2 buff[off+5] += a3 * b2 } } for i := uint(1); i < 4; i++ { shift := 16 * i var head, carry uint64 for j := uint(0); j < 4; j++ { block := 4 * j xi := buff[block] yi := (buff[block+i] << shift) + head zi := xi + yi + carry buff[block] = zi carry = (xi&yi | (xi|yi)&^zi) >> 63 head = buff[block+i] >> (64 - shift) } } return [4]uint64{buff[0], buff[4], buff[8], buff[12]} } // gfpMul implements the Montgomery multiplication of a*b, i.e., // c = a*b*R^{-1} mod p // // Let T = a*b = T_h*R + T_l, then // // a*b = T_h*R + T_l // = T_h*R + T_l + (T_l*np mod R)*P mod P // (For np*P = -1 mod R, so T_l + (T_l*np mod R)*P = 0 mod R.) // = higher parts of T + (T_l*np mod R)*P func gfpMul(c, a, b *gfP) { T := mul(*a, *b) m := halfMul([4]uint64{T[0], T[1], T[2], T[3]}, np) // m = T_l *np mod R t := mul([4]uint64{m[0], m[1], m[2], m[3]}, p2) // t = (T_l*np mod R)*P // (T, carry) = a*b and (c, carry) = a*b/R // T[0:4] must be 0. var carry uint64 for i, Ti := range T { ti := t[i] zi := Ti + ti + carry T[i] = zi carry = (Ti&ti | (Ti|ti)&^zi) >> 63 } *c = gfP{T[4], T[5], T[6], T[7]} // TODO: can c >= p? gfpCarry(c, carry) }