Files
2026-05-27 23:03:00 +08:00

815 lines
22 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains the Go wrapper for the constant-time, 64-bit assembly
// implementation of P256. The optimizations performed here are described in
// detail in:
// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
// 256-bit primes"
// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
// https://eprint.iacr.org/2013/816.pdf
//go:build (arm64 || amd64) && !generic && !generic32 && !generic64
// +build arm64 amd64
// +build !generic
// +build !generic32
// +build !generic64
package ec256
import (
"crypto/elliptic"
"math/big"
)
// Curve constants as four 64-bit limbs each. The numeric suffix is the
// index the limb occupies in the 4-word slices used throughout this file
// (for example montOne0..montOne3 are stored to xyz[8]..xyz[11]).
const (
// Montgomery representation of one: 1*R mod p.
montOne0 = 0x0000000000000001
montOne1 = 0x00000000ffffffff
montOne2 = 0x0000000000000000
montOne3 = 0x0000000100000000
// Montgomery representation of the base point (affine x, then y).
montBaseX0 = 0x61328990f418029e
montBaseX1 = 0x3e7981eddca6c050
montBaseX2 = 0xd6a1ed99ac24c3c3
montBaseX3 = 0x91167a5ee1c13b05
montBaseY0 = 0xc1354e593c2d0ddd
montBaseY1 = 0xc1f5e5788d3295fa
montBaseY2 = 0x8d4cfb066e2a48f8
montBaseY3 = 0x63cd65d481d735bd
// R*R mod n; multiplying by it with c256OrdMul converts a value into the
// Montgomery domain modulo n (see Inverse).
rrModN0 = 0x901192af7c114f20
rrModN1 = 0x3464504ade6fa2fa
rrModN2 = 0x620fc84c3affe0d4
rrModN3 = 0x1eb5e412a22b3d3b
// R*R mod p; multiplying by it with c256Mul converts a value into the
// Montgomery domain modulo p (see rr).
rrModP0 = 0x0000000200000003
rrModP1 = 0x00000002ffffffff
rrModP2 = 0x0000000100000001
rrModP3 = 0x0000000400000002
)
// c256Point is the Jacobian representation of a curve point with x, y and z
// held in the Montgomery domain. The coordinates are packed into one array:
// xyz[0:4] is x, xyz[4:8] is y and xyz[8:12] is z.
type c256Point struct {
xyz [12]uint64
}
var (
// c256Precomputed is the base-point table consumed by c256BaseMult: 43
// rows of 32 affine points, 8 words (x then y) per point. It is
// allocated and filled by initTable.
c256Precomputed *[43][32 * 8]uint64
)
// init builds the base-point table once at package load time.
func init() {
initTable()
}
// Params returns the curve parameters carried by the receiver.
func (curve SM2CurveParam) Params() *elliptic.CurveParams {
	params := curve.CurveParams
	return params
}
//go:noescape
// func c256Add(res, in1, in2 []uint64)
// NOTE(review): the directive and the commented-out c256Add declaration
// above are leftovers; there is no c256Add declaration in this file.
//
// Functions implemented in c256_asm_*64.s
//
// c256Mul is Montgomery multiplication modulo the field prime p:
// res = in1 * in2 * R^-1 mod p.
//
//go:noescape
func c256Mul(res, in1, in2 []uint64)
// c256Sqr is Montgomery squaring modulo p, repeated n times (n >= 1).
//
//go:noescape
func c256Sqr(res, in []uint64, n int)
// c256FromMont is Montgomery multiplication by 1, montMul(in, 1), which
// takes a value out of the Montgomery domain.
//
//go:noescape
func c256FromMont(res, in []uint64)
// c256NegCond negates val in place iff cond != 0.
//
//go:noescape
func c256NegCond(val []uint64, cond int)
// c256MovCond sets res to b if cond == 0, and to a otherwise.
//
//go:noescape
func c256MovCond(res, a, b []uint64, cond int)
// c256BigToLittle is an endianness swap: it converts 32 big-endian bytes
// into 4 little-endian uint64 limbs.
//
//go:noescape
func c256BigToLittle(res []uint64, in []byte)
// c256LittleToBig is the reverse swap: 4 uint64 limbs to 32 big-endian bytes.
//
//go:noescape
func c256LittleToBig(res []byte, in []uint64)
// c256Select is a constant time table access.
// idx = 0, returns infinity. idx = i > 0, returns table[i-1].
//
//go:noescape
func c256Select(point, table []uint64, idx int)
// c256SelectBase is the table access used for the precomputed base-point
// table (8 words per entry, see c256BaseMult).
//
//go:noescape
func c256SelectBase(point, table []uint64, idx int)
// c256OrdMul is Montgomery multiplication modulo Ord(G).
//
//go:noescape
func c256OrdMul(res, in1, in2 []uint64)
// c256OrdSqr is Montgomery squaring modulo Ord(G), repeated n times.
//
//go:noescape
func c256OrdSqr(res, in []uint64, n int)
// c256PointAddAffineAsm is point addition with in2 being an affine point.
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// If zero == 0 -> res = in2
//
//go:noescape
func c256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
// c256PointAddAsm is point addition. It returns one if the two input points
// were equal and zero otherwise. (Note that, due to the way that the
// equations work out, some representations of ∞ are considered equal to
// everything by this function.)
//
//go:noescape
func c256PointAddAsm(res, in1, in2 []uint64) int
// c256PointDoubleAsm is point doubling.
//
//go:noescape
func c256PointDoubleAsm(res, in []uint64)
// c256ToMont converts in to the Montgomery domain modulo p by
// Montgomery-multiplying it with rr (R*R mod p), storing the result in res.
func c256ToMont(res, in []uint64) {
	c256Mul(res, in, rr[:])
}
// Inverse returns k^-1 mod N, where N is the group order c256.N.
//
// The inverse is computed as k^(N-2) with Montgomery arithmetic modulo N:
// k -> k*R -> k^(N-2)*R -> k^(N-2). A k outside [0, N) is first reduced to
// its canonical residue, so negative inputs yield the inverse of k mod N
// (not of |k|).
func (curve SM2CurveParam) Inverse(k *big.Int) *big.Int {
	if k.Sign() < 0 || k.Cmp(c256.N) >= 0 {
		// This should never happen. big.Int.Mod is Euclidean, so the
		// result is in [0, N) for negative k as well.
		k = new(big.Int).Mod(k, c256.N)
	}
	// table stores the precomputed small powers of k plus three scratch
	// values, four words each.
	var table [4 * 10]uint64
	var (
		_1      = table[4*0 : 4*1]  // 1
		_11     = table[4*1 : 4*2]  // 3
		_101    = table[4*2 : 4*3]  // 5
		_111    = table[4*3 : 4*4]  // 7
		_1111   = table[4*4 : 4*5]  // 15
		_10101  = table[4*5 : 4*6]  // 21
		_101111 = table[4*6 : 4*7]  // 47
		x       = table[4*7 : 4*8]  // accumulator
		t       = table[4*8 : 4*9]  // scratch
		s       = table[4*9 : 4*10] // scratch
	)
	fromBig(x[:], k)
	// This code operates in the Montgomery domain where R = 2^256 mod n
	// and n is the order of the scalar field. Elements in the Montgomery
	// domain take the form a×R and c256OrdMul of x and y calculates
	// (x × y × R^-1) mod n. RR is R×R mod n (the rrModN constants), thus
	// the Montgomery multiplication of x and RR gives x×R, i.e. converts x
	// into the Montgomery domain.
	// The windowed addition-chain approach follows
	// https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
	RR := []uint64{rrModN0, rrModN1, rrModN2, rrModN3} // sm2-p256
	// FIXME: the ladder could be improved.
	// SM2-p256:
	// N-2 = 0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54121
	c256OrdMul(_1, x, RR)         // _1
	c256OrdSqr(x, _1, 1)          // x = _10
	c256OrdMul(_11, x, _1)        // _11
	c256OrdMul(_101, x, _11)      // _101
	c256OrdMul(_111, x, _101)     // _111
	c256OrdSqr(x, _101, 1)        // x = _1010
	c256OrdMul(_1111, _101, x)    // _1111
	c256OrdSqr(t, x, 1)           // t = _10100
	c256OrdMul(_10101, t, _1)     // _10101
	c256OrdSqr(x, _10101, 1)      // x = _101010
	c256OrdMul(_101111, _101, x)  // _101111
	c256OrdMul(x, _10101, x)      // x = _111111 (six ones)
	c256OrdSqr(s, x, 1)           // s = _1111110
	c256OrdMul(s, s, _1)          // s = _1111111 (seven ones)
	c256OrdSqr(x, s, 1)           // x = _11111110 = 0xfe
	c256OrdMul(s, x, _1)          // s = 0xff
	c256OrdSqr(t, s, 8)           // t = ff00
	c256OrdMul(x, t, x)           // x = fffe
	c256OrdMul(s, t, s)           // s = ffff
	c256OrdSqr(t, s, 16)          // t = ffff0000
	c256OrdMul(x, t, x)           // x = fffffffe
	c256OrdMul(t, x, _1)          // t = ffffffff
	c256OrdSqr(x, x, 32)          // x = fffffffe00000000
	c256OrdMul(x, x, t)           // x = fffffffeffffffff
	c256OrdSqr(x, x, 32)          // x = fffffffeffffffff00000000
	c256OrdMul(x, x, t)           // x = fffffffeffffffffffffffff
	c256OrdSqr(x, x, 32)          // x = fffffffeffffffffffffffff00000000
	c256OrdMul(x, x, t)           // x = fffffffeffffffffffffffffffffffff
	// The low 128 bits of the exponent are consumed window by window:
	// 7203df6b21c6052b53bbf40939d54121 =
	// 01110010000000111101111101101011001000011100011000000101001010110101001110111011111101000000100100111001110101010100000100100001 =
	// 0111 001 00000001111 01111 101
	// 101 011 001 0000111 00011
	// 000000101 0010101 10101 00111 0111
	// 011 1111 01 0000001 001
	// 00111 00111 010101 01 000001
	// 001 00001
	sqrs := []uint8{
		4, 3, 11, 5, 3,
		3, 3, 3, 7, 5,
		9, 7, 5, 5, 4,
		3, 4, 2, 7, 3,
		5, 5, 6, 2, 6,
		3, 5,
	}
	muls := [][]uint64{
		_111, _1, _1111, _1111, _101,
		_101, _11, _1, _111, _11,
		_101, _10101, _10101, _111, _111,
		_11, _1111, _1, _1, _1,
		_111, _111, _10101, _1, _1,
		_1, _1,
	}
	// For each window: shift the accumulator left by the window width, then
	// multiply in the window's value.
	for i, n := range sqrs {
		c256OrdSqr(x, x, int(n))
		c256OrdMul(x, x, muls[i])
	}
	// Multiplying by one in the Montgomery domain converts a Montgomery
	// value out of the domain.
	one := []uint64{1, 0, 0, 0}
	c256OrdMul(x, x, one)
	xOut := make([]byte, 32)
	c256LittleToBig(xOut, x)
	return new(big.Int).SetBytes(xOut)
}
// fromBig writes the words of big into out in the order returned by
// big.Bits(), after zeroing all of out. It panics if big has more words
// than out has elements.
func fromBig(out []uint64, big *big.Int) {
	for i := 0; i < len(out); i++ {
		out[i] = 0
	}
	limbs := big.Bits()
	for i := 0; i < len(limbs); i++ {
		out[i] = uint64(limbs[i])
	}
}
// c256GetScalar interprets in as a big-endian scalar and stores it in out as
// 64-bit limbs (see fromBig). A scalar greater than or equal to the group
// order c256.N is reduced modulo that order first.
func c256GetScalar(out []uint64, in []byte) {
	k := new(big.Int).SetBytes(in)
	if k.Cmp(c256.N) < 0 {
		fromBig(out, k)
		return
	}
	fromBig(out, k.Mod(k, c256.N))
}
// c256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. Thus rr here is R×R mod p (the rrModP
// constants): Montgomery-multiplying a value by rr converts it into the
// Montgomery domain. See the comment in Inverse for the same technique
// modulo n.
var rr = []uint64{rrModP0, rrModP1, rrModP2, rrModP3} // SM2 values
// maybeReduceModP returns in reduced modulo c256.P. In the common case
// in < P, in itself is returned without allocating; otherwise a new big.Int
// holding in mod P is returned.
func maybeReduceModP(in *big.Int) *big.Int {
	if in.Cmp(c256.P) >= 0 {
		return new(big.Int).Mod(in, c256.P)
	}
	return in
}
// CombinedMult returns baseScalar*G + scalar*(bigX, bigY) in affine
// coordinates, where G is the base point. Both scalars are big-endian and
// are reduced modulo the group order by c256GetScalar.
func CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
	var (
		baseK, pointK         [4]uint64
		baseProd, pointProd   c256Point
		total, doubledBase    c256Point
	)

	// baseScalar * G
	c256GetScalar(baseK[:], baseScalar)
	baseIsInfinity := scalarIsZero(baseK[:])
	baseProd.c256BaseMult(baseK[:])

	// scalar * (bigX, bigY)
	c256GetScalar(pointK[:], scalar)
	pointIsInfinity := scalarIsZero(pointK[:])
	pointProd.c256PointFromAffine(bigX, bigY)
	pointProd.c256ScalarMult(pointK[:])

	// The addition formula does not cover equal inputs or a zero scalar on
	// either side, so compute the doubling as well and patch the result
	// with conditional copies.
	sameInputs := c256PointAddAsm(total.xyz[:], baseProd.xyz[:], pointProd.xyz[:])
	c256PointDoubleAsm(doubledBase.xyz[:], baseProd.xyz[:])
	total.CopyConditional(&doubledBase, sameInputs)
	total.CopyConditional(&baseProd, pointIsInfinity)
	total.CopyConditional(&pointProd, baseIsInfinity)

	return total.c256PointToAffine()
}
// ScalarBaseMult returns scalar*G in affine coordinates, where G is the base
// point and scalar is a big-endian integer (reduced modulo the group order
// by c256GetScalar).
func (curve SM2CurveParam) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
	var (
		k       [4]uint64
		product c256Point
	)
	c256GetScalar(k[:], scalar)
	product.c256BaseMult(k[:])
	return product.c256PointToAffine()
}
// ScalarMult returns scalar*(bigX, bigY) in affine coordinates. scalar is a
// big-endian integer and is reduced modulo the group order by c256GetScalar.
func (curve SM2CurveParam) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
	scalarReversed := make([]uint64, 4)
	c256GetScalar(scalarReversed, scalar)

	// Load the affine input as a Jacobian point with Z = 1 in the
	// Montgomery domain, using the same helper as Add, Double and
	// CombinedMult.
	var r c256Point
	r.c256PointFromAffine(bigX, bigY)

	r.c256ScalarMult(scalarReversed)
	return r.c256PointToAffine()
}
// Add returns the sum of the affine points (x1, y1) and (x2, y2).
//
// The conventional point at infinity (0, 0) is treated as the identity: if
// either input is (0, 0) the other input (reduced modulo p) is returned.
// Without this check (0, 0) would be lifted to Jacobian form with Z = 1 and
// handled as a finite point by the assembly.
//
// This function is not constant time: it branches on whether the inputs are
// infinity or equal. Points passed to Add are normally public.
func (curve SM2CurveParam) Add(x1, y1, x2, y2 *big.Int) (x, y *big.Int) {
	if x1.Sign() == 0 && y1.Sign() == 0 {
		return new(big.Int).Set(maybeReduceModP(x2)), new(big.Int).Set(maybeReduceModP(y2))
	}
	if x2.Sign() == 0 && y2.Sign() == 0 {
		return new(big.Int).Set(maybeReduceModP(x1)), new(big.Int).Set(maybeReduceModP(y1))
	}

	var p1, p2, sum c256Point
	p1.c256PointFromAffine(x1, y1)
	p2.c256PointFromAffine(x2, y2)

	// The addition formula is not valid for equal inputs; c256PointAddAsm
	// reports that case by returning 1, and the doubling is used instead.
	if c256PointAddAsm(sum.xyz[:], p1.xyz[:], p2.xyz[:]) == 1 {
		c256PointDoubleAsm(sum.xyz[:], p1.xyz[:])
	}
	return sum.c256PointToAffine()
}
// Double returns 2*(x1, y1) in affine coordinates.
func (curve SM2CurveParam) Double(x1, y1 *big.Int) (x, y *big.Int) {
	var pt c256Point
	pt.c256PointFromAffine(x1, y1)

	// Double in place.
	jac := pt.xyz[:]
	c256PointDoubleAsm(jac, jac)

	return pt.c256PointToAffine()
}
// uint64IsZero returns 1 if x is zero and 0 otherwise, without branching on
// the value of x.
func uint64IsZero(x uint64) int {
	// After inversion every bit is set only when x == 0. Repeatedly AND-ing
	// the upper half onto the lower half leaves bit 0 set iff all 64 bits
	// were set.
	folded := ^x
	for _, shift := range [...]uint{32, 16, 8, 4, 2, 1} {
		folded &= folded >> shift
	}
	return int(folded & 1)
}
// scalarIsZero returns 1 if the four-limb scalar is zero and 0 otherwise.
// scalar must have at least four elements.
func scalarIsZero(scalar []uint64) int {
	var acc uint64
	for i := 0; i < 4; i++ {
		acc |= scalar[i]
	}
	return uint64IsZero(acc)
}
// c256PointFromAffine loads the affine point (x, y) into p as a Jacobian
// point in the Montgomery domain: x and y are reduced modulo p, converted
// with rr, and z is set to one (in Montgomery form).
func (p *c256Point) c256PointFromAffine(x, y *big.Int) {
	xs, ys, zs := p.xyz[0:4], p.xyz[4:8], p.xyz[8:12]

	fromBig(xs, maybeReduceModP(x))
	fromBig(ys, maybeReduceModP(y))

	// Enter the Montgomery domain.
	c256Mul(xs, xs, rr[:])
	c256Mul(ys, ys, rr[:])

	zs[0], zs[1], zs[2], zs[3] = montOne0, montOne1, montOne2, montOne3
}
// c256PointToAffine converts the Jacobian point p (Montgomery domain) to
// affine big.Int coordinates: x = X/Z^2 and y = Y/Z^3, taken out of the
// Montgomery domain. p itself is not modified.
func (p *c256Point) c256PointToAffine() (x, y *big.Int) {
	xAcc := make([]uint64, 4)
	yAcc := make([]uint64, 4)

	c256Inverse(yAcc, p.xyz[8:12])  // yAcc = Z^-1
	c256Sqr(xAcc, yAcc, 1)          // xAcc = Z^-2
	c256Mul(yAcc, yAcc, xAcc)       // yAcc = Z^-3
	c256Mul(xAcc, p.xyz[0:4], xAcc) // xAcc = X * Z^-2
	c256Mul(yAcc, p.xyz[4:8], yAcc) // yAcc = Y * Z^-3

	// Leave the Montgomery domain.
	c256FromMont(xAcc, xAcc)
	c256FromMont(yAcc, yAcc)

	var xBytes, yBytes [32]byte
	c256LittleToBig(xBytes[:], xAcc)
	c256LittleToBig(yBytes[:], yAcc)

	x = new(big.Int).SetBytes(xBytes[:])
	y = new(big.Int).SetBytes(yBytes[:])
	return x, y
}
// CopyConditional overwrites p with src if v == 1, and leaves p unchanged if
// v == 0. The selection is done with bit masks rather than a branch.
func (p *c256Point) CopyConditional(src *c256Point, v int) {
	keep := uint64(v) - 1 // all ones when v == 0, zero when v == 1
	take := ^keep
	for i := range p.xyz {
		p.xyz[i] = (p.xyz[i] & keep) | (src.xyz[i] & take)
	}
}
// c256Inverse sets out to in^-1 mod p, with in and out both in the
// Montgomery domain: in*R => in^-1 * R = mont_power(in*R, p-2).
//
// The exponent p-2 =
// fffffffe ffffffff ffffffff ffffffff ffffffff 00000000 ffffffff fffffffd
// is evaluated with a fixed addition chain of 255 squarings and 14
// multiplications. In the comments below a name stands for the exponent
// held by that value; xN is the exponent with N one bits (2^N - 1).
func c256Inverse(out, in []uint64) {
	var stack [17 * 4]uint64
	_10 := stack[4*0 : 4*1]
	_11 := stack[4*1 : 4*2]
	_110 := stack[4*2 : 4*3]
	_111 := stack[4*3 : 4*4]
	_111000 := stack[4*4 : 4*5]
	_111111 := stack[4*5 : 4*6]
	_1111110 := stack[4*6 : 4*7]
	_1111111 := stack[4*7 : 4*8]
	x12 := stack[4*8 : 4*9]
	x24 := stack[4*9 : 4*10]
	x31 := stack[4*10 : 4*11]
	i39 := stack[4*11 : 4*12]
	i68 := stack[4*12 : 4*13]
	x62 := stack[4*13 : 4*14]
	i71 := stack[4*14 : 4*15]
	x64 := stack[4*15 : 4*16]
	i265 := stack[4*16 : 4*17]

	c256Sqr(_10, in, 1)              // _10 = 2 * 1
	c256Mul(_11, _10, in)            // _11 = 1 + _10
	c256Sqr(_110, _11, 1)            // _110 = 2 * _11
	c256Mul(_111, _110, in)          // _111 = 1 + _110
	c256Sqr(_111000, _111, 3)        // _111000 = _111 << 3
	c256Mul(_111111, _111, _111000)  // _111111 = _111 + _111000
	c256Sqr(_1111110, _111111, 1)    // _1111110 = 2 * _111111
	c256Mul(_1111111, _1111110, in)  // _1111111 = 1 + _1111110
	c256Sqr(x12, _1111110, 5)        // x12 = _1111110<<5 + _111111
	c256Mul(x12, x12, _111111)
	c256Sqr(x24, x12, 12)            // x24 = x12<<12 + x12
	c256Mul(x24, x24, x12)
	c256Sqr(x31, x24, 7)             // x31 = x24<<7 + _1111111
	c256Mul(x31, x31, _1111111)
	c256Sqr(i39, x31, 2)             // i39 = x31 << 2
	c256Sqr(i68, i39, 29)            // i68 = i39 << 29
	c256Mul(x62, x31, i68)           // x62 = x31 + i68
	c256Sqr(i71, i68, 2)             // i71 = i68 << 2
	c256Mul(x64, i39, i71)           // x64 = i39 + i71 + _11
	c256Mul(x64, x64, _11)
	c256Sqr(i265, i71, 32)           // i265 = ((i71<<32+x64)<<64 + x64) << 94
	c256Mul(i265, i265, x64)
	c256Sqr(i265, i265, 64)
	c256Mul(i265, i265, x64)
	c256Sqr(i265, i265, 94)
	c256Mul(i265, i265, x62)         // result = (x62+i265)<<2 + 1
	c256Sqr(i265, i265, 2)
	c256Mul(out, i265, in)
}
// c256StorePoint copies the 12 words of p into slot index of the table r,
// i.e. starting at word index*12.
func (p *c256Point) c256StorePoint(r *[16 * 4 * 3]uint64, index int) {
	slot := r[index*12:]
	copy(slot, p.xyz[:])
}
// boothW5 recodes a 6-bit window value into a signed digit, returned as
// (magnitude, sign). Bit 5 of in selects the negative case (sign = 1); bit 0
// is the top bit of the previous window. The computation is branch-free.
func boothW5(in uint) (int, int) {
	negMask := ^((in >> 5) - 1) // all ones when bit 5 is set, zero otherwise
	flipped := (1 << 6) - in - 1
	picked := (flipped & negMask) | (in &^ negMask)
	magnitude := (picked >> 1) + (picked & 1)
	sign := negMask & 1
	return int(magnitude), int(sign)
}
// boothW6 recodes a 7-bit window value into a signed digit, returned as
// (magnitude, sign).
//
// Only the low 7 bits of in are meaningful (callers mask with 0x7f). Bit 6
// selects the negative case (sign = 1) and bit 0 is the top bit of the
// previous window. For 7-bit inputs the magnitude lies in [0, 32]. The
// computation is branch-free.
func boothW6(in uint) (int, int) {
	var s uint = ^((in >> 6) - 1) // all ones when bit 6 is set, zero otherwise
	var d uint = (1 << 7) - in - 1
	d = (d & s) | (in & (^s)) // bitwise complement of in for the negative case
	d = (d >> 1) + (d & 1)    // round up after dropping the carry-in bit
	return int(d), int(s & 1)
}
// initTable allocates c256Precomputed and fills it with multiples of the
// base point G for use by c256BaseMult.
func initTable() {
/*
Layout: c256Precomputed[i][j*8 : j*8+8] holds the affine (x, y) of
[2^(6i)] * [(j+1)]G, with the coordinates still in Montgomery form.

          j = 0          j = 1              ...   j = 31
i = 0     G              [2]G               ...   [32]G
i = 1     [2^6]G         [2^6][2]G          ...   [2^6][32]G
i = 2     [2^12]G        [2^12][2]G         ...   [2^12][32]G
...
i = 42    [2^252]G       [2^252][2]G        ...   [2^252][32]G

That is 43 rows of 32 points, 8 words per point.
*/
c256Precomputed = new([43][32 * 8]uint64)
basePoint := []uint64{
montBaseX0, montBaseX1, montBaseX2, montBaseX3,
montBaseY0, montBaseY1, montBaseY2, montBaseY3,
montOne0, montOne1, montOne2, montOne3,
}
// t2 walks through G, 2G, 3G, ... (one step per column j); t1 is the
// working copy that is repeatedly doubled down a column.
t1 := make([]uint64, 12)
t2 := make([]uint64, 12)
copy(t2, basePoint)
zInv := make([]uint64, 4)
zInvSq := make([]uint64, 4)
for j := 0; j < 32; j++ {
copy(t1, t2)
for i := 0; i < 43; i++ {
// The window size is 6 so we need to double 6 times.
if i != 0 {
for k := 0; k < 6; k++ {
c256PointDoubleAsm(t1, t1)
}
}
// Convert the point to affine form. (Its values are
// still in Montgomery form however.)
c256Inverse(zInv, t1[8:12])
c256Sqr(zInvSq, zInv, 1)
c256Mul(zInv, zInv, zInvSq)
c256Mul(t1[:4], t1[:4], zInvSq)
c256Mul(t1[4:8], t1[4:8], zInv)
// The point is now affine, so reset z to one (Montgomery form).
copy(t1[8:12], basePoint[8:12])
// Update the table entry
copy(c256Precomputed[i][j*8:], t1[:8])
}
// Advance t2 to the next multiple of G. The first step (G + G) has equal
// operands, so it is done as a doubling instead of an addition.
if j == 0 {
c256PointDoubleAsm(t2, basePoint)
} else {
c256PointAddAsm(t2, t2, basePoint)
}
}
}
// c256SelectBaseOfGo is a pure-Go table lookup: for idx > 0 it copies entry
// idx-1 of table (8 words per entry) into point[:8]; for idx == 0 it leaves
// point untouched.
//
// Unlike the assembly table accesses, this lookup indexes the table directly
// by idx and is therefore not constant time.
func c256SelectBaseOfGo(point, table []uint64, idx int) {
	if idx == 0 {
		return
	}
	copy(point[:8], table[8*(idx-1):])
}
// c256BaseMult sets p to scalar*G using the precomputed table
// c256Precomputed. scalar is four 64-bit limbs, limb 0 least significant.
// The scalar is consumed in 43 overlapping 7-bit windows (6 new bits plus
// the top bit of the previous window), each recoded by boothW6.
func (p *c256Point) c256BaseMult(scalar []uint64) {
// First window: bits 0..5 of the scalar with an implicit zero bit below.
wvalue := (scalar[0] << 1) & 0x7f
sel, sign := boothW6(uint(wvalue))
c256SelectBase(p.xyz[0:8], c256Precomputed[0][0:], sel)
c256NegCond(p.xyz[4:8], sign)
// (This is one, in the Montgomery domain.)
p.xyz[8] = montOne0
p.xyz[9] = montOne1
p.xyz[10] = montOne2
p.xyz[11] = montOne3
// t0 receives the table entry selected for each subsequent window.
var t0 c256Point
// (This is one, in the Montgomery domain.)
t0.xyz[8] = montOne0
t0.xyz[9] = montOne1
t0.xyz[10] = montOne2
t0.xyz[11] = montOne3
// index is the bit position where the next window starts; it begins at 5
// so that the window overlaps the top bit of the previous one.
index := uint(5)
// zero stays 0 until a non-zero digit has been selected; it is passed to
// c256PointAddAffineAsm, which returns in2 unchanged while zero == 0.
zero := sel
for i := 1; i < 43; i++ {
// Extract 7 bits starting at index. Below bit 192 the window may straddle
// two limbs, so the low bits of the next limb are merged in.
if index < 192 {
wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
} else {
wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
}
index += 6
sel, sign = boothW6(uint(wvalue))
c256SelectBase(t0.xyz[0:8], c256Precomputed[i][0:], sel)
c256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
zero |= sel
}
}
// c256ScalarMult replaces p with scalar*p. scalar is four 64-bit limbs, limb
// 0 least significant. The scalar is scanned from the top in overlapping
// 6-bit windows (5 new bits plus the top bit of the window below), each
// recoded by boothW5 into a signed digit that indexes a table of 1*p..16*p.
func (p *c256Point) c256ScalarMult(scalar []uint64) {
// precomp is a table of precomputed points that stores the multiples of p
// from 1*p to 16*p; slot k-1 holds k*p (12 words per point).
var precomp [16 * 4 * 3]uint64
var t0, t1, t2, t3 c256Point
// Prepare the table. The trailing comment on each store is the multiple
// of p being stored.
p.c256StorePoint(&precomp, 0) // 1
c256PointDoubleAsm(t0.xyz[:], p.xyz[:])
c256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
c256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
c256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
t0.c256StorePoint(&precomp, 1) // 2
t1.c256StorePoint(&precomp, 3) // 4
t2.c256StorePoint(&precomp, 7) // 8
t3.c256StorePoint(&precomp, 15) // 16
c256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
c256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
c256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
t0.c256StorePoint(&precomp, 2) // 3
t1.c256StorePoint(&precomp, 4) // 5
t2.c256StorePoint(&precomp, 8) // 9
c256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
c256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
t0.c256StorePoint(&precomp, 5) // 6
t1.c256StorePoint(&precomp, 9) // 10
c256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
c256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
t2.c256StorePoint(&precomp, 6) // 7
t1.c256StorePoint(&precomp, 10) // 11
c256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
c256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
t0.c256StorePoint(&precomp, 11) // 12
t2.c256StorePoint(&precomp, 13) // 14
c256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
c256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
t0.c256StorePoint(&precomp, 12) // 13
t2.c256StorePoint(&precomp, 14) // 15
// Start scanning the window from top bit
index := uint(254)
var sel, sign int
// Top window: bits 254 and 255 only; its sign is discarded.
wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
sel, _ = boothW5(uint(wvalue))
c256Select(p.xyz[0:12], precomp[0:], sel)
// zero stays 0 until a non-zero digit has been selected, i.e. while the
// accumulator p is still the point at infinity.
zero := sel
for index > 4 {
index -= 5
// Multiply the accumulator by 2^5.
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
// Extract 6 bits starting at index. Below bit 192 the window may straddle
// two limbs, so the low bits of the next limb are merged in.
if index < 192 {
wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
} else {
wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
}
sel, sign = boothW5(uint(wvalue))
c256Select(t0.xyz[0:], precomp[0:], sel)
c256NegCond(t0.xyz[4:8], sign)
c256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
// If the digit is zero keep p (t1 <- p); if the accumulator was still
// infinity take the selected point itself (p <- t0); otherwise p <- p + t0.
c256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
c256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
zero |= sel
}
// Final window: bits 0..4 of the scalar with an implicit zero bit below.
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
c256PointDoubleAsm(p.xyz[:], p.xyz[:])
wvalue = (scalar[0] << 1) & 0x3f
sel, sign = boothW5(uint(wvalue))
c256Select(t0.xyz[0:], precomp[0:], sel)
c256NegCond(t0.xyz[4:8], sign)
c256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
c256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
c256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}