init: v1.0.0
This commit is contained in:
@@ -0,0 +1,280 @@
|
||||
/**
Encrypt 4 blocks of SM4 in parallel, using the AES-NI instructions.

TODO: should the 128-bit moves use MOVUPS or MOVOU?
*/
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Read-only 16-byte constants used by encrypt4 and the transpose macros.
// All of them are only ever read (as PSHUFB/PAND/AESENCLAST/MOVUPS memory
// operands), so they are placed in RODATA.

// All-zero value, used as the round-key operand of AESENCLAST.
DATA zero<>+0(SB)/8, $0x0
DATA zero<>+8(SB)/8, $0x0
GLOBL zero<>(SB), (NOPTR+RODATA), $16

// no need, use shufps instead
// DATA ext<>+0(SB)/8, $0x0302010003020100
// DATA ext<>+8(SB)/8, $0x0302010003020100
// GLOBL ext<>(SB), NOPTR, $16

// PSHUFB mask: copies source bytes 0-7 into bytes 8-15 and zeroes bytes 0-7
// (index 0xff has its top bit set, which makes PSHUFB write zero).
DATA l64<>+0(SB)/8, $0xffffffffffffffff
DATA l64<>+8(SB)/8, $0x0706050403020100
GLOBL l64<>(SB), (NOPTR+RODATA), $16

// PSHUFB masks eNMl: take 32-bit word N of the source, reverse its four
// bytes, place it in 32-bit lane M of the result and zero every other lane.

// Word 0 -> lanes 0..3.
DATA e00l<>+0(SB)/8, $0xffffffff00010203
DATA e00l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e00l<>(SB), (NOPTR+RODATA), $16

DATA e01l<>+0(SB)/8, $0x00010203ffffffff
DATA e01l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e01l<>(SB), (NOPTR+RODATA), $16

DATA e02l<>+0(SB)/8, $0xffffffffffffffff
DATA e02l<>+8(SB)/8, $0xffffffff00010203
GLOBL e02l<>(SB), (NOPTR+RODATA), $16

DATA e03l<>+0(SB)/8, $0xffffffffffffffff
DATA e03l<>+8(SB)/8, $0x00010203ffffffff
GLOBL e03l<>(SB), (NOPTR+RODATA), $16

// Word 1 -> lanes 0..3.
DATA e10l<>+0(SB)/8, $0xffffffff04050607
DATA e10l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e10l<>(SB), (NOPTR+RODATA), $16

DATA e11l<>+0(SB)/8, $0x04050607ffffffff
DATA e11l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e11l<>(SB), (NOPTR+RODATA), $16

DATA e12l<>+0(SB)/8, $0xffffffffffffffff
DATA e12l<>+8(SB)/8, $0xffffffff04050607
GLOBL e12l<>(SB), (NOPTR+RODATA), $16

DATA e13l<>+0(SB)/8, $0xffffffffffffffff
DATA e13l<>+8(SB)/8, $0x04050607ffffffff
GLOBL e13l<>(SB), (NOPTR+RODATA), $16

// Word 2 -> lanes 0..3.
DATA e20l<>+0(SB)/8, $0xffffffff08090a0b
DATA e20l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e20l<>(SB), (NOPTR+RODATA), $16

DATA e21l<>+0(SB)/8, $0x08090a0bffffffff
DATA e21l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e21l<>(SB), (NOPTR+RODATA), $16

DATA e22l<>+0(SB)/8, $0xffffffffffffffff
DATA e22l<>+8(SB)/8, $0xffffffff08090a0b
GLOBL e22l<>(SB), (NOPTR+RODATA), $16

DATA e23l<>+0(SB)/8, $0xffffffffffffffff
DATA e23l<>+8(SB)/8, $0x08090a0bffffffff
GLOBL e23l<>(SB), (NOPTR+RODATA), $16

// Word 3 -> lanes 0..3.
DATA e30l<>+0(SB)/8, $0xffffffff0c0d0e0f
DATA e30l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e30l<>(SB), (NOPTR+RODATA), $16

DATA e31l<>+0(SB)/8, $0x0c0d0e0fffffffff
DATA e31l<>+8(SB)/8, $0xffffffffffffffff
GLOBL e31l<>(SB), (NOPTR+RODATA), $16

DATA e32l<>+0(SB)/8, $0xffffffffffffffff
DATA e32l<>+8(SB)/8, $0xffffffff0c0d0e0f
GLOBL e32l<>(SB), (NOPTR+RODATA), $16

DATA e33l<>+0(SB)/8, $0xffffffffffffffff
DATA e33l<>+8(SB)/8, $0x0c0d0e0fffffffff
GLOBL e33l<>(SB), (NOPTR+RODATA), $16

// Byte mask selecting the low nibble of every byte.
DATA c0f<>+0(SB)/8, $0x0F0F0F0F0F0F0F0F
DATA c0f<>+8(SB)/8, $0x0F0F0F0F0F0F0F0F
GLOBL c0f<>(SB), (NOPTR+RODATA), $16

// PSHUFB mask reversing the bytes of each 32-bit lane (endianness flip).
// Only referenced from commented-out code in encrypt4.
DATA flp<>+0(SB)/8, $0x0405060700010203
DATA flp<>+8(SB)/8, $0x0C0D0E0F08090A0B
GLOBL flp<>(SB), (NOPTR+RODATA), $16

// PSHUFB mask applied right before AESENCLAST to invert its ShiftRows step.
DATA shr<>+0(SB)/8, $0x0B0E0104070A0D00
DATA shr<>+8(SB)/8, $0x0306090C0F020508
GLOBL shr<>(SB), (NOPTR+RODATA), $16

// Inner affine transform (applied before AESENCLAST): 16-entry lookup tables
// indexed by the low nibble (m1l) and the high nibble (m1h) of each byte.
DATA m1l<>+0(SB)/8, $0x9197E2E474720701
DATA m1l<>+8(SB)/8, $0xC7C1B4B222245157
GLOBL m1l<>(SB), (NOPTR+RODATA), $16

DATA m1h<>+0(SB)/8, $0xE240AB09EB49A200
DATA m1h<>+8(SB)/8, $0xF052B91BF95BB012
GLOBL m1h<>(SB), (NOPTR+RODATA), $16

// Outer affine transform (applied after AESENCLAST): lookup tables indexed by
// the low nibble (m2l) and the high nibble (m2h) of each byte.
DATA m2l<>+0(SB)/8, $0x5B67F2CEA19D0834
DATA m2l<>+8(SB)/8, $0xEDD14478172BBE82
GLOBL m2l<>(SB), (NOPTR+RODATA), $16

DATA m2h<>+0(SB)/8, $0xAE7201DD73AFDC00
DATA m2h<>+8(SB)/8, $0x11CDBE62CC1063BF
GLOBL m2h<>(SB), (NOPTR+RODATA), $16

// PSHUFB masks rotating every 32-bit lane left by 8, 16 and 24 bits.
DATA r08<>+0(SB)/8, $0x0605040702010003
DATA r08<>+8(SB)/8, $0x0E0D0C0F0A09080B
GLOBL r08<>(SB), (NOPTR+RODATA), $16

DATA r16<>+0(SB)/8, $0x0504070601000302
DATA r16<>+8(SB)/8, $0x0D0C0F0E09080B0A
GLOBL r16<>(SB), (NOPTR+RODATA), $16

DATA r24<>+0(SB)/8, $0x0407060500030201
DATA r24<>+8(SB)/8, $0x0C0F0E0D080B0A09
GLOBL r24<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
|
||||
// transposeLoad0(reg, begin, X, XT) gathers the four 32-bit words found at
// byte offsets begin+0, begin+16, begin+32 and begin+48 from reg and packs
// them, in that order, into lanes 0..3 of X. No byte swapping is performed.
//
// The words at +32/+48 are first packed into the low 64 bits of X and moved
// to the high half with the l64 shuffle; the words at +0/+16 are packed into
// the low 64 bits of XT and merged in with PXOR.
//
// Clobbers AX, BX, CX, DX, XT and the flags.
// (Comments are kept outside the body so they cannot swallow a line
// continuation.)
#define transposeLoad0(reg, begin, X, XT)\
	MOVL +(begin+32)(reg), CX\
	MOVL +(begin+48)(reg), DX\
	SHLQ $32, DX\
	XORQ CX, DX\
	MOVQ DX, X\
	PSHUFB l64<>(SB), X\
	MOVL +(begin+0)(reg), AX\
	MOVL +(begin+16)(reg), BX\
	SHLQ $32, BX\
	XORQ AX, BX\
	MOVQ BX, XT\
	PXOR XT, X
|
||||
|
||||
// transposeLoad(X, mask0, mask1, mask2, mask3) builds one 128-bit value in X
// out of the four registers X8, X9, X10 and X11:
//
//	X = PSHUFB(X8, mask0) ^ PSHUFB(X9, mask1) ^ PSHUFB(X10, mask2) ^ PSHUFB(X11, mask3)
//
// With the eN0l..eN3l masks each shuffle keeps a single byte-reversed word in
// its own lane and zeroes the rest, so the XORs simply merge the four lanes.
//
// X8-X11 are left untouched. X4-X7 are clobbered; on exit X4 holds the same
// value as X.
#define transposeLoad(X,mask0,mask1,mask2,mask3)\
	MOVOU X8, X4\
	PSHUFB mask0, X4\
	MOVOU X9, X5\
	PSHUFB mask1, X5\
	PXOR X5, X4\
	MOVOU X10, X6\
	PSHUFB mask2, X6\
	PXOR X6, X4\
	MOVOU X11, X7\
	PSHUFB mask3, X7\
	PXOR X7, X4\
	MOVOU X4, X
|
||||
|
||||
// storeTranspose(mask0, mask1, mask2, mask3, begin) assembles one 16-byte
// output value from the state registers and stores it at begin(DI):
//
//	begin(DI) = PSHUFB(X3, mask0) ^ PSHUFB(X2, mask1) ^ PSHUFB(X1, mask2) ^ PSHUFB(X0, mask3)
//
// Note the reversed register order (X3 first, X0 last). X0-X3 are left
// untouched so the macro can be invoked once per output block; X4-X7 are
// clobbered. The store is unaligned (MOVOU).
//
// The last body line deliberately carries no trailing backslash, so the
// macro definition ends here regardless of what follows it in the file.
#define storeTranspose(mask0,mask1,mask2,mask3, begin)\
	MOVOU X3, X4\
	PSHUFB mask0, X4\
	MOVOU X2, X5\
	PSHUFB mask1, X5\
	PXOR X5, X4\
	MOVOU X1, X6\
	PSHUFB mask2, X6\
	PXOR X6, X4\
	MOVOU X0, X7\
	PSHUFB mask3, X7\
	PXOR X7, X4\
	MOVOU X4, +(begin)(DI)
|
||||
|
||||
// func encrypt4(dst []byte, src []byte, rk []uint32)
//
// Encrypts four 16-byte SM4 blocks at once: reads 64 bytes from src, runs 32
// rounds consuming rk[0..31] in order, and writes 64 bytes to dst.
//
// No slice length is checked here: the code unconditionally reads 64 bytes
// from src, 32 uint32 values from rk, and writes 64 bytes to dst, so the
// caller must guarantee those sizes.
//
// Arguments (three slice headers, 72 bytes): dst+0(FP), src+24(FP), rk+48(FP).
//
// Register use:
//	DI     dst base pointer
//	SI     src base pointer
//	BX     pointer to the next round key
//	CX     rounds remaining
//	X0-X3  state; lane i of Xj is word j of block i (byte-reversed on load)
//	X4-X7  scratch
//	X8-X11 the four raw input blocks
TEXT ·encrypt4(SB),NOSPLIT,$0-72
	MOVQ dst+0(FP), DI
	MOVQ src+24(FP), SI

	// Alternative load path, currently unused:
	// transposeLoad0(SI, 0,X0, X4)
	// transposeLoad0(SI, 4,X1, X4)
	// transposeLoad0(SI, 8,X2, X4)
	// transposeLoad0(SI, 12,X3, X4)
	// PSHUFB flp<>(SB), X0 // to bigendian
	// PSHUFB flp<>(SB), X1
	// PSHUFB flp<>(SB), X2
	// PSHUFB flp<>(SB), X3

	// Load the four input blocks. X8-X11 are only available in 64-bit mode.
	MOVOU 0(SI), X8
	MOVOU 16(SI), X9
	MOVOU 32(SI), X10
	MOVOU 48(SI), X11

	// Transpose: Xj receives word j of every block, byte-reversed.
	transposeLoad(X0,e00l<>(SB),e01l<>(SB),e02l<>(SB),e03l<>(SB))
	transposeLoad(X1,e10l<>(SB),e11l<>(SB),e12l<>(SB),e13l<>(SB))
	transposeLoad(X2,e20l<>(SB),e21l<>(SB),e22l<>(SB),e23l<>(SB))
	transposeLoad(X3,e30l<>(SB),e31l<>(SB),e32l<>(SB),e33l<>(SB))

	MOVQ rk+48(FP), BX
	MOVQ $32, CX

round:
	// X4 = round key broadcast to all four lanes, then advance the key pointer.
	MOVL (BX), X4
	SHUFPS $0, X4, X4
	ADDQ $4, BX

	// X4 = rk ^ X1 ^ X2 ^ X3
	PXOR X1, X4
	PXOR X2, X4
	PXOR X3, X4

	// inner affine: per byte, X4 = m1l[low nibble] ^ m1h[high nibble]
	MOVUPS X4, X5
	PAND c0f<>(SB), X5
	MOVUPS m1l<>(SB), X6
	PSHUFB X5, X6
	MOVUPS X6, X5
	PSRLQ $4, X4
	PAND c0f<>(SB), X4
	MOVUPS m1h<>(SB), X6
	PSHUFB X4, X6
	MOVUPS X6, X4
	PXOR X5, X4

	// Pre-apply the inverse of ShiftRows so that AESENCLAST with an all-zero
	// round key leaves every byte in its original position.
	PSHUFB shr<>(SB), X4 // invert ShiftRows
	AESENCLAST zero<>(SB), X4

	// outer affine: per byte, X4 = m2l[low nibble] ^ m2h[high nibble]
	MOVUPS X4, X5
	PAND c0f<>(SB), X5
	MOVUPS m2l<>(SB), X6
	PSHUFB X5, X6
	MOVUPS X6, X5
	PSRLQ $4, X4
	PAND c0f<>(SB), X4
	MOVUPS m2h<>(SB), X6
	PSHUFB X4, X6
	MOVUPS X6, X4
	PXOR X5, X4

	// 4 parallel L1 linear transforms, per 32-bit lane:
	//   X4 = B ^ rotl(B,2) ^ rotl(B,10) ^ rotl(B,18) ^ rotl(B,24)
	// computed as B ^ rotl(B,24) ^ rotl(B ^ rotl(B,8) ^ rotl(B,16), 2).
	MOVUPS X4, X5
	MOVUPS X4, X6
	PSHUFB r08<>(SB), X5
	PSHUFB r16<>(SB), X6
	PXOR X4, X5
	PXOR X6, X5
	MOVUPS X5, X6
	PSLLL $2, X6
	PSRLL $30, X5
	PXOR X6, X5
	MOVUPS X4, X6
	PSHUFB r24<>(SB), X6
	PXOR X6, X4
	PXOR X5, X4

	// New word = X0 ^ T(...); slide the state window down by one word.
	PXOR X0, X4
	MOVUPS X1, X0
	MOVUPS X2, X1
	MOVUPS X3, X2
	MOVUPS X4, X3

	DECQ CX
	JNZ round

	// to little endian, this is done by storeTranspose
	// PSHUFB flp<>(SB), X0
	// PSHUFB flp<>(SB), X1
	// PSHUFB flp<>(SB), X2
	// PSHUFB flp<>(SB), X3

	// Move X3 || X2 || X1 || X0 to DI and transpose back: output block i is
	// lane i of X3, X2, X1, X0 (in that order), each word byte-reversed.
	storeTranspose(e00l<>(SB),e01l<>(SB),e02l<>(SB),e03l<>(SB),0)
	storeTranspose(e10l<>(SB),e11l<>(SB),e12l<>(SB),e13l<>(SB),16)
	storeTranspose(e20l<>(SB),e21l<>(SB),e22l<>(SB),e23l<>(SB),32)
	storeTranspose(e30l<>(SB),e31l<>(SB),e32l<>(SB),e33l<>(SB),48)
	RET
|
||||
|
||||
|
||||
Reference in New Issue
Block a user