init: v1.0.0
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
package xor
|
||||
|
||||
// XorBytes exposes the unexported xorBytes so that the external xor_test
// package can call it.
var XorBytes = xorBytes
|
||||
@@ -0,0 +1,27 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xor
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
_ = dst[n-1]
|
||||
xorBytesSSE2(&dst[0], &a[0], &b[0], n) // amd64 must have SSE2
|
||||
return n
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
xorBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// xorBytesSSE2 XORs n bytes starting at a and b into the memory starting at
// dst. It has no Go body; it is implemented in assembly
// (TEXT ·xorBytesSSE2). xorBytes only calls it with n > 0.
//
//go:noescape
func xorBytesSSE2(dst, a, b *byte, n int)
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytesSSE2(dst, a, b *byte, n int)
//
// XORs n bytes of a and b into dst. Any tail that does not fit a multiple of
// 16 bytes is handled first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then one 8-byte word if the
// length is still not a multiple of 16. What is left is processed forwards,
// 16 bytes per iteration.
//
// n must be greater than zero: loop16b only tests for completion after it
// has already processed 16 bytes.
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
	MOVQ dst+0(FP), BX // BX = dst
	MOVQ a+8(FP), SI // SI = a
	MOVQ b+16(FP), CX // CX = b
	MOVQ n+24(FP), DX // DX = n (remaining length)
	TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
	JNZ not_aligned

aligned:
	MOVQ $0, AX // position in slices

loop16b:
	MOVOU (SI)(AX*1), X0 // XOR 16byte forwards.
	MOVOU (CX)(AX*1), X1
	PXOR X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ $16, AX
	CMPQ DX, AX // stop once the position reaches the remaining length
	JNE loop16b
	RET

loop_1b:
	SUBQ $1, DX // XOR 1byte backwards.
	MOVB (SI)(DX*1), DI
	MOVB (CX)(DX*1), AX
	XORB AX, DI
	MOVB DI, (BX)(DX*1)
	TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
	JNZ loop_1b
	CMPQ DX, $0 // if len is 0, ret.
	JE ret
	TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
	JZ aligned

not_aligned:
	TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
	JNE loop_1b
	SUBQ $8, DX // XOR 8bytes backwards.
	MOVQ (SI)(DX*1), DI
	MOVQ (CX)(DX*1), AX
	XORQ AX, DI
	MOVQ DI, (BX)(DX*1)
	CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
	JGE aligned

ret:
	RET
|
||||
@@ -0,0 +1,29 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xor
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
// make sure dst has enough space
|
||||
_ = dst[n-1]
|
||||
|
||||
xorBytesARM64(&dst[0], &a[0], &b[0], n)
|
||||
return n
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
xorBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// xorBytesARM64 XORs n bytes starting at a and b into the memory starting at
// dst. It has no Go body; it is implemented in assembly
// (TEXT ·xorBytesARM64).
//
//go:noescape
func xorBytesARM64(dst, a, b *byte, n int)
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytesARM64(dst, a, b *byte, n int)
//
// XORs n bytes of a and b into dst. Whole 64-byte blocks are handled in
// loop_64. Afterwards fewer than 64 bytes remain, so bits 5..0 of the
// remaining count say exactly which of the 32/16/8/4/2/1-byte steps are
// still needed; each step is skipped when its bit is clear. The .P forms
// advance the pointer registers past the bytes just handled.
TEXT ·xorBytesARM64(SB), NOSPLIT|NOFRAME, $0
	MOVD dst+0(FP), R0 // R0 = dst
	MOVD a+8(FP), R1 // R1 = a
	MOVD b+16(FP), R2 // R2 = b
	MOVD n+24(FP), R3 // R3 = n (remaining length)
	CMP $64, R3
	BLT tail
loop_64:
	// 64 bytes per iteration.
	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
	VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
	VEOR V0.B16, V4.B16, V4.B16
	VEOR V1.B16, V5.B16, V5.B16
	VEOR V2.B16, V6.B16, V6.B16
	VEOR V3.B16, V7.B16, V7.B16
	VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
	SUBS $64, R3
	CMP $64, R3
	BGE loop_64
tail:
	// quick end
	CBZ R3, end
	TBZ $5, R3, less_than32 // bit 5 clear: no 32-byte step
	VLD1.P 32(R1), [V0.B16, V1.B16]
	VLD1.P 32(R2), [V2.B16, V3.B16]
	VEOR V0.B16, V2.B16, V2.B16
	VEOR V1.B16, V3.B16, V3.B16
	VST1.P [V2.B16, V3.B16], 32(R0)
less_than32:
	TBZ $4, R3, less_than16 // bit 4 clear: no 16-byte step
	LDP.P 16(R1), (R11, R12)
	LDP.P 16(R2), (R13, R14)
	EOR R11, R13, R13
	EOR R12, R14, R14
	STP.P (R13, R14), 16(R0)
less_than16:
	TBZ $3, R3, less_than8 // bit 3 clear: no 8-byte step
	MOVD.P 8(R1), R11
	MOVD.P 8(R2), R12
	EOR R11, R12, R12
	MOVD.P R12, 8(R0)
less_than8:
	TBZ $2, R3, less_than4 // bit 2 clear: no 4-byte step
	MOVWU.P 4(R1), R13
	MOVWU.P 4(R2), R14
	EORW R13, R14, R14
	MOVWU.P R14, 4(R0)
less_than4:
	TBZ $1, R3, less_than2 // bit 1 clear: no 2-byte step
	MOVHU.P 2(R1), R15
	MOVHU.P 2(R2), R16
	EORW R15, R16, R16
	MOVHU.P R16, 2(R0)
less_than2:
	TBZ $0, R3, end // bit 0 clear: no final byte
	MOVBU (R1), R17
	MOVBU (R2), R19
	EORW R17, R19, R19
	MOVBU R19, (R0)
end:
	RET
|
||||
@@ -0,0 +1,92 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !amd64 && !ppc64 && !ppc64le && !arm64
|
||||
// +build !amd64,!ppc64,!ppc64le,!arm64
|
||||
|
||||
package xor
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
switch {
|
||||
case supportsUnaligned:
|
||||
fastXORBytes(dst, a, b, n)
|
||||
default:
|
||||
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||
// we could still try fastXORBytes. It is not clear
|
||||
// how often this happens, and it's only worth it if
|
||||
// the block encryption itself is hardware
|
||||
// accelerated.
|
||||
safeXORBytes(dst, a, b, n)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
const (
	// wordSize is the size of a uintptr in bytes; the word-at-a-time
	// helpers XOR this many bytes per step.
	wordSize = int(unsafe.Sizeof(uintptr(0)))

	// supportsUnaligned selects the word-at-a-time helpers (fastXORBytes,
	// fastXORWords) over the byte-wise safeXORBytes.
	// NOTE: this file's build constraint excludes ppc64 and ppc64le, so
	// those two comparisons can never be true here.
	supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
)
|
||||
|
||||
// fastXORBytes xors in bulk. It only works on architectures that
|
||||
// support unaligned read/writes.
|
||||
// n needs to be smaller or equal than the length of a and b.
|
||||
func fastXORBytes(dst, a, b []byte, n int) {
|
||||
// Assert dst has enough space
|
||||
_ = dst[n-1]
|
||||
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
for i := 0; i < w; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
for i := (n - n%wordSize); i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
}
|
||||
|
||||
// safeXORBytes XORs the first n bytes of a and b into dst one byte at a
// time. n must not exceed the length of a, b or dst; otherwise indexing
// panics once the shortest slice is exhausted.
func safeXORBytes(dst, a, b []byte, n int) {
	for i := 0; i < n; i++ {
		dst[i] = a[i] ^ b[i]
	}
}
|
||||
|
||||
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The arguments are assumed to be of equal length.
|
||||
func fastXORWords(dst, a, b []byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
for i := 0; i < n; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
}
|
||||
|
||||
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The slice arguments a and b are assumed to be of equal length.
|
||||
func xorWords(dst, a, b []byte) {
|
||||
if supportsUnaligned {
|
||||
fastXORWords(dst, a, b)
|
||||
} else {
|
||||
safeXORBytes(dst, a, b, len(b))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
package xor
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
_ = dst[n-1]
|
||||
xorBytesVSX(&dst[0], &a[0], &b[0], n)
|
||||
return n
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
xorBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// xorBytesVSX XORs n bytes starting at a and b into the memory starting at
// dst. It has no Go body; it is implemented in assembly
// (TEXT ·xorBytesVSX).
//
//go:noescape
func xorBytesVSX(dst, a, b *byte, n int)
|
||||
@@ -0,0 +1,87 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ppc64 ppc64le
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytesVSX(dst, a, b *byte, n int)
//
// XORs n bytes of a and b into dst, walking forwards with R8 as the byte
// index: 32 bytes per iteration while at least 32 remain, then at most one
// 16-byte step, at most one 8-byte step, and finally a byte loop for
// whatever is left.
// NOTE(review): the code uses R0 as the constant zero (index start and
// comparisons against 0) — confirm against the Go ppc64 register conventions.
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
	MOVD dst+0(FP), R3 // R3 = dst
	MOVD a+8(FP), R4 // R4 = a
	MOVD b+16(FP), R5 // R5 = b
	MOVD n+24(FP), R6 // R6 = n

	CMPU R6, $32, CR7 // CR7: compare n with 32
	MOVD R0, R8 // R8 = index
	CMPU R6, $8, CR6 // CR6: compare n with 8
	BLT CR6, small // Smaller than 8
	BLT CR7, xor16 // Case for 8 ≤ n < 32 bytes (xor16 drops to xor8 below 16)

	// Case for n ≥ 32 bytes
preloop32:
	SRD $5, R6, R7 // Setup loop counter: n / 32 iterations
	MOVD R7, CTR
	MOVD $16, R10 // R10 = index + 16, for the second vector of each pair
	ANDCC $31, R6, R9 // R9 = n % 32, trailing bytes for later; result recorded in CR0
loop32:
	LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15]
	LXVD2X (R4)(R10), VS34 // VS34 = a[i+16,...,i+31]
	LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15]
	LXVD2X (R5)(R10), VS35 // VS35 = b[i+16,...,i+31]
	XXLXOR VS32, VS33, VS32 // VS32 = a[] ^ b[]
	XXLXOR VS34, VS35, VS34 // VS34 = a[] ^ b[]
	STXVD2X VS32, (R3)(R8) // Store to dst
	STXVD2X VS34, (R3)(R10)
	ADD $32, R8 // Update index
	ADD $32, R10
	BC 16, 0, loop32 // bdnz loop32

	BEQ CR0, done // n % 32 == 0: no trailing bytes

	MOVD R9, R6 // R6 = trailing byte count
	CMP R6, $8
	BLT small
xor16:
	CMP R6, $16
	BLT xor8
	LXVD2X (R4)(R8), VS32
	LXVD2X (R5)(R8), VS33
	XXLXOR VS32, VS33, VS32
	STXVD2X VS32, (R3)(R8)
	ADD $16, R8
	ADD $-16, R6
	CMP R6, $8
	BLT small
xor8:
	// Case for 8 ≤ n < 16 bytes
	MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
	MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
	XOR R14, R15, R16 // R16 = a[] ^ b[]
	SUB $8, R6 // n = n - 8
	MOVD R16, (R3)(R8) // Store to dst
	ADD $8, R8

	// Check if we're finished
	CMP R6, R0
	BGT small
	RET

	// Case for n < 8 bytes and trailing bytes from the
	// previous cases.
small:
	CMP R6, R0
	BEQ done
	MOVD R6, CTR // Setup loop counter

loop:
	MOVBZ (R4)(R8), R14 // R14 = a[i]
	MOVBZ (R5)(R8), R15 // R15 = b[i]
	XOR R14, R15, R16 // R16 = a[i] ^ b[i]
	MOVB R16, (R3)(R8) // Store to dst
	ADD $1, R8
	BC 16, 0, loop // bdnz loop

done:
	RET
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xor_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"xdx.jelly/xgcl/internal/xor"
|
||||
)
|
||||
|
||||
func TestXOR(t *testing.T) {
|
||||
for j := 1; j <= 1024; j++ {
|
||||
if testing.Short() && j > 16 {
|
||||
break
|
||||
}
|
||||
for alignP := 0; alignP < 2; alignP++ {
|
||||
for alignQ := 0; alignQ < 2; alignQ++ {
|
||||
for alignD := 0; alignD < 2; alignD++ {
|
||||
p := make([]byte, j)[alignP:]
|
||||
q := make([]byte, j)[alignQ:]
|
||||
d1 := make([]byte, j+alignD)[alignD:]
|
||||
d2 := make([]byte, j+alignD)[alignD:]
|
||||
if _, err := io.ReadFull(rand.Reader, p); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := io.ReadFull(rand.Reader, q); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
xor.XorBytes(d1, p, q)
|
||||
n := min(p, q)
|
||||
for i := 0; i < n; i++ {
|
||||
d2[i] = p[i] ^ q[i]
|
||||
}
|
||||
if !bytes.Equal(d1, d2) {
|
||||
t.Logf("p: %#v", p)
|
||||
t.Logf("q: %#v", q)
|
||||
t.Logf("expect: %#v", d2)
|
||||
t.Logf("result: %#v", d1)
|
||||
t.Fatal("not equal")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the length of the shorter of a and b.
func min(a, b []byte) int {
	if len(b) < len(a) {
		return len(b)
	}
	return len(a)
}
|
||||
|
||||
func BenchmarkXORBytes(b *testing.B) {
|
||||
dst := make([]byte, 1<<15)
|
||||
data0 := make([]byte, 1<<15)
|
||||
data1 := make([]byte, 1<<15)
|
||||
sizes := []int64{1 << 3, 1 << 7, 1 << 11, 1 << 15}
|
||||
for _, size := range sizes {
|
||||
b.Run(fmt.Sprintf("%dBytes", size), func(b *testing.B) {
|
||||
s0 := data0[:size]
|
||||
s1 := data1[:size]
|
||||
b.SetBytes(int64(size))
|
||||
for i := 0; i < b.N; i++ {
|
||||
xor.XorBytes(dst, s0, s1)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user