Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 30 additions & 25 deletions common/bitutil/bitutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,33 +105,15 @@ func safeANDBytes(dst, a, b []byte) int {

// ORBytes ors the bytes in a and b. The destination is assumed to have enough
// space. Returns the number of bytes or'd.
//
// dst and a or b may overlap exactly or not at all,
// otherwise ORBytes may panic.
func ORBytes(dst, a, b []byte) int {
if supportsUnaligned {
return fastORBytes(dst, a, b)
}
return safeORBytes(dst, a, b)
}

// fastORBytes ors in bulk. It only works on architectures that support
// unaligned read/writes.
func fastORBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
w := n / wordSize
if w > 0 {
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
for i := 0; i < w; i++ {
dw[i] = aw[i] | bw[i]
}
}
for i := n - n%wordSize; i < n; i++ {
dst[i] = a[i] | b[i]
// Only the first n bytes of each operand are checked for overlap, where n is
// the length of the shorter input.
n := min(len(a), len(b))
if inexactOverlap(dst[:n], a[:n]) || inexactOverlap(dst[:n], b[:n]) {
panic("ORBytes: invalid overlap")
}
return n
return orBytes(dst, a, b)
}

// safeORBytes ors one by one. It works on all architectures, independent if
Expand Down Expand Up @@ -186,3 +168,26 @@ func safeTestBytes(p []byte) bool {
}
return false
}

// anyOverlap reports whether the byte ranges covered by x and y intersect at
// all, regardless of whether the shared bytes sit at the same index in both
// slices. Only the first len(x) and len(y) bytes are considered; spare
// capacity is ignored. An empty slice never overlaps anything.
// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L13-L17
func anyOverlap(x, y []byte) bool {
	if len(x) == 0 || len(y) == 0 {
		return false
	}
	// Addresses of the first and last byte of each slice.
	xFirst := uintptr(unsafe.Pointer(&x[0]))
	xLast := uintptr(unsafe.Pointer(&x[len(x)-1]))
	yFirst := uintptr(unsafe.Pointer(&y[0]))
	yLast := uintptr(unsafe.Pointer(&y[len(y)-1]))
	// Two closed intervals intersect when each one starts no later than the
	// other one ends.
	return xFirst <= yLast && yFirst <= xLast
}

// inexactOverlap reports whether x and y share memory at any index that is
// not the same in both slices. Slices that start at the same address are
// treated as an exact overlap and yield false, even when their lengths
// differ. The memory beyond the slice length is ignored.
//
// inexactOverlap can be used to implement the requirements of the crypto/cipher
// AEAD, Block, BlockMode and Stream interfaces.
// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L25-L30
func inexactOverlap(x, y []byte) bool {
	switch {
	case len(x) == 0, len(y) == 0:
		// An empty slice shares no bytes with anything.
		return false
	case &x[0] == &y[0]:
		// Same starting address: every shared byte has the same index.
		return false
	}
	return anyOverlap(x, y)
}
26 changes: 26 additions & 0 deletions common/bitutil/bitutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,32 @@ func TestOR(t *testing.T) {
}
}

// TestORBytesInexactOverlap checks that ORBytes panics with the message
// "ORBytes: invalid overlap" when dst partially overlaps either input.
func TestORBytesInexactOverlap(t *testing.T) {
	// panicsWithInvalidOverlap runs f and reports whether it panicked with
	// exactly the invalid-overlap message. No panic, or a panic carrying any
	// other value, yields false.
	panicsWithInvalidOverlap := func(f func()) (ok bool) {
		defer func() {
			// The comma-ok assertion keeps a non-string panic value (for
			// example a runtime.Error) from re-panicking inside this
			// deferred function and aborting the test run.
			msg, isString := recover().(string)
			ok = isString && msg == "ORBytes: invalid overlap"
		}()
		f()
		return ok
	}

	buf := make([]byte, 5)
	tests := []struct {
		name string
		call func()
	}{
		{"dst overlaps a", func() { ORBytes(buf[1:4], buf[0:3], make([]byte, 3)) }},
		{"dst overlaps b", func() { ORBytes(buf[1:4], make([]byte, 3), buf[0:3]) }},
	}
	for _, tc := range tests {
		if !panicsWithInvalidOverlap(tc.call) {
			t.Errorf("%s: expected panic on inexact overlap", tc.name)
		}
	}
}

// Tests that bit testing works for various alignments.
func TestTest(t *testing.T) {
for align := 0; align < 2; align++ {
Expand Down
59 changes: 59 additions & 0 deletions common/bitutil/or_amd64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s

//go:build !purego

#include "textflag.h"

// func orBytesASM(dst, a, b *byte, n int)
//
// orBytesASM stores a[i] | b[i] into dst[i] for the n bytes starting at the
// three pointers.
//
// Register use: BX = dst, SI = a, CX = b, DX = number of bytes still to do,
// AX = forward offset inside the 16-byte loop (also scratch in the tails).
//
// Strategy: when n is not a multiple of 16 the trailing bytes are handled
// first, working backwards from the end (single bytes until the remaining
// count is a multiple of 8, then one 8-byte step if the count is still not a
// multiple of 16). The remaining multiple-of-16 prefix is then processed
// forwards, 16 bytes per iteration.
//
// NOTE(review): with n == 0 the first test falls through to the 16-byte loop,
// which runs its body before comparing the offset against n. The Go caller is
// presumably expected never to pass n == 0; confirm.
TEXT ·orBytesASM(SB), NOSPLIT, $0
MOVQ dst+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVQ n+24(FP), DX
TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
JNZ not_aligned

// Here DX is a multiple of 16: process bytes [0, DX) front to back.
aligned:
MOVQ $0, AX // position in slices

PCALIGN $16
loop16b:
MOVOU (SI)(AX*1), X0 // OR 16byte forwards.
MOVOU (CX)(AX*1), X1
POR X1, X0
MOVOU X0, (BX)(AX*1)
ADDQ $16, AX
CMPQ DX, AX
JNE loop16b
RET

// Byte-wise tail: each iteration handles the last unprocessed byte and
// shrinks DX by one, until DX is a multiple of 8.
PCALIGN $16
loop_1b:
SUBQ $1, DX // OR 1byte backwards.
MOVB (SI)(DX*1), DI
MOVB (CX)(DX*1), AX
ORB AX, DI
MOVB DI, (BX)(DX*1)
TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
JNZ loop_1b
CMPQ DX, $0 // if len is 0, ret.
JE ret
TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
JZ aligned

// Here DX is not a multiple of 16. If it is not a multiple of 8 either, go
// byte-wise; otherwise peel one 8-byte word off the end.
not_aligned:
TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
JNE loop_1b
SUBQ $8, DX // OR 8bytes backwards.
MOVQ (SI)(DX*1), DI
MOVQ (CX)(DX*1), AX
ORQ AX, DI
MOVQ DI, (BX)(DX*1)
CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
JGE aligned

ret:
RET
70 changes: 70 additions & 0 deletions common/bitutil/or_arm64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s

//go:build !purego

#include "textflag.h"

// func orBytesASM(dst, a, b *byte, n int)
//
// orBytesASM stores a[i] | b[i] into dst[i] for the n bytes starting at the
// three pointers.
//
// Register use: R0 = dst, R1 = a, R2 = b, R3 = number of bytes still to do.
// Loads and stores carrying the .P suffix advance their pointer register by
// the access size after the access.
//
// Strategy: process 64 bytes per iteration while at least 64 remain, then
// handle the remainder (< 64) by testing bits 5..0 of R3 and doing one
// 32/16/8/4/2/1-byte step for each bit that is set.
TEXT ·orBytesASM(SB), NOSPLIT|NOFRAME, $0
MOVD dst+0(FP), R0
MOVD a+8(FP), R1
MOVD b+16(FP), R2
MOVD n+24(FP), R3
CMP $64, R3
BLT tail
// Main loop: four 16-byte vectors from each input per iteration.
loop_64:
VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
VORR V0.B16, V4.B16, V4.B16
VORR V1.B16, V5.B16, V5.B16
VORR V2.B16, V6.B16, V6.B16
VORR V3.B16, V7.B16, V7.B16
VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
SUBS $64, R3
CMP $64, R3
BGE loop_64
// Fewer than 64 bytes remain. R3 is not updated below; each size class is
// selected by the corresponding bit of the remaining count.
tail:
// quick end
CBZ R3, end
// Bit 5 set: 32 bytes via two vector registers per input.
TBZ $5, R3, less_than32
VLD1.P 32(R1), [V0.B16, V1.B16]
VLD1.P 32(R2), [V2.B16, V3.B16]
VORR V0.B16, V2.B16, V2.B16
VORR V1.B16, V3.B16, V3.B16
VST1.P [V2.B16, V3.B16], 32(R0)
// Bit 4 set: 16 bytes via a pair of 64-bit general registers.
less_than32:
TBZ $4, R3, less_than16
LDP.P 16(R1), (R11, R12)
LDP.P 16(R2), (R13, R14)
ORR R11, R13, R13
ORR R12, R14, R14
STP.P (R13, R14), 16(R0)
// Bit 3 set: 8 bytes.
less_than16:
TBZ $3, R3, less_than8
MOVD.P 8(R1), R11
MOVD.P 8(R2), R12
ORR R11, R12, R12
MOVD.P R12, 8(R0)
// Bit 2 set: 4 bytes.
less_than8:
TBZ $2, R3, less_than4
MOVWU.P 4(R1), R13
MOVWU.P 4(R2), R14
ORRW R13, R14, R14
MOVWU.P R14, 4(R0)
// Bit 1 set: 2 bytes.
less_than4:
TBZ $1, R3, less_than2
MOVHU.P 2(R1), R15
MOVHU.P 2(R2), R16
ORRW R15, R16, R16
MOVHU.P R16, 2(R0)
// Bit 0 set: final single byte; no pointer update is needed afterwards.
less_than2:
TBZ $0, R3, end
MOVBU (R1), R17
MOVBU (R2), R19
ORRW R17, R19, R19
MOVBU R19, (R0)
end:
RET
17 changes: 17 additions & 0 deletions common/bitutil/or_asm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go

//go:build (amd64 || arm64) && !purego

package bitutil

// orBytes ors the first n = min(len(a), len(b)) bytes of a and b into dst
// using the assembly implementation and returns n, the number of bytes or'd.
//
// It returns 0 without touching dst when either input is empty. It panics if
// dst does not have capacity for n bytes.
func orBytes(dst, a, b []byte) int {
	n := min(len(a), len(b))
	if n == 0 {
		// Nothing to do. Returning here also avoids taking &a[0] or &b[0]
		// of an empty slice, which would panic.
		return 0
	}
	// Reslicing panics when n exceeds cap(dst), so the assembly routine is
	// never handed a destination too small for n bytes.
	dst = dst[:n]
	orBytesASM(&dst[0], &a[0], &b[0], n)
	return n
}

// orBytesASM is implemented in assembly (or_amd64.s and or_arm64.s). It is
// called by orBytes with pointers to the first byte of dst, a and b, and the
// number of bytes n to process.
//
//go:noescape
func orBytesASM(dst, a, b *byte, n int)
38 changes: 38 additions & 0 deletions common/bitutil/or_generic.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (!amd64 && !arm64) || purego

package bitutil

import "unsafe"

// orBytes is the pure-Go implementation behind ORBytes. It uses the
// word-at-a-time fastORBytes when supportsUnaligned is set and the
// byte-at-a-time safeORBytes otherwise.
func orBytes(dst, a, b []byte) int {
	if !supportsUnaligned {
		return safeORBytes(dst, a, b)
	}
	return fastORBytes(dst, a, b)
}

// fastORBytes ors a and b into dst one machine word at a time, finishing any
// leftover bytes individually. It returns the number of bytes or'd, which is
// the length of the shorter input. It only works on architectures that
// support unaligned read/writes.
func fastORBytes(dst, a, b []byte) int {
	n := min(len(a), len(b))
	words := n / wordSize
	if words > 0 {
		// Reinterpret the three byte slices as word slices so that each
		// iteration combines wordSize bytes.
		dstWords := *(*[]uintptr)(unsafe.Pointer(&dst))
		aWords := *(*[]uintptr)(unsafe.Pointer(&a))
		bWords := *(*[]uintptr)(unsafe.Pointer(&b))
		for i := 0; i < words; i++ {
			dstWords[i] = aWords[i] | bWords[i]
		}
	}
	// Bytes past the last whole word are combined one at a time.
	for i := words * wordSize; i < n; i++ {
		dst[i] = a[i] | b[i]
	}
	return n
}