diff --git a/common/bitutil/bitutil.go b/common/bitutil/bitutil.go
index a18a6d18eed8..cc62633a8fc8 100644
--- a/common/bitutil/bitutil.go
+++ b/common/bitutil/bitutil.go
@@ -105,33 +105,20 @@ func safeANDBytes(dst, a, b []byte) int {
 
 // ORBytes ors the bytes in a and b. The destination is assumed to have enough
 // space. Returns the number of bytes or'd.
+//
+// dst and a or b may overlap exactly or not at all,
+// otherwise ORBytes may panic.
 func ORBytes(dst, a, b []byte) int {
-	if supportsUnaligned {
-		return fastORBytes(dst, a, b)
-	}
-	return safeORBytes(dst, a, b)
-}
-
-// fastORBytes ors in bulk. It only works on architectures that support
-// unaligned read/writes.
-func fastORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-	w := n / wordSize
-	if w > 0 {
-		dw := *(*[]uintptr)(unsafe.Pointer(&dst))
-		aw := *(*[]uintptr)(unsafe.Pointer(&a))
-		bw := *(*[]uintptr)(unsafe.Pointer(&b))
-		for i := 0; i < w; i++ {
-			dw[i] = aw[i] | bw[i]
-		}
-	}
-	for i := n - n%wordSize; i < n; i++ {
-		dst[i] = a[i] | b[i]
+	n := min(len(a), len(b))
+	if n == 0 {
+		// Nothing to OR. The assembly-backed orBytes takes &a[0] and &b[0],
+		// which would panic on an empty slice, so return before calling it.
+		return 0
+	}
+	if inexactOverlap(dst[:n], a[:n]) || inexactOverlap(dst[:n], b[:n]) {
+		panic("ORBytes: invalid overlap")
 	}
-	return n
+	return orBytes(dst, a, b)
 }
 
 // safeORBytes ors one by one. It works on all architectures, independent if
@@ -186,3 +173,26 @@ func safeTestBytes(p []byte) bool {
 	}
 	return false
 }
+
+// anyOverlap reports whether x and y share memory at any (not necessarily
+// corresponding) index. The memory beyond the slice length is ignored.
+// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L13-L17
+func anyOverlap(x, y []byte) bool {
+	return len(x) > 0 && len(y) > 0 &&
+		uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) &&
+		uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1]))
+}
+
+// inexactOverlap reports whether x and y share memory at any non-corresponding
+// index. The memory beyond the slice length is ignored. Note that x and y can
+// have different lengths and still not have any inexact overlap.
+//
+// inexactOverlap can be used to implement the requirements of the crypto/cipher
+// AEAD, Block, BlockMode and Stream interfaces.
+// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L25-L30
+func inexactOverlap(x, y []byte) bool {
+	if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
+		return false
+	}
+	return anyOverlap(x, y)
+}
diff --git a/common/bitutil/bitutil_test.go b/common/bitutil/bitutil_test.go
index 12f3fe24a6c9..77d00dfe4d09 100644
--- a/common/bitutil/bitutil_test.go
+++ b/common/bitutil/bitutil_test.go
@@ -92,6 +92,32 @@ func TestOR(t *testing.T) {
 	}
 }
 
+func TestORBytesInexactOverlap(t *testing.T) {
+	shouldPanic := func(f func()) (ok bool) {
+		defer func() {
+			if r := recover(); r != nil {
+				if msg, isString := r.(string); isString && msg == "ORBytes: invalid overlap" {
+					ok = true
+				}
+			}
+		}()
+		f()
+		return
+	}
+	a := make([]byte, 5)
+	if ok := shouldPanic(func() {
+		ORBytes(a[1:4], a[0:3], make([]byte, 3))
+	}); !ok {
+		t.Error("expected panic on inexact overlap")
+	}
+
+	if ok := shouldPanic(func() {
+		ORBytes(a[1:4], make([]byte, 3), a[0:3])
+	}); !ok {
+		t.Error("expected panic on inexact overlap")
+	}
+}
+
 // Tests that bit testing works for various alignments.
 func TestTest(t *testing.T) {
 	for align := 0; align < 2; align++ {
diff --git a/common/bitutil/or_amd64.s b/common/bitutil/or_amd64.s
new file mode 100644
index 000000000000..9c7cd12abb65
--- /dev/null
+++ b/common/bitutil/or_amd64.s
@@ -0,0 +1,59 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s
+
+//go:build !purego
+
+#include "textflag.h"
+
+// func orBytesASM(dst, a, b *byte, n int)
+TEXT ·orBytesASM(SB), NOSPLIT, $0 // dst[i] = a[i] | b[i] for i in [0, n). n == 0 is not handled: it falls into loop16b.
+	MOVQ dst+0(FP), BX // BX = dst
+	MOVQ a+8(FP), SI // SI = a
+	MOVQ b+16(FP), CX // CX = b
+	MOVQ n+24(FP), DX // DX = n, the number of bytes still to process
+	TESTQ $15, DX // if n is not a multiple of 16, trim the tail first (not_aligned).
+	JNZ not_aligned
+
+aligned:
+	MOVQ $0, AX // AX = forward byte offset into the slices
+
+	PCALIGN $16
+loop16b:
+	MOVOU (SI)(AX*1), X0 // OR 16 bytes forwards.
+	MOVOU (CX)(AX*1), X1
+	POR X1, X0
+	MOVOU X0, (BX)(AX*1)
+	ADDQ $16, AX
+	CMPQ DX, AX // loop until the offset reaches the remaining length.
+	JNE loop16b
+	RET
+
+	PCALIGN $16
+loop_1b:
+	SUBQ $1, DX // OR 1 byte backwards, starting from the end.
+	MOVB (SI)(DX*1), DI
+	MOVB (CX)(DX*1), AX
+	ORB AX, DI
+	MOVB DI, (BX)(DX*1)
+	TESTQ $7, DX // repeat until the remaining length is a multiple of 8.
+	JNZ loop_1b
+	CMPQ DX, $0 // nothing left: return.
+	JE ret
+	TESTQ $15, DX // remaining length is a multiple of 16: run the 16-byte loop.
+	JZ aligned
+
+not_aligned:
+	TESTQ $7, DX // remaining length is not a multiple of 8: peel single bytes.
+	JNE loop_1b
+	SUBQ $8, DX // OR 8 bytes backwards.
+	MOVQ (SI)(DX*1), DI
+	MOVQ (CX)(DX*1), AX
+	ORQ AX, DI
+	MOVQ DI, (BX)(DX*1)
+	CMPQ DX, $16 // the remainder is now a multiple of 16; if any is left, run the 16-byte loop.
+	JGE aligned
+
+ret:
+	RET
diff --git a/common/bitutil/or_arm64.s b/common/bitutil/or_arm64.s
new file mode 100644
index 000000000000..e6b49a48cc55
--- /dev/null
+++ b/common/bitutil/or_arm64.s
@@ -0,0 +1,70 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s
+
+//go:build !purego
+
+#include "textflag.h"
+
+// func orBytesASM(dst, a, b *byte, n int)
+TEXT ·orBytesASM(SB), NOSPLIT|NOFRAME, $0 // dst[i] = a[i] | b[i] for i in [0, n)
+	MOVD dst+0(FP), R0 // R0 = dst
+	MOVD a+8(FP), R1 // R1 = a
+	MOVD b+16(FP), R2 // R2 = b
+	MOVD n+24(FP), R3 // R3 = n, the number of bytes still to process
+	CMP $64, R3
+	BLT tail // fewer than 64 bytes: skip the bulk loop
+loop_64:
+	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] // load 64 bytes of a, post-incrementing R1
+	VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16] // load 64 bytes of b, post-incrementing R2
+	VORR V0.B16, V4.B16, V4.B16
+	VORR V1.B16, V5.B16, V5.B16
+	VORR V2.B16, V6.B16, V6.B16
+	VORR V3.B16, V7.B16, V7.B16
+	VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0) // store 64 bytes to dst, post-incrementing R0
+	SUBS $64, R3
+	CMP $64, R3
+	BGE loop_64 // keep going while at least 64 bytes remain
+tail:
+	// Fewer than 64 bytes remain; return at once if none are left.
+	CBZ R3, end
+	TBZ $5, R3, less_than32 // bit 5 of the remainder clear: no 32-byte chunk
+	VLD1.P 32(R1), [V0.B16, V1.B16]
+	VLD1.P 32(R2), [V2.B16, V3.B16]
+	VORR V0.B16, V2.B16, V2.B16
+	VORR V1.B16, V3.B16, V3.B16
+	VST1.P [V2.B16, V3.B16], 32(R0)
+less_than32:
+	TBZ $4, R3, less_than16 // bit 4 clear: no 16-byte chunk
+	LDP.P 16(R1), (R11, R12)
+	LDP.P 16(R2), (R13, R14)
+	ORR R11, R13, R13
+	ORR R12, R14, R14
+	STP.P (R13, R14), 16(R0)
+less_than16:
+	TBZ $3, R3, less_than8 // bit 3 clear: no 8-byte chunk
+	MOVD.P 8(R1), R11
+	MOVD.P 8(R2), R12
+	ORR R11, R12, R12
+	MOVD.P R12, 8(R0)
+less_than8:
+	TBZ $2, R3, less_than4 // bit 2 clear: no 4-byte chunk
+	MOVWU.P 4(R1), R13
+	MOVWU.P 4(R2), R14
+	ORRW R13, R14, R14
+	MOVWU.P R14, 4(R0)
+less_than4:
+	TBZ $1, R3, less_than2 // bit 1 clear: no 2-byte chunk
+	MOVHU.P 2(R1), R15
+	MOVHU.P 2(R2), R16
+	ORRW R15, R16, R16
+	MOVHU.P R16, 2(R0)
+less_than2:
+	TBZ $0, R3, end // bit 0 clear: no trailing byte
+	MOVBU (R1), R17
+	MOVBU (R2), R19
+	ORRW R17, R19, R19
+	MOVBU R19, (R0)
+end:
+	RET
diff --git a/common/bitutil/or_asm.go b/common/bitutil/or_asm.go
new file mode 100644
index 000000000000..bb5362010c04
--- /dev/null
+++ b/common/bitutil/or_asm.go
@@ -0,0 +1,17 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go
+
+//go:build (amd64 || arm64) && !purego
+
+package bitutil
+
+func orBytes(dst, a, b []byte) int {
+	n := min(len(a), len(b)) // only the common prefix of a and b is OR'd
+	orBytesASM(&dst[0], &a[0], &b[0], n) // taking [0] panics if dst, a or b is empty
+	return n // bytes actually OR'd (not len(a)), matching the generic implementation
+}
+
+//go:noescape
+func orBytesASM(dst, a, b *byte, n int)
diff --git a/common/bitutil/or_generic.go b/common/bitutil/or_generic.go
new file mode 100644
index 000000000000..96f73a0273e5
--- /dev/null
+++ b/common/bitutil/or_generic.go
@@ -0,0 +1,40 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (!amd64 && !arm64) || purego
+
+package bitutil
+
+import "unsafe"
+
+// orBytes is the pure-Go fallback: it ORs word-wise when the platform
+// supports unaligned access and byte-wise otherwise.
+func orBytes(dst, a, b []byte) int {
+	if supportsUnaligned {
+		return fastORBytes(dst, a, b)
+	}
+	return safeORBytes(dst, a, b)
+}
+
+// fastORBytes ors in bulk. It only works on architectures that support
+// unaligned read/writes.
+func fastORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n {
+		n = len(b)
+	}
+	w := n / wordSize
+	if w > 0 {
+		dw := *(*[]uintptr)(unsafe.Pointer(&dst))
+		aw := *(*[]uintptr)(unsafe.Pointer(&a))
+		bw := *(*[]uintptr)(unsafe.Pointer(&b))
+		for i := 0; i < w; i++ {
+			dw[i] = aw[i] | bw[i]
+		}
+	}
+	for i := n - n%wordSize; i < n; i++ {
+		dst[i] = a[i] | b[i]
+	}
+	return n
+}