59 changes: 59 additions & 0 deletions common/bitutil/and_amd64.s
@@ -0,0 +1,59 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s

//go:build !purego

#include "textflag.h"

// func andBytesASM(dst, a, b *byte, n int)
TEXT ·andBytesASM(SB), NOSPLIT, $0
MOVQ dst+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVQ n+24(FP), DX
TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
JNZ not_aligned

aligned:
MOVQ $0, AX // position in slices

PCALIGN $16
loop16b:
MOVOU (SI)(AX*1), X0 // AND 16byte forwards.
MOVOU (CX)(AX*1), X1
PAND X1, X0
MOVOU X0, (BX)(AX*1)
ADDQ $16, AX
CMPQ DX, AX
JNE loop16b
RET

PCALIGN $16
loop_1b:
SUBQ $1, DX // AND 1byte backwards.
MOVB (SI)(DX*1), DI
MOVB (CX)(DX*1), AX
ANDB AX, DI
MOVB DI, (BX)(DX*1)
TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
JNZ loop_1b
CMPQ DX, $0 // if len is 0, ret.
JE ret
TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
JZ aligned

not_aligned:
TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
JNE loop_1b
SUBQ $8, DX // AND 8bytes backwards.
MOVQ (SI)(DX*1), DI
MOVQ (CX)(DX*1), AX
ANDQ AX, DI
MOVQ DI, (BX)(DX*1)
CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
JGE aligned

ret:
RET
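
Not shown in this hunk is the Go-side declaration that the assembly binds to. Below is a minimal sketch of what a companion and_asm.go could look like, mirroring the test_asm.go file further down in this diff; the file name, the andBytes wrapper and the build tag are assumptions here, and only andBytesASM and its signature come from the assembly above.

// Hypothetical companion declaration for and_amd64.s (sketch, not part of this diff).

//go:build amd64 && !purego

package bitutil

// andBytes ANDs a and b into dst via the assembly routine and returns the
// number of bytes processed. It assumes len(dst) == len(a) == len(b), as the
// exported helpers in bitutil.go already guarantee for their fast paths.
func andBytes(dst, a, b []byte) int {
	n := len(dst)
	if n == 0 {
		return 0 // avoid taking &dst[0] on an empty slice
	}
	andBytesASM(&dst[0], &a[0], &b[0], n)
	return n
}

//go:noescape
func andBytesASM(dst, a, b *byte, n int)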
26 changes: 1 addition & 25 deletions common/bitutil/bitutil.go
@@ -149,31 +149,7 @@ func safeORBytes(dst, a, b []byte) int {

// TestBytes tests whether any bit is set in the input byte slice.
func TestBytes(p []byte) bool {
if supportsUnaligned {
return fastTestBytes(p)
}
return safeTestBytes(p)
}

// fastTestBytes tests for set bits in bulk. It only works on architectures that
// support unaligned read/writes.
func fastTestBytes(p []byte) bool {
n := len(p)
w := n / wordSize
if w > 0 {
pw := *(*[]uintptr)(unsafe.Pointer(&p))
for i := 0; i < w; i++ {
if pw[i] != 0 {
return true
}
}
}
for i := n - n%wordSize; i < n; i++ {
if p[i] != 0 {
return true
}
}
return false
return testBytes(p)
}

// safeTestBytes tests for set bits one byte at a time. It works on all
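
The exported behaviour of TestBytes should not change with this refactor; only the dispatch moves into the per-architecture testBytes implementations. An illustrative sanity check, not part of the diff:

package bitutil

import "testing"

// Sketch of a regression check for the refactor: TestBytes must report false
// for an all-zero buffer (and a nil slice) and true once any bit is set.
func TestTestBytesDispatch(t *testing.T) {
	if TestBytes(nil) {
		t.Fatal("nil slice reported as having set bits")
	}
	buf := make([]byte, 1024)
	if TestBytes(buf) {
		t.Fatal("all-zero buffer reported as having set bits")
	}
	buf[777] = 1
	if !TestBytes(buf) {
		t.Fatal("set bit not detected")
	}
}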
63 changes: 63 additions & 0 deletions common/bitutil/test_amd64.s
@@ -0,0 +1,63 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s

//go:build !purego

#include "textflag.h"

// func testBytesASM(p *byte, n int) bool
TEXT ·testBytesASM(SB), NOSPLIT, $0
MOVQ p+0(FP), SI
MOVQ n+8(FP), DX
TESTQ DX, DX // if len is 0, return false
JZ not_found
TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
JNZ not_aligned

aligned:
MOVQ $0, AX // position in slice

PCALIGN $16
loop16b:
MOVOU (SI)(AX*1), X0 // Load 16 bytes
PTEST X0, X0 // Test if all bits are zero (ZF=1 if all zero)
JNZ found // If any bit is set (ZF=0), jump to found
ADDQ $16, AX
CMPQ DX, AX
JNE loop16b
JMP not_found

PCALIGN $16
loop_1b:
SUBQ $1, DX // Test 1 byte backwards.
MOVB (SI)(DX*1), DI
TESTB DI, DI // Test if byte is non-zero
JNZ found
TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
JNZ loop_1b
CMPQ DX, $0 // if len is 0, ret.
JE not_found
TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
JZ aligned

not_aligned:
TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
JNE loop_1b
SUBQ $8, DX // Test 8 bytes backwards.
MOVQ (SI)(DX*1), DI
TESTQ DI, DI // Test if 8 bytes are non-zero
JNZ found
CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
JGE aligned
JMP not_found

not_found:
MOVB $0, ret+16(FP)
RET

found:
MOVB $1, ret+16(FP)
RET
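
For readers less fluent in Go assembly, the routine above roughly does the following: the unaligned tail is consumed backwards, first one byte at a time until the remaining length is a multiple of 8, then one 8-byte word, after which the remaining 16-byte-aligned prefix is scanned 16 bytes per iteration with PTEST. A Go rendering of that control flow, purely for illustration (the function name is hypothetical and this code is not part of the diff):

// testBytesShape mirrors the control flow of testBytesASM (amd64) in plain Go.
func testBytesShape(p []byte) bool {
	n := len(p)
	if n == 0 {
		return false
	}
	// loop_1b: trim single bytes until n is a multiple of 8.
	for n%8 != 0 {
		n--
		if p[n] != 0 {
			return true
		}
	}
	// not_aligned: trim one 8-byte word if n is not yet a multiple of 16.
	if n%16 != 0 {
		n -= 8
		for j := 0; j < 8; j++ {
			if p[n+j] != 0 {
				return true
			}
		}
	}
	// loop16b: scan the aligned prefix 16 bytes at a time (PTEST in assembly).
	for i := 0; i < n; i += 16 {
		for j := 0; j < 16; j++ {
			if p[i+j] != 0 {
				return true
			}
		}
	}
	return false
}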

68 changes: 68 additions & 0 deletions common/bitutil/test_arm64.s
@@ -0,0 +1,68 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s

//go:build !purego

#include "textflag.h"

// func testBytesASM(p *byte, n int) bool
TEXT ·testBytesASM(SB), NOSPLIT|NOFRAME, $0
MOVD p+0(FP), R0
MOVD n+8(FP), R1
CMP $64, R1
BLT tail
loop_64:
VLD1.P 64(R0), [V0.B16, V1.B16, V2.B16, V3.B16]
// OR all vectors together to check if any byte is non-zero
VORR V0.B16, V1.B16, V4.B16
VORR V2.B16, V3.B16, V5.B16
VORR V4.B16, V5.B16, V6.B16
// Check if any byte in V6 is non-zero by checking both 64-bit halves
VMOV V6.D[0], R2
VMOV V6.D[1], R3
ORR R2, R3, R2
CBNZ R2, found
SUBS $64, R1
CMP $64, R1
BGE loop_64
tail:
// quick end
CBZ R1, not_found
TBZ $5, R1, less_than32
VLD1.P 32(R0), [V0.B16, V1.B16]
VORR V0.B16, V1.B16, V2.B16
VMOV V2.D[0], R2
VMOV V2.D[1], R3
ORR R2, R3, R2
CBNZ R2, found
less_than32:
TBZ $4, R1, less_than16
LDP.P 16(R0), (R11, R12)
ORR R11, R12, R2
CBNZ R2, found
less_than16:
TBZ $3, R1, less_than8
MOVD.P 8(R0), R11
CBNZ R11, found
less_than8:
TBZ $2, R1, less_than4
MOVWU.P 4(R0), R11
CBNZ R11, found
less_than4:
TBZ $1, R1, less_than2
MOVHU.P 2(R0), R11
CBNZ R11, found
less_than2:
TBZ $0, R1, not_found
MOVBU (R0), R11
CBNZ R11, found
not_found:
MOVD $0, R0
MOVB R0, ret+16(FP)
RET
found:
MOVD $1, R0
MOVB R0, ret+16(FP)
RET
15 changes: 15 additions & 0 deletions common/bitutil/test_asm.go
@@ -0,0 +1,15 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go

//go:build (amd64 || arm64) && !purego

package bitutil

func testBytes(p []byte) bool {
	if len(p) == 0 {
		return false // taking &p[0] of an empty slice would panic
	}
	return testBytesASM(&p[0], len(p))
}

//go:noescape
func testBytesASM(p *byte, n int) bool
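
A rough way to compare the new assembly path against the portable byte-wise fallback; safeTestBytes comes from bitutil.go, while the benchmark pair itself is only a sketch and not part of this diff:

package bitutil

import "testing"

// Illustrative benchmarks on a 1 KiB all-zero buffer (worst case: every byte
// has to be inspected before returning false).
func BenchmarkTestBytesASM1K(b *testing.B) {
	buf := make([]byte, 1024)
	b.SetBytes(int64(len(buf)))
	for i := 0; i < b.N; i++ {
		testBytes(buf)
	}
}

func BenchmarkSafeTestBytes1K(b *testing.B) {
	buf := make([]byte, 1024)
	b.SetBytes(int64(len(buf)))
	for i := 0; i < b.N; i++ {
		safeTestBytes(buf)
	}
}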
37 changes: 37 additions & 0 deletions common/bitutil/test_generic.go
@@ -0,0 +1,37 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (!amd64 && !arm64) || purego

package bitutil

import "unsafe"

func testBytes(p []byte) bool {
if supportsUnaligned {
return fastTestBytes(p)
}
return safeTestBytes(p)
}

// fastTestBytes tests for set bits in bulk. It only works on architectures that
// support unaligned read/writes.
func fastTestBytes(p []byte) bool {
n := len(p)
w := n / wordSize
if w > 0 {
pw := *(*[]uintptr)(unsafe.Pointer(&p))
for i := 0; i < w; i++ {
if pw[i] != 0 {
return true
}
}
}
for i := n - n%wordSize; i < n; i++ {
if p[i] != 0 {
return true
}
}
return false
}
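
fastTestBytes reinterprets the []byte slice header as a []uintptr and relies on reading only the first w elements. The same word-at-a-time scan can be written with unsafe.Slice (Go 1.17+); the variant below is purely illustrative and not part of this diff:

// fastTestBytesAlt is an illustrative equivalent of fastTestBytes that builds
// the word view with unsafe.Slice instead of casting the slice header.
func fastTestBytesAlt(p []byte) bool {
	n := len(p)
	if w := n / wordSize; w > 0 {
		words := unsafe.Slice((*uintptr)(unsafe.Pointer(&p[0])), w)
		for _, v := range words {
			if v != 0 {
				return true
			}
		}
	}
	for i := n - n%wordSize; i < n; i++ { // unaligned tail, one byte at a time
		if p[i] != 0 {
			return true
		}
	}
	return false
}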