Skip to content

Commit dff466f

Browse files
committed
common/bitutil: optimize TestBytes
1 parent 5d51208 commit dff466f

File tree

6 files changed

+243
-25
lines changed

6 files changed

+243
-25
lines changed

common/bitutil/and_amd64.s

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s
5+
6+
//go:build !purego
7+
8+
#include "textflag.h"
9+
10+
// func andBytesASM(dst, a, b *byte, n int)
//
// andBytesASM stores the bitwise AND of the n bytes at a and the n bytes at b
// into the n bytes at dst.
//
// Strategy: the tail is peeled off from the end of the buffers first: single
// bytes until the remaining length is a multiple of 8, then one 8-byte word
// if the remaining length is still not a multiple of 16. Whatever is left is
// a multiple of 16 and is processed front to back in 16-byte chunks.
// "aligned" in the labels and comments refers to the remaining length being a
// multiple of 16, not to pointer alignment; the 16-byte loads/stores use
// MOVOU.
//
// Registers: BX = dst, SI = a, CX = b, DX = remaining length,
// AX = forward offset inside loop16b (byte/word scratch elsewhere),
// DI = scratch.
TEXT ·andBytesASM(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  n+24(FP), DX
	TESTQ DX, DX               // n == 0: nothing to do. Without this guard a zero
	JZ    ret                  // length passes the "multiple of 16" test below and
	                           // loop16b would touch 16 bytes of an empty input.
	TESTQ $15, DX              // AND 15 & len, if not zero jump to not_aligned.
	JNZ   not_aligned

aligned:
	MOVQ $0, AX                // position in slices

	PCALIGN $16
loop16b:
	MOVOU (SI)(AX*1), X0       // AND 16byte forwards.
	MOVOU (CX)(AX*1), X1
	PAND  X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ  $16, AX
	CMPQ  DX, AX               // stop once the offset reaches the remaining length
	JNE   loop16b
	RET

	PCALIGN $16
loop_1b:
	SUBQ  $1, DX               // AND 1byte backwards.
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	ANDB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX               // AND 7 & len, if not zero jump to loop_1b.
	JNZ   loop_1b
	CMPQ  DX, $0               // if len is 0, ret.
	JE    ret
	TESTQ $15, DX              // AND 15 & len, if zero jump to aligned.
	JZ    aligned

not_aligned:
	TESTQ $7, DX               // AND $7 & len, if not zero jump to loop_1b.
	JNE   loop_1b
	SUBQ  $8, DX               // AND 8bytes backwards.
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	ANDQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16              // if len is greater or equal 16 here, it must be aligned.
	JGE   aligned              // otherwise len is 0 and we fall through to ret.

ret:
	RET

common/bitutil/bitutil.go

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -149,31 +149,7 @@ func safeORBytes(dst, a, b []byte) int {
149149

150150
// TestBytes tests whether any bit is set in the input byte slice.
151151
func TestBytes(p []byte) bool {
152-
if supportsUnaligned {
153-
return fastTestBytes(p)
154-
}
155-
return safeTestBytes(p)
156-
}
157-
158-
// fastTestBytes tests for set bits in bulk. It only works on architectures that
159-
// support unaligned read/writes.
160-
func fastTestBytes(p []byte) bool {
161-
n := len(p)
162-
w := n / wordSize
163-
if w > 0 {
164-
pw := *(*[]uintptr)(unsafe.Pointer(&p))
165-
for i := 0; i < w; i++ {
166-
if pw[i] != 0 {
167-
return true
168-
}
169-
}
170-
}
171-
for i := n - n%wordSize; i < n; i++ {
172-
if p[i] != 0 {
173-
return true
174-
}
175-
}
176-
return false
152+
return testBytes(p)
177153
}
178154

179155
// safeTestBytes tests for set bits one byte at a time. It works on all

common/bitutil/test_amd64.s

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s
5+
6+
//go:build !purego
7+
8+
#include "textflag.h"
9+
10+
// func testBytesASM(p *byte, n int) bool
//
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// It returns false for n == 0.
//
// Strategy: the tail is examined from the end of the buffer first: single
// bytes until the remaining length is a multiple of 8, then one 8-byte word
// if the remaining length is still not a multiple of 16. Whatever is left is
// a multiple of 16 and is scanned front to back in 16-byte chunks. "aligned"
// in the labels and comments refers to the remaining length being a multiple
// of 16, not to pointer alignment; the 16-byte loads use MOVOU.
//
// The 16-byte check uses PCMPEQB/PMOVMSKB (SSE2) rather than PTEST: PTEST is
// an SSE4.1 instruction and this routine runs without any CPU feature check,
// so it must stay within the baseline x86-64 instruction set.
//
// Registers: SI = p, DX = remaining length, AX = forward offset inside
// loop16b, DI = scratch, X1 = all-zero reference vector.
TEXT ·testBytesASM(SB), NOSPLIT, $0
	MOVQ  p+0(FP), SI
	MOVQ  n+8(FP), DX
	TESTQ DX, DX               // if len is 0, return false
	JZ    not_found
	TESTQ $15, DX              // AND 15 & len, if not zero jump to not_aligned.
	JNZ   not_aligned

aligned:
	MOVQ $0, AX                // position in slice
	PXOR X1, X1                // X1 = 0, reference for the byte-wise compare

	PCALIGN $16
loop16b:
	MOVOU    (SI)(AX*1), X0    // Load 16 bytes
	PCMPEQB  X1, X0            // each byte of X0 becomes 0xFF if the input byte was 0
	PMOVMSKB X0, DI            // one mask bit per byte
	CMPL     DI, $0xFFFF       // all 16 mask bits set <=> all 16 bytes were zero
	JNE      found             // at least one byte was non-zero
	ADDQ     $16, AX
	CMPQ     DX, AX            // stop once the offset reaches the remaining length
	JNE      loop16b
	JMP      not_found

	PCALIGN $16
loop_1b:
	SUBQ  $1, DX               // Test 1 byte backwards.
	MOVB  (SI)(DX*1), DI
	TESTB DI, DI               // Test if byte is non-zero
	JNZ   found
	TESTQ $7, DX               // AND 7 & len, if not zero jump to loop_1b.
	JNZ   loop_1b
	CMPQ  DX, $0               // if len is 0, ret.
	JE    not_found
	TESTQ $15, DX              // AND 15 & len, if zero jump to aligned.
	JZ    aligned

not_aligned:
	TESTQ $7, DX               // AND $7 & len, if not zero jump to loop_1b.
	JNE   loop_1b
	SUBQ  $8, DX               // Test 8 bytes backwards.
	MOVQ  (SI)(DX*1), DI
	TESTQ DI, DI               // Test if 8 bytes are non-zero
	JNZ   found
	CMPQ  DX, $16              // if len is greater or equal 16 here, it must be aligned.
	JGE   aligned
	JMP   not_found            // otherwise len is 0: everything has been checked.

not_found:
	MOVB $0, ret+16(FP)
	RET

found:
	MOVB $1, ret+16(FP)
	RET
63+

common/bitutil/test_arm64.s

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright 2020 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s
5+
6+
//go:build !purego
7+
8+
#include "textflag.h"
9+
10+
// func testBytesASM(p *byte, n int) bool
//
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// It returns false for n == 0.
//
// Registers: R0 = read cursor (advanced by the post-increment loads),
// R1 = number of bytes still to examine, R2/R3/R11/R12 = scratch.
//
// The buffer is scanned front to back: first in 64-byte chunks while at
// least 64 bytes remain, then the remaining 0..63 bytes are handled by
// testing bits 5..0 of R1, each bit selecting one chunk of 32, 16, 8, 4, 2
// or 1 bytes.
11+
TEXT ·testBytesASM(SB), NOSPLIT|NOFRAME, $0
12+
MOVD p+0(FP), R0
13+
MOVD n+8(FP), R1
14+
// Fewer than 64 bytes in total: skip the bulk loop entirely.
CMP $64, R1
15+
BLT tail
16+
loop_64:
17+
// Load 64 bytes into V0..V3 and advance R0 by 64.
VLD1.P 64(R0), [V0.B16, V1.B16, V2.B16, V3.B16]
18+
// OR all vectors together to check if any byte is non-zero
19+
VORR V0.B16, V1.B16, V4.B16
20+
VORR V2.B16, V3.B16, V5.B16
21+
VORR V4.B16, V5.B16, V6.B16
22+
// Check if any byte in V6 is non-zero by checking both 64-bit halves
23+
VMOV V6.D[0], R2
24+
VMOV V6.D[1], R3
25+
ORR R2, R3, R2
26+
CBNZ R2, found
27+
// 64 bytes consumed; keep looping while at least 64 more remain.
SUBS $64, R1
28+
CMP $64, R1
29+
BGE loop_64
30+
tail:
31+
// Fewer than 64 bytes remain here. If nothing is left, no set bit was found.
32+
CBZ R1, not_found
33+
// Bit 5 of the remaining length set: examine a 32-byte chunk.
TBZ $5, R1, less_than32
34+
VLD1.P 32(R0), [V0.B16, V1.B16]
35+
VORR V0.B16, V1.B16, V2.B16
36+
VMOV V2.D[0], R2
37+
VMOV V2.D[1], R3
38+
ORR R2, R3, R2
39+
CBNZ R2, found
40+
less_than32:
41+
// Bit 4 set: examine a 16-byte chunk as two 64-bit registers.
TBZ $4, R1, less_than16
42+
LDP.P 16(R0), (R11, R12)
43+
ORR R11, R12, R2
44+
CBNZ R2, found
45+
less_than16:
46+
// Bit 3 set: examine an 8-byte chunk.
TBZ $3, R1, less_than8
47+
MOVD.P 8(R0), R11
48+
CBNZ R11, found
49+
less_than8:
50+
// Bit 2 set: examine a 4-byte chunk.
TBZ $2, R1, less_than4
51+
MOVWU.P 4(R0), R11
52+
CBNZ R11, found
53+
less_than4:
54+
// Bit 1 set: examine a 2-byte chunk.
TBZ $1, R1, less_than2
55+
MOVHU.P 2(R0), R11
56+
CBNZ R11, found
57+
less_than2:
58+
// Bit 0 set: examine the final single byte.
TBZ $0, R1, not_found
59+
MOVBU (R0), R11
60+
CBNZ R11, found
61+
not_found:
62+
// Every byte examined was zero: return false.
MOVD $0, R0
63+
MOVB R0, ret+16(FP)
64+
RET
65+
found:
66+
// A non-zero chunk was seen: return true.
MOVD $1, R0
67+
MOVB R0, ret+16(FP)
68+
RET

common/bitutil/test_asm.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go
5+
6+
//go:build (amd64 || arm64) && !purego
7+
8+
package bitutil
9+
10+
func testBytes(p []byte) bool {
11+
return testBytesASM(&p[0], len(p))
12+
}
13+
14+
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// It has no Go body; the implementation lives in the per-architecture
// assembly files (test_amd64.s, test_arm64.s).
//
//go:noescape
15+
func testBytesASM(p *byte, n int) bool

common/bitutil/test_generic.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2013 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build (!amd64 && !arm64) || purego
6+
7+
package bitutil
8+
9+
import "unsafe"
10+
11+
func TestBytes(p []byte) bool {
12+
if supportsUnaligned {
13+
return fastTestBytes(p)
14+
}
15+
return safeTestBytes(p)
16+
}
17+
18+
// fastTestBytes tests for set bits in bulk. It only works on architectures that
19+
// support unaligned read/writes.
20+
func fastTestBytes(p []byte) bool {
21+
n := len(p)
22+
w := n / wordSize
23+
if w > 0 {
24+
pw := *(*[]uintptr)(unsafe.Pointer(&p))
25+
for i := 0; i < w; i++ {
26+
if pw[i] != 0 {
27+
return true
28+
}
29+
}
30+
}
31+
for i := n - n%wordSize; i < n; i++ {
32+
if p[i] != 0 {
33+
return true
34+
}
35+
}
36+
return false
37+
}

0 commit comments

Comments
 (0)