2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
+ #include "asm_riscv64.h"
5
6
#include "go_asm.h"
6
7
#include "textflag.h"
7
8
@@ -11,12 +12,14 @@ TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
11
12
// X12 = b_cap (unused)
12
13
// X13 = byte to find
13
14
AND $0xff , X13, X12 // x12 byte to look for
14
- MOV X10, X13 // store base for later
15
15
16
16
SLTI $24 , X11, X14
17
- ADD X10, X11 // end
18
- BEQZ X14, bigBody
17
+ BNEZ X14, small
18
+ JMP indexByteBig<>(SB)
19
19
20
+ small:
21
+ MOV X10, X13 // store base for later
22
+ ADD X10, X11 // end
20
23
SUB $1 , X10
21
24
loop:
22
25
ADD $1 , X10
@@ -31,21 +34,19 @@ notfound:
31
34
MOV $-1 , X10
32
35
RET
33
36
34
- bigBody:
35
- JMP indexByteBig<>(SB)
36
-
37
37
TEXT ·IndexByteString<ABIInternal> (SB),NOSPLIT,$0 -32
38
38
// X10 = b_base
39
39
// X11 = b_len
40
40
// X12 = byte to find
41
-
42
41
AND $0xff , X12 // x12 byte to look for
43
- MOV X10, X13 // store base for later
44
42
45
43
SLTI $24 , X11, X14
46
- ADD X10, X11 // end
47
- BEQZ X14, bigBody
44
+ BNEZ X14, small
45
+ JMP indexByteBig<>(SB)
48
46
47
+ small:
48
+ MOV X10, X13 // store base for later
49
+ ADD X10, X11 // end
49
50
SUB $1 , X10
50
51
loop:
51
52
ADD $1 , X10
@@ -60,20 +61,41 @@ notfound:
60
61
MOV $-1 , X10
61
62
RET
62
63
63
- bigBody:
64
- JMP indexByteBig<>(SB)
65
-
66
64
TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0
67
- // On entry
65
+ // On entry:
68
66
// X10 = b_base
69
- // X11 = end
67
+ // X11 = b_len (at least 16 bytes; the visible callers only jump here when b_len >= 24)
70
68
// X12 = byte to find
71
- // X13 = b_base
72
- // X11 is at least 16 bytes > X10
73
-
74
- // On exit
69
+ // On exit:
75
70
// X10 = index of first instance of sought byte, if found, or -1 otherwise
76
71
72
+ MOV X10, X13 // store base for later
73
+
74
+ #ifndef hasV
75
+ MOVB internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
76
+ BEQZ X5, indexbyte_scalar
77
+ #endif
78
+
79
+ PCALIGN $16
80
+ vector_loop:
81
+ VSETVLI X11, E8, M8, TA, MA, X5
82
+ VLE8V (X10), V8
83
+ VMSEQVX X12, V8, V0
84
+ VFIRSTM V0, X6
85
+ BGEZ X6, vector_found
86
+ ADD X5, X10
87
+ SUB X5, X11
88
+ BNEZ X11, vector_loop
89
+ JMP notfound
90
+
91
+ vector_found:
92
+ SUB X13, X10
93
+ ADD X6, X10
94
+ RET
95
+
96
+ indexbyte_scalar:
97
+ ADD X10, X11 // end
98
+
77
99
// Process the first few bytes until we get to an 8 byte boundary
78
100
// No need to check for end here as we have at least 16 bytes in
79
101
// the buffer.
0 commit comments