1
+ #pragma warning disable SYSLIB5003
2
+
3
+ using System ;
4
+ using System . Numerics ;
5
+ using System . Linq ;
6
+ using System . Diagnostics ;
7
+ using System . Runtime . Intrinsics ;
8
+ using System . Runtime . Intrinsics . Arm ;
9
+ using BenchmarkDotNet . Attributes ;
10
+ using BenchmarkDotNet . Extensions ;
11
+ using BenchmarkDotNet . Configs ;
12
+ using BenchmarkDotNet . Filters ;
13
+ using MicroBenchmarks ;
14
+
15
+ namespace SveBenchmarks
16
+ {
17
+ [ BenchmarkCategory ( Categories . Runtime ) ]
18
+ [ OperatingSystemsArchitectureFilter ( allowed : true , System . Runtime . InteropServices . Architecture . Arm64 ) ]
19
+ [ Config ( typeof ( Config ) ) ]
20
+ public class StrCmp
21
+ {
22
/// <summary>
/// Benchmark configuration that registers a filter whose predicate is <c>Sve.IsSupported</c>.
/// </summary>
private class Config : ManualConfig
{
    public Config()
    {
        // The predicate ignores the benchmark case it is handed and only reports SVE availability.
        var sveAvailable = new SimpleFilter(benchmarkCase => Sve.IsSupported);
        AddFilter(sveAvailable);
    }
}
29
+
30
/// <summary>Number of bytes requested for each of the two compared arrays.</summary>
[Params(15, 127, 527, 10015)]
public int Size;

/// <summary>Selects which element, if any, is altered during setup: "Middle", "End" or "None".</summary>
[Params("Middle", "End", "None")]
public string Modify;

// Input buffers compared by every benchmark in this class.
private byte[] _arr1;
private byte[] _arr2;
37
+
38
/// <summary>
/// Allocates both input arrays with <c>Size</c> elements and then applies the
/// single-element change selected by <c>Modify</c>.
/// </summary>
[GlobalSetup]
public virtual void Setup()
{
    _arr1 = ValuesGenerator.Array<byte>(Size);
    _arr2 = ValuesGenerator.Array<byte>(Size);

    // NOTE(review): the "None" case expects the two generated arrays to be equal —
    // confirm that ValuesGenerator returns the same contents for both calls.
    if (Modify == "Middle")
    {
        // Bump the element in the middle of the first array.
        int middle = Size / 2;
        _arr1[middle] += 1;
    }
    else if (Modify == "End")
    {
        // Bump the last element of the second array.
        int last = Size - 1;
        _arr2[last] += 1;
    }
    // "None" (or any other value): leave both arrays untouched.
}
61
+
62
/// <summary>
/// Byte-by-byte comparison of the first <c>Size</c> elements of the two arrays.
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ (first array minus second),
/// or 0 when no difference is found or the array lengths do not match.
/// </returns>
[Benchmark]
public int Scalar()
{
    // Mismatched lengths are not expected; flag it in debug builds and bail out.
    if (_arr1.Length != _arr2.Length)
    {
        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    int index = 0;
    while (index < Size)
    {
        byte left = _arr1[index];
        byte right = _arr2[index];

        if (left != right)
        {
            return left - right;
        }

        index++;
    }

    return 0;
}
79
+
80
/// <summary>
/// Compares the two arrays one <c>Vector128&lt;byte&gt;</c> at a time, then finishes with a
/// scalar loop that starts at the first block that differed (or at the unprocessed tail).
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ (first array minus second),
/// or 0 when no difference is found or the array lengths do not match.
/// </returns>
[Benchmark]
public int Vector128StrCmp()
{
    // Mismatched lengths are not expected; flag it in debug builds and bail out.
    if (_arr1.Length != _arr2.Length)
    {
        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    int lanes = Vector128<byte>.Count;
    int lastVectorStart = Size - lanes;
    int pos = 0;

    // Advance in whole vectors while every lane matches.
    while (pos <= lastVectorStart)
    {
        Vector128<byte> left = Vector128.LoadUnsafe(ref _arr1[pos]);
        Vector128<byte> right = Vector128.LoadUnsafe(ref _arr2[pos]);

        if (!Vector128.EqualsAll(left, right))
        {
            // Leave pos at the start of the differing block for the scalar scan below.
            break;
        }

        pos += lanes;
    }

    // Scalar scan over the differing block and/or the remaining tail elements.
    for (; pos < Size; pos++)
    {
        byte a = _arr1[pos];
        byte b = _arr2[pos];

        if (a != b)
        {
            return a - b;
        }
    }

    return 0;
}
113
+
114
+
115
/// <summary>
/// Compares the two arrays with SVE, using a while-less-than mask over
/// <c>[i, Size)</c> to govern both the loop condition and the loads.
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ (first array minus second),
/// or 0 when no difference is found, the array lengths do not match, or SVE is not supported.
/// </returns>
[Benchmark]
public unsafe long SveStrCmp()
{
    if (Sve.IsSupported)
    {
        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
        Vector<byte> cmp = Vector<byte>.Zero;
        Vector<byte> arr1_data, arr2_data;

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                while (Sve.TestFirstTrue(ptrue, pLoop))
                {
                    arr1_data = Sve.LoadVector(pLoop, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(pLoop, arr2_ptr + i);

                    // Stop if any values aren't equal; i stays at the start of the differing block.
                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    if (Sve.TestAnyTrue(ptrue, cmp))
                    {
                        break;
                    }

                    i += elemsInVector;

                    pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
                }

                // Find the first lane flagged by the last comparison and return the byte difference there.
                // Scanning the lanes directly (rather than packing them into a 32-bit mask) stays correct
                // for vectors with more than 32 byte lanes, and never indexes past the arrays when
                // no lane is flagged.
                for (int j = 0; j < elemsInVector; j++)
                {
                    if (cmp.GetElement(j) != 0)
                    {
                        return _arr1[i + j] - _arr2[i + j];
                    }
                }

                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    return 0;
}
171
+
172
/// <summary>
/// Compares the two arrays with SVE using full-width loads under an all-true mask,
/// then finishes with a scalar loop that starts at the first block that differed
/// (or at the unprocessed tail).
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ (first array minus second),
/// or 0 when no difference is found, the array lengths do not match, or SVE is not supported.
/// </returns>
[Benchmark]
public unsafe long SveTail()
{
    if (Sve.IsSupported)
    {
        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> cmp;
        Vector<byte> arr1_data, arr2_data;

        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                for (; i <= Size - elemsInVector; i += elemsInVector)
                {
                    arr1_data = Sve.LoadVector(ptrue, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(ptrue, arr2_ptr + i);

                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    // Sum the per-lane comparison results; a non-zero sum means some lane differed.
                    // The sum is tested without narrowing to byte so that a total which is a
                    // multiple of 256 is not mistaken for "all equal".
                    if (Sve.AddAcross(cmp).ToScalar() != 0)
                    {
                        break;
                    }
                }

                // Scalar scan over the differing block and/or the remaining tail elements.
                for (; i < Size; i++)
                {
                    if (_arr1[i] != _arr2[i])
                    {
                        return _arr1[i] - _arr2[i];
                    }
                }

                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    return 0;
}
219
+ }
220
+ }
221
+
222
+ #pragma warning restore SYSLIB5003
0 commit comments