@@ -161,6 +161,67 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
161
161
162
162
var partials = Vector < T > . Zero ;
163
163
164
+ // Unrolled vectorized loop, with 8 unrolled iterations. We only run this when the
165
+ // current type T is at least 2 bytes in size, otherwise the average chunk length
166
+ // would always be too small to be able to trigger the unrolled loop, and the overall
167
+ // performance would just be slightly worse due to the additional conditional branches.
168
+ if ( typeof ( T ) != typeof ( sbyte ) )
169
+ {
170
+ while ( chunkLength >= Vector < T > . Count * 8 )
171
+ {
172
+ ref T ri0 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 0 ) ) ;
173
+ var vi0 = Unsafe . As < T , Vector < T > > ( ref ri0 ) ;
174
+ var ve0 = Vector . Equals ( vi0 , vc ) ;
175
+
176
+ partials -= ve0 ;
177
+
178
+ ref T ri1 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 1 ) ) ;
179
+ var vi1 = Unsafe . As < T , Vector < T > > ( ref ri1 ) ;
180
+ var ve1 = Vector . Equals ( vi1 , vc ) ;
181
+
182
+ partials -= ve1 ;
183
+
184
+ ref T ri2 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 2 ) ) ;
185
+ var vi2 = Unsafe . As < T , Vector < T > > ( ref ri2 ) ;
186
+ var ve2 = Vector . Equals ( vi2 , vc ) ;
187
+
188
+ partials -= ve2 ;
189
+
190
+ ref T ri3 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 3 ) ) ;
191
+ var vi3 = Unsafe . As < T , Vector < T > > ( ref ri3 ) ;
192
+ var ve3 = Vector . Equals ( vi3 , vc ) ;
193
+
194
+ partials -= ve3 ;
195
+
196
+ ref T ri4 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 4 ) ) ;
197
+ var vi4 = Unsafe . As < T , Vector < T > > ( ref ri4 ) ;
198
+ var ve4 = Vector . Equals ( vi4 , vc ) ;
199
+
200
+ partials -= ve4 ;
201
+
202
+ ref T ri5 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 5 ) ) ;
203
+ var vi5 = Unsafe . As < T , Vector < T > > ( ref ri5 ) ;
204
+ var ve5 = Vector . Equals ( vi5 , vc ) ;
205
+
206
+ partials -= ve5 ;
207
+
208
+ ref T ri6 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 6 ) ) ;
209
+ var vi6 = Unsafe . As < T , Vector < T > > ( ref ri6 ) ;
210
+ var ve6 = Vector . Equals ( vi6 , vc ) ;
211
+
212
+ partials -= ve6 ;
213
+
214
+ ref T ri7 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 7 ) ) ;
215
+ var vi7 = Unsafe . As < T , Vector < T > > ( ref ri7 ) ;
216
+ var ve7 = Vector . Equals ( vi7 , vc ) ;
217
+
218
+ partials -= ve7 ;
219
+
220
+ chunkLength -= Vector < T > . Count * 8 ;
221
+ offset += Vector < T > . Count * 8 ;
222
+ }
223
+ }
224
+
164
225
while ( chunkLength >= Vector < T > . Count )
165
226
{
166
227
ref T ri = ref Unsafe . Add ( ref r0 , offset ) ;
0 commit comments