@@ -79,9 +79,6 @@ public static nint Count<T>(ref T r0, nint length, T value)
79
79
/// Implements <see cref="Count{T}"/> with a sequential search.
80
80
/// </summary>
81
81
[ Pure ]
82
- #if NETCOREAPP3_1
83
- [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
84
- #endif
85
82
private static nint CountSequential < T > ( ref T r0 , nint length , T value )
86
83
where T : IEquatable < T >
87
84
{
@@ -132,9 +129,6 @@ private static nint CountSequential<T>(ref T r0, nint length, T value)
132
129
/// Implements <see cref="Count{T}"/> with a vectorized search.
133
130
/// </summary>
134
131
[ Pure ]
135
- #if NETCOREAPP3_1
136
- [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
137
- #endif
138
132
private static nint CountSimd < T > ( ref T r0 , nint length , T value )
139
133
where T : unmanaged, IEquatable < T >
140
134
{
@@ -161,6 +155,67 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
161
155
162
156
var partials = Vector < T > . Zero ;
163
157
158
+ // Unrolled vectorized loop, with 8 unrolled iterations. We only run this when the
159
+ // current type T is at least 2 bytes in size, otherwise the average chunk length
160
+ // would always be too small to be able to trigger the unrolled loop, and the overall
161
+ // performance would just be slightly worse due to the additional conditional branches.
162
+ if ( typeof ( T ) != typeof ( sbyte ) )
163
+ {
164
+ while ( chunkLength >= Vector < T > . Count * 8 )
165
+ {
166
+ ref T ri0 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 0 ) ) ;
167
+ var vi0 = Unsafe . As < T , Vector < T > > ( ref ri0 ) ;
168
+ var ve0 = Vector . Equals ( vi0 , vc ) ;
169
+
170
+ partials -= ve0 ;
171
+
172
+ ref T ri1 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 1 ) ) ;
173
+ var vi1 = Unsafe . As < T , Vector < T > > ( ref ri1 ) ;
174
+ var ve1 = Vector . Equals ( vi1 , vc ) ;
175
+
176
+ partials -= ve1 ;
177
+
178
+ ref T ri2 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 2 ) ) ;
179
+ var vi2 = Unsafe . As < T , Vector < T > > ( ref ri2 ) ;
180
+ var ve2 = Vector . Equals ( vi2 , vc ) ;
181
+
182
+ partials -= ve2 ;
183
+
184
+ ref T ri3 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 3 ) ) ;
185
+ var vi3 = Unsafe . As < T , Vector < T > > ( ref ri3 ) ;
186
+ var ve3 = Vector . Equals ( vi3 , vc ) ;
187
+
188
+ partials -= ve3 ;
189
+
190
+ ref T ri4 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 4 ) ) ;
191
+ var vi4 = Unsafe . As < T , Vector < T > > ( ref ri4 ) ;
192
+ var ve4 = Vector . Equals ( vi4 , vc ) ;
193
+
194
+ partials -= ve4 ;
195
+
196
+ ref T ri5 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 5 ) ) ;
197
+ var vi5 = Unsafe . As < T , Vector < T > > ( ref ri5 ) ;
198
+ var ve5 = Vector . Equals ( vi5 , vc ) ;
199
+
200
+ partials -= ve5 ;
201
+
202
+ ref T ri6 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 6 ) ) ;
203
+ var vi6 = Unsafe . As < T , Vector < T > > ( ref ri6 ) ;
204
+ var ve6 = Vector . Equals ( vi6 , vc ) ;
205
+
206
+ partials -= ve6 ;
207
+
208
+ ref T ri7 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 7 ) ) ;
209
+ var vi7 = Unsafe . As < T , Vector < T > > ( ref ri7 ) ;
210
+ var ve7 = Vector . Equals ( vi7 , vc ) ;
211
+
212
+ partials -= ve7 ;
213
+
214
+ chunkLength -= Vector < T > . Count * 8 ;
215
+ offset += Vector < T > . Count * 8 ;
216
+ }
217
+ }
218
+
164
219
while ( chunkLength >= Vector < T > . Count )
165
220
{
166
221
ref T ri = ref Unsafe . Add ( ref r0 , offset ) ;
@@ -242,28 +297,22 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
242
297
private static unsafe nint GetUpperBound < T > ( )
243
298
where T : unmanaged
244
299
{
245
- if ( typeof ( T ) == typeof ( byte ) ||
246
- typeof ( T ) == typeof ( sbyte ) ||
247
- typeof ( T ) == typeof ( bool ) )
300
+ if ( typeof ( T ) == typeof ( sbyte ) )
248
301
{
249
302
return sbyte . MaxValue ;
250
303
}
251
304
252
- if ( typeof ( T ) == typeof ( char ) ||
253
- typeof ( T ) == typeof ( ushort ) ||
254
- typeof ( T ) == typeof ( short ) )
305
+ if ( typeof ( T ) == typeof ( short ) )
255
306
{
256
307
return short . MaxValue ;
257
308
}
258
309
259
- if ( typeof ( T ) == typeof ( int ) ||
260
- typeof ( T ) == typeof ( uint ) )
310
+ if ( typeof ( T ) == typeof ( int ) )
261
311
{
262
312
return int . MaxValue ;
263
313
}
264
314
265
- if ( typeof ( T ) == typeof ( long ) ||
266
- typeof ( T ) == typeof ( ulong ) )
315
+ if ( typeof ( T ) == typeof ( long ) )
267
316
{
268
317
if ( sizeof ( nint ) == sizeof ( int ) )
269
318
{
0 commit comments