@@ -79,9 +79,6 @@ public static nint Count<T>(ref T r0, nint length, T value)
7979 /// Implements <see cref="Count{T}"/> with a sequential search.
8080 /// </summary>
8181 [ Pure ]
82- #if NETCOREAPP3_1
83- [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
84- #endif
8582 private static nint CountSequential < T > ( ref T r0 , nint length , T value )
8683 where T : IEquatable < T >
8784 {
@@ -132,9 +129,6 @@ private static nint CountSequential<T>(ref T r0, nint length, T value)
132129 /// Implements <see cref="Count{T}"/> with a vectorized search.
133130 /// </summary>
134131 [ Pure ]
135- #if NETCOREAPP3_1
136- [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
137- #endif
138132 private static nint CountSimd < T > ( ref T r0 , nint length , T value )
139133 where T : unmanaged, IEquatable < T >
140134 {
@@ -161,6 +155,67 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
161155
162156 var partials = Vector < T > . Zero ;
163157
158+ // Unrolled vectorized loop, with 8 unrolled iterations. We only run this when the
159+ // current type T is at least 2 bytes in size, otherwise the average chunk length
160+ // would always be too small to be able to trigger the unrolled loop, and the overall
161+ // performance would just be slightly worse due to the additional conditional branches.
162+ if ( typeof ( T ) != typeof ( sbyte ) )
163+ {
164+ while ( chunkLength >= Vector < T > . Count * 8 )
165+ {
166+ ref T ri0 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 0 ) ) ;
167+ var vi0 = Unsafe . As < T , Vector < T > > ( ref ri0 ) ;
168+ var ve0 = Vector . Equals ( vi0 , vc ) ;
169+
170+ partials -= ve0 ;
171+
172+ ref T ri1 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 1 ) ) ;
173+ var vi1 = Unsafe . As < T , Vector < T > > ( ref ri1 ) ;
174+ var ve1 = Vector . Equals ( vi1 , vc ) ;
175+
176+ partials -= ve1 ;
177+
178+ ref T ri2 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 2 ) ) ;
179+ var vi2 = Unsafe . As < T , Vector < T > > ( ref ri2 ) ;
180+ var ve2 = Vector . Equals ( vi2 , vc ) ;
181+
182+ partials -= ve2 ;
183+
184+ ref T ri3 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 3 ) ) ;
185+ var vi3 = Unsafe . As < T , Vector < T > > ( ref ri3 ) ;
186+ var ve3 = Vector . Equals ( vi3 , vc ) ;
187+
188+ partials -= ve3 ;
189+
190+ ref T ri4 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 4 ) ) ;
191+ var vi4 = Unsafe . As < T , Vector < T > > ( ref ri4 ) ;
192+ var ve4 = Vector . Equals ( vi4 , vc ) ;
193+
194+ partials -= ve4 ;
195+
196+ ref T ri5 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 5 ) ) ;
197+ var vi5 = Unsafe . As < T , Vector < T > > ( ref ri5 ) ;
198+ var ve5 = Vector . Equals ( vi5 , vc ) ;
199+
200+ partials -= ve5 ;
201+
202+ ref T ri6 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 6 ) ) ;
203+ var vi6 = Unsafe . As < T , Vector < T > > ( ref ri6 ) ;
204+ var ve6 = Vector . Equals ( vi6 , vc ) ;
205+
206+ partials -= ve6 ;
207+
208+ ref T ri7 = ref Unsafe . Add ( ref r0 , offset + ( Vector < T > . Count * 7 ) ) ;
209+ var vi7 = Unsafe . As < T , Vector < T > > ( ref ri7 ) ;
210+ var ve7 = Vector . Equals ( vi7 , vc ) ;
211+
212+ partials -= ve7 ;
213+
214+ chunkLength -= Vector < T > . Count * 8 ;
215+ offset += Vector < T > . Count * 8 ;
216+ }
217+ }
218+
164219 while ( chunkLength >= Vector < T > . Count )
165220 {
166221 ref T ri = ref Unsafe . Add ( ref r0 , offset ) ;
@@ -242,28 +297,22 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
242297 private static unsafe nint GetUpperBound < T > ( )
243298 where T : unmanaged
244299 {
245- if ( typeof ( T ) == typeof ( byte ) ||
246- typeof ( T ) == typeof ( sbyte ) ||
247- typeof ( T ) == typeof ( bool ) )
300+ if ( typeof ( T ) == typeof ( sbyte ) )
248301 {
249302 return sbyte . MaxValue ;
250303 }
251304
252- if ( typeof ( T ) == typeof ( char ) ||
253- typeof ( T ) == typeof ( ushort ) ||
254- typeof ( T ) == typeof ( short ) )
305+ if ( typeof ( T ) == typeof ( short ) )
255306 {
256307 return short . MaxValue ;
257308 }
258309
259- if ( typeof ( T ) == typeof ( int ) ||
260- typeof ( T ) == typeof ( uint ) )
310+ if ( typeof ( T ) == typeof ( int ) )
261311 {
262312 return int . MaxValue ;
263313 }
264314
265- if ( typeof ( T ) == typeof ( long ) ||
266- typeof ( T ) == typeof ( ulong ) )
315+ if ( typeof ( T ) == typeof ( long ) )
267316 {
268317 if ( sizeof ( nint ) == sizeof ( int ) )
269318 {
0 commit comments