Skip to content

Commit 4ec90dd

Browse files
committed
Added unrolled vectorized Count loop
Gives a performance boost of up to ~21% on .NET 5
1 parent 1d055a1 commit 4ec90dd

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

Microsoft.Toolkit.HighPerformance/Helpers/Internals/SpanHelper.Count.cs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,67 @@ private static nint CountSimd<T>(ref T r0, nint length, T value)
161161

162162
var partials = Vector<T>.Zero;
163163

164+
// Unrolled vectorized loop, with 8 unrolled iterations. We only run this when the
165+
// current type T is at least 2 bytes in size, otherwise the average chunk length
166+
// would always be too small to be able to trigger the unrolled loop, and the overall
167+
// performance would just be slightly worse due to the additional conditional branches.
168+
if (typeof(T) != typeof(sbyte))
169+
{
170+
while (chunkLength >= Vector<T>.Count * 8)
171+
{
172+
ref T ri0 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 0));
173+
var vi0 = Unsafe.As<T, Vector<T>>(ref ri0);
174+
var ve0 = Vector.Equals(vi0, vc);
175+
176+
partials -= ve0;
177+
178+
ref T ri1 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 1));
179+
var vi1 = Unsafe.As<T, Vector<T>>(ref ri1);
180+
var ve1 = Vector.Equals(vi1, vc);
181+
182+
partials -= ve1;
183+
184+
ref T ri2 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 2));
185+
var vi2 = Unsafe.As<T, Vector<T>>(ref ri2);
186+
var ve2 = Vector.Equals(vi2, vc);
187+
188+
partials -= ve2;
189+
190+
ref T ri3 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 3));
191+
var vi3 = Unsafe.As<T, Vector<T>>(ref ri3);
192+
var ve3 = Vector.Equals(vi3, vc);
193+
194+
partials -= ve3;
195+
196+
ref T ri4 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 4));
197+
var vi4 = Unsafe.As<T, Vector<T>>(ref ri4);
198+
var ve4 = Vector.Equals(vi4, vc);
199+
200+
partials -= ve4;
201+
202+
ref T ri5 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 5));
203+
var vi5 = Unsafe.As<T, Vector<T>>(ref ri5);
204+
var ve5 = Vector.Equals(vi5, vc);
205+
206+
partials -= ve5;
207+
208+
ref T ri6 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 6));
209+
var vi6 = Unsafe.As<T, Vector<T>>(ref ri6);
210+
var ve6 = Vector.Equals(vi6, vc);
211+
212+
partials -= ve6;
213+
214+
ref T ri7 = ref Unsafe.Add(ref r0, offset + (Vector<T>.Count * 7));
215+
var vi7 = Unsafe.As<T, Vector<T>>(ref ri7);
216+
var ve7 = Vector.Equals(vi7, vc);
217+
218+
partials -= ve7;
219+
220+
chunkLength -= Vector<T>.Count * 8;
221+
offset += Vector<T>.Count * 8;
222+
}
223+
}
224+
164225
while (chunkLength >= Vector<T>.Count)
165226
{
166227
ref T ri = ref Unsafe.Add(ref r0, offset);

0 commit comments

Comments
 (0)