Skip to content

Commit 3759e5a

Browse files
authored
Merge pull request #31 from rameel/advsimd-support
Add AdvSimd support
2 parents 4933df6 + a1affb9 commit 3759e5a

File tree

5 files changed

+127
-34
lines changed

5 files changed

+127
-34
lines changed

.github/workflows/test.yml

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [ubuntu-latest, windows-latest]
14+
os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest]
1515

1616
steps:
1717
- name: Setup .NET
@@ -38,24 +38,22 @@ jobs:
3838
- name: Test (Release)
3939
run: dotnet test -c Release --no-build
4040

41-
- name: Test (Debug, Avx2=Disabled)
41+
- name: Test (Debug, AVX2=0)
4242
env:
43-
COMPlus_EnableAVX2: "0"
43+
DOTNET_EnableAVX2: "0"
4444
run: dotnet test -c Debug --no-build
4545

46-
- name: Test (Release, Avx2=Disabled)
46+
- name: Test (Release, AVX2=0)
4747
env:
48-
COMPlus_EnableAVX2: "0"
48+
DOTNET_EnableAVX2: "0"
4949
run: dotnet test -c Release --no-build
5050

51-
- name: Test (Debug, Avx2=Disabled, Sse2=Disabled)
51+
- name: Test (Debug, HWIntrinsic=0)
5252
env:
53-
COMPlus_EnableAVX2: "0"
54-
COMPlus_EnableSSE2: "0"
53+
DOTNET_EnableHWIntrinsic: "0"
5554
run: dotnet test -c Debug --no-build
5655

57-
- name: Test (Release, Avx2=Disabled, Sse2=Disabled)
56+
- name: Test (Release, HWIntrinsic=0)
5857
env:
59-
COMPlus_EnableAVX2: "0"
60-
COMPlus_EnableSSE2: "0"
58+
DOTNET_EnableHWIntrinsic: "0"
6159
run: dotnet test -c Release --no-build

Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22
<PropertyGroup>
3-
<TargetFramework>net6.0</TargetFramework>
3+
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
44
<ImplicitUsings>enable</ImplicitUsings>
55
<Nullable>enable</Nullable>
66
<LangVersion>preview</LangVersion>
Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using System.Runtime.InteropServices;
2+
using System.Runtime.Intrinsics.Arm;
13
using System.Runtime.Intrinsics.X86;
24

35
namespace Ramstack.Globbing;
@@ -8,10 +10,19 @@ public class SimdConfigurationTests
810
[Test]
911
public void VerifySimdConfiguration()
1012
{
11-
var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0";
12-
var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0";
13+
if (Environment.GetEnvironmentVariable("DOTNET_EnableHWIntrinsic") == "0")
14+
{
15+
Assert.That(Sse2.IsSupported, Is.False);
16+
Assert.That(Sse41.IsSupported, Is.False);
17+
Assert.That(Avx2.IsSupported, Is.False);
18+
Assert.That(AdvSimd.IsSupported, Is.False);
19+
}
1320

14-
Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported));
15-
Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported));
21+
if (RuntimeInformation.ProcessArchitecture == Architecture.X64 && Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0")
22+
{
23+
Assert.That(Sse2.IsSupported, Is.True);
24+
Assert.That(Sse41.IsSupported, Is.True);
25+
Assert.That(Avx2.IsSupported, Is.False);
26+
}
1627
}
1728
}

Ramstack.Globbing/Internal/PathHelper.cs

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Runtime.CompilerServices;
55
using System.Runtime.InteropServices;
66
using System.Runtime.Intrinsics;
7+
using System.Runtime.Intrinsics.Arm;
78
using System.Runtime.Intrinsics.X86;
89

910
namespace Ramstack.Globbing.Internal;
@@ -172,14 +173,46 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length)
172173
}
173174
while (i < tail);
174175

176+
//
175177
// Process remaining chars
176178
// NOTE: When 'length == Vector128<ushort>.Count', this results in one extra (redundant) write
179+
//
177180

178181
value = LoadVector128(ref p, tail);
179182
mask = Sse2.CompareEqual(value, backslash);
180183
result = Sse41.BlendVariable(value, slash, mask);
181184
WriteVector128(ref p, tail, result);
182185
}
186+
else if (AdvSimd.IsSupported && length >= Vector128<ushort>.Count)
187+
{
188+
Vector128<ushort> value;
189+
Vector128<ushort> mask;
190+
Vector128<ushort> result;
191+
192+
var slash = Vector128.Create((ushort)'/');
193+
var backslash = Vector128.Create((ushort)'\\');
194+
var tail = length - Vector128<ushort>.Count;
195+
196+
do
197+
{
198+
value = LoadVector128(ref p, i);
199+
mask = AdvSimd.CompareEqual(value, backslash);
200+
result = AdvSimd.BitwiseSelect(mask, slash, value);
201+
WriteVector128(ref p, i, result);
202+
203+
i += Vector128<ushort>.Count;
204+
}
205+
while (i < tail);
206+
207+
//
208+
// Process remaining chars
209+
// NOTE: When 'length == Vector128<ushort>.Count', this results in one extra (redundant) write
210+
//
211+
value = LoadVector128(ref p, tail);
212+
mask = AdvSimd.CompareEqual(value, backslash);
213+
result = AdvSimd.BitwiseSelect(mask, slash, value);
214+
WriteVector128(ref p, tail, result);
215+
}
183216
else
184217
{
185218
for (; i < length; i++)
@@ -198,10 +231,10 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length)
198231
/// <returns>
199232
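/// A 256-bit bitmask with all bits set when <paramref name="flags"/> is <see cref="MatchFlags.Windows"/> (a backslash counts as a path separator); otherwise, all bits are zero.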
200233
/// </returns>
201-
private static Vector256<ushort> CreateAllowEscaping256Bitmask(MatchFlags flags)
234+
private static Vector256<ushort> CreateBackslash256Bitmask(MatchFlags flags)
202235
{
203236
var mask = Vector256<ushort>.Zero;
204-
if (flags != MatchFlags.Windows)
237+
if (flags == MatchFlags.Windows)
205238
mask = Vector256<ushort>.AllBitsSet;
206239

207240
return mask;
@@ -214,10 +247,10 @@ private static Vector256<ushort> CreateAllowEscaping256Bitmask(MatchFlags flags)
214247
/// <returns>
215248
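/// A 128-bit bitmask with all bits set when <paramref name="flags"/> is <see cref="MatchFlags.Windows"/> (a backslash counts as a path separator); otherwise, all bits are zero.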
216249
/// </returns>
217-
private static Vector128<ushort> CreateAllowEscaping128Bitmask(MatchFlags flags)
250+
private static Vector128<ushort> CreateBackslash128Bitmask(MatchFlags flags)
218251
{
219252
var mask = Vector128<ushort>.Zero;
220-
if (flags != MatchFlags.Windows)
253+
if (flags == MatchFlags.Windows)
221254
mask = Vector128<ushort>.AllBitsSet;
222255

223256
return mask;
@@ -301,15 +334,37 @@ public PathSegmentIterator() =>
301334

302335
while ((int)_position < length)
303336
{
304-
if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0)
337+
if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0)
305338
{
306339
var offset = BitOperations.TrailingZeroCount(_mask);
307-
_last = (int)(_position + (nint)((uint)offset >> 1));
340+
if (AdvSimd.IsSupported)
341+
{
342+
//
343+
// On ARM, ExtractMostSignificantBits returns a mask where each bit
344+
// represents one vector element (1 bit per ushort), so offset
345+
// directly corresponds to the element index
346+
//
347+
_last = (int)(_position + (nint)(uint)offset);
308348

309-
//
310-
// Clear the bits for the current separator to process the next position in the mask
311-
//
312-
_mask &= ~(0b_11u << offset);
349+
//
350+
// Clear the bits for the current separator
351+
//
352+
_mask &= ~(1u << offset);
353+
}
354+
else
355+
{
356+
//
357+
// On x86, MoveMask (and ExtractMostSignificantBits on byte-based vectors)
358+
// returns a mask where each bit represents one byte (2 bits per ushort),
359+
// so we need to divide offset by 2 to get the actual element index
360+
//
361+
_last = (int)(_position + (nint)((uint)offset >> 1));
362+
363+
//
364+
// Clear the bits for the current separator
365+
//
366+
_mask &= ~(0b_11u << offset);
367+
}
313368

314369
//
315370
// Advance position to the next chunk when no separators remain in the mask
@@ -340,14 +395,14 @@ public PathSegmentIterator() =>
340395
if (Avx2.IsSupported && (int)_position + Vector256<ushort>.Count <= length)
341396
{
342397
var chunk = LoadVector256(ref source, _position);
343-
var allowEscapingMask = CreateAllowEscaping256Bitmask(flags);
398+
var backslashMask = CreateBackslash256Bitmask(flags);
344399
var slash = Vector256.Create((ushort)'/');
345400
var backslash = Vector256.Create((ushort)'\\');
346401

347402
var comparison = Avx2.Or(
348403
Avx2.CompareEqual(chunk, slash),
349-
Avx2.AndNot(
350-
allowEscapingMask,
404+
Avx2.And(
405+
backslashMask,
351406
Avx2.CompareEqual(chunk, backslash)));
352407

353408
//
@@ -367,14 +422,14 @@ public PathSegmentIterator() =>
367422
else if (Sse2.IsSupported && !Avx2.IsSupported && (int)_position + Vector128<ushort>.Count <= length)
368423
{
369424
var chunk = LoadVector128(ref source, _position);
370-
var allowEscapingMask = CreateAllowEscaping128Bitmask(flags);
425+
var backslashMask = CreateBackslash128Bitmask(flags);
371426
var slash = Vector128.Create((ushort)'/');
372427
var backslash = Vector128.Create((ushort)'\\');
373428

374429
var comparison = Sse2.Or(
375430
Sse2.CompareEqual(chunk, slash),
376-
Sse2.AndNot(
377-
allowEscapingMask,
431+
Sse2.And(
432+
backslashMask,
378433
Sse2.CompareEqual(chunk, backslash)));
379434

380435
//
@@ -391,6 +446,35 @@ public PathSegmentIterator() =>
391446
if (_mask == 0)
392447
_position += Vector128<ushort>.Count;
393448
}
449+
#if NET7_0_OR_GREATER
450+
else if (AdvSimd.IsSupported && (int)_position + Vector128<ushort>.Count <= length)
451+
{
452+
var chunk = LoadVector128(ref source, _position);
453+
var backslashMask = CreateBackslash128Bitmask(flags);
454+
var slash = Vector128.Create((ushort)'/');
455+
var backslash = Vector128.Create((ushort)'\\');
456+
457+
var comparison = AdvSimd.Or(
458+
AdvSimd.CompareEqual(chunk, slash),
459+
AdvSimd.And(
460+
backslashMask,
461+
AdvSimd.CompareEqual(chunk, backslash)));
462+
463+
//
464+
// Store the comparison bitmask and reuse it across iterations
465+
// as long as it contains non-zero bits.
466+
// This avoids reloading SIMD registers and repeating comparisons
467+
// on the same chunk of data.
468+
//
469+
_mask = comparison.ExtractMostSignificantBits();
470+
471+
//
472+
// Advance position to the next chunk when no separators found
473+
//
474+
if (_mask == 0)
475+
_position += Vector128<ushort>.Count;
476+
}
477+
#endif
394478
else
395479
{
396480
for (; (int)_position < length; _position++)

Ramstack.Globbing/Ramstack.Globbing.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22
<PropertyGroup>
3-
<TargetFramework>net6.0</TargetFramework>
3+
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
44
<Description>Fast and zero-allocation .NET globbing library for matching file paths using glob patterns.</Description>
55
<ImplicitUsings>enable</ImplicitUsings>
66
<Nullable>enable</Nullable>

0 commit comments

Comments
 (0)