Skip to content

Commit 9d4d0ac

Browse files
Attempt AdvSimd implementation
1 parent a7481bb commit 9d4d0ac

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

src/ImageSharp.Drawing/Shapes/Rasterization/ScanEdgeCollection.Build.cs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.Arm;
910
using System.Runtime.Intrinsics.X86;
1011
using SixLabors.ImageSharp.Memory;
1112

@@ -160,8 +161,41 @@ static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float
160161
}
161162
}
162163
}
164+
else if (AdvSimd.IsSupported)
{
    // Each iteration reads 2 Vector128<float> (4 PointF = 8 floats) and writes 1 Vector128<float> (4 rounded Y values).
    // Only the largest prefix whose float count is a multiple of 8 is vectorized here;
    // the scalar loop that follows this branch picks up at 'ri' and handles the remaining vertices.
    int verticesLengthInFloats = vertices.Length * 2;
    int floatsPerIteration = Vector128<float>.Count * 2;
    int remainder = verticesLengthInFloats % floatsPerIteration;
    int vectorizedFloatCount = verticesLengthInFloats - remainder;

    if (vectorizedFloatCount > 0)
    {
        // Index of the first vertex that is NOT covered by the vectorized loop.
        ri = vertices.Length - (remainder / 2);

        // Integer loop bound: the iteration count is always a whole number.
        int maxIterations = vectorizedFloatCount / floatsPerIteration;
        ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
        ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

        Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
        Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

        // For every 1 vector we add to the destination we read 2 from the vertices.
        for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
        {
            // Load 4 PointF
            Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
            Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

            // Shuffle the points to group the Y properties (lanes 1 and 3 of each input vector).
            Vector128<float> pointsY = AdvSimdShuffle(points1, points2, 0b11_01_11_01);

            // Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
            // Only AdvSimd intrinsics may be used in this branch: x86 Sse.* calls throw
            // PlatformNotSupportedException on Arm64.
            Vector128<float> rounded = AdvSimd.RoundAwayFromZero(AdvSimd.Multiply(pointsY, ssRatio));
            Unsafe.Add(ref destinationBase, i) = AdvSimd.Multiply(rounded, inverseSsRatio);
        }
    }
}
163198

164-
// TODO: Arm64
165199
for (; ri < vertices.Length; ri++)
166200
{
167201
destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
@@ -171,6 +205,17 @@ static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float
171205
return new ScanEdgeCollection(buffer, walker.EdgeCounter);
172206
}
173207

208+
/// <summary>
/// Builds a vector from two lanes of <paramref name="a"/> followed by two lanes of <paramref name="b"/>.
/// The lane selection uses two bits of <paramref name="control"/> per result lane, lowest bits first.
/// </summary>
/// <param name="a">The vector supplying result lanes 0 and 1.</param>
/// <param name="b">The vector supplying result lanes 2 and 3.</param>
/// <param name="control">
/// Bits 0-1 and 2-3 pick the lanes of <paramref name="a"/>; bits 4-5 and 6-7 pick the lanes of <paramref name="b"/>.
/// </param>
/// <returns>The vector <c>{ a[c0], a[c1], b[c2], b[c3] }</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<float> AdvSimdShuffle(Vector128<float> a, Vector128<float> b, byte control)
{
    // The lane indices are computed from a runtime value, so read them with the portable
    // element accessor instead of AdvSimd.Extract/Insert, whose index operands are instruction
    // immediates. Creating the vector in one step also avoids broadcasting lane 0 to every
    // lane only to overwrite three of them.
    return Vector128.Create(
        a.GetElement(control & 0x3),
        a.GetElement((control >> 2) & 0x3),
        b.GetElement((control >> 4) & 0x3),
        b.GetElement((control >> 6) & 0x3));
}
218+
174219
[MethodImpl(MethodImplOptions.AggressiveInlining)]
175220
private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory, EdgeCategory currentCategory)
176221
{

tests/ImageSharp.Drawing.Tests/Shapes/Scan/ScanEdgeCollectionTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ static void RunTest()
104104
VerifyEdge(edges, 2f, 3f, (1f, 1.5f), 1, 1, true);
105105
}
106106

107-
FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE41);
107+
FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE41 | HwIntrinsics.DisableArm64AdvSimd);
108108
}
109109

110110
[Fact]

0 commit comments

Comments
 (0)