Skip to content

Commit e050b92

Browse files
Merge pull request #290 from SixLabors/js/draw-perf
Drawing performance tweaks
2 parents 3f1b4fa + d15be1c commit e050b92

File tree

32 files changed

+944
-119
lines changed

32 files changed

+944
-119
lines changed

src/ImageSharp.Drawing/Shapes/PolygonClipper/ClipperOffset.cs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

4-
using System.Numerics;
5-
64
namespace SixLabors.ImageSharp.Drawing.Shapes.PolygonClipper;
75

86
/// <summary>
97
/// Wrapper for clipper offset
108
/// </summary>
119
internal class ClipperOffset
1210
{
13-
// To make the floating point polygons compatible with clipper we have to scale them.
14-
private const float ScalingFactor = 1000F;
1511
private readonly PolygonOffsetter polygonClipperOffset;
1612

1713
/// <summary>
@@ -30,16 +26,16 @@ public ClipperOffset(float meterLimit = 2F, float arcTolerance = .25F)
3026
public ComplexPolygon Execute(float width)
3127
{
3228
PathsF solution = new();
33-
this.polygonClipperOffset.Execute(width * ScalingFactor, solution);
29+
this.polygonClipperOffset.Execute(width, solution);
3430

35-
var polygons = new Polygon[solution.Count];
31+
Polygon[] polygons = new Polygon[solution.Count];
3632
for (int i = 0; i < solution.Count; i++)
3733
{
3834
PathF pt = solution[i];
39-
var points = new PointF[pt.Count];
35+
PointF[] points = new PointF[pt.Count];
4036
for (int j = 0; j < pt.Count; j++)
4137
{
42-
points[j] = pt[j] / ScalingFactor;
38+
points[j] = pt[j];
4339
}
4440

4541
polygons[i] = new Polygon(points);
@@ -59,7 +55,7 @@ public void AddPath(ReadOnlySpan<PointF> pathPoints, JointStyle jointStyle, EndC
5955
PathF points = new(pathPoints.Length);
6056
for (int i = 0; i < pathPoints.Length; i++)
6157
{
62-
points.Add((Vector2)pathPoints[i] * ScalingFactor);
58+
points.Add(pathPoints[i]);
6359
}
6460

6561
this.polygonClipperOffset.AddPath(points, jointStyle, endCapStyle);

src/ImageSharp.Drawing/Shapes/Rasterization/ScanEdgeCollection.Build.cs

Lines changed: 135 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
using System.Buffers;
55
using System.Diagnostics;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.Arm;
10+
using System.Runtime.Intrinsics.X86;
711
using SixLabors.ImageSharp.Memory;
812

913
namespace SixLabors.ImageSharp.Drawing.Shapes.Rasterization;
@@ -42,17 +46,17 @@ private enum VertexCategory
4246
RightRight,
4347
}
4448

45-
internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon, MemoryAllocator allocator, int subsampling)
49+
internal static ScanEdgeCollection Create(TessellatedMultipolygon multiPolygon, MemoryAllocator allocator, int subsampling)
4650
{
4751
// We allocate more than we need, since we don't know how many horizontal edges we have:
48-
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multipolygon.TotalVertexCount);
52+
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multiPolygon.TotalVertexCount);
4953

50-
RingWalker walker = new RingWalker(buffer.Memory.Span);
54+
RingWalker walker = new(buffer.Memory.Span);
5155

52-
using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multipolygon.Max(r => r.Vertices.Length));
56+
using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multiPolygon.Max(r => r.Vertices.Length));
5357
Span<float> roundedY = roundedYBuffer.Memory.Span;
5458

55-
foreach (TessellatedMultipolygon.Ring ring in multipolygon)
59+
foreach (TessellatedMultipolygon.Ring ring in multiPolygon)
5660
{
5761
if (ring.VertexCount < 3)
5862
{
@@ -82,22 +86,140 @@ internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon,
8286

8387
// Writes, for every vertex, its Y coordinate snapped to the subsampling grid:
// round(Y * subsamplingRatio) / subsamplingRatio, with midpoints rounded away from zero.
//
// vertices:         the points whose Y coordinates are rounded.
// destination:      receives one rounded value per vertex; must be at least vertices.Length long.
// subsamplingRatio: the scale applied before rounding and removed afterwards.
//
// The bulk of the input is processed with AVX, SSE4.1 or AdvSimd when available; any
// remaining vertices are handled by the scalar loop at the end.
static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
{
    // The vectorized paths store through unchecked references, so a short destination
    // would be overrun silently instead of throwing like the scalar indexer does.
    Debug.Assert(destination.Length >= vertices.Length, "destination must hold one value per vertex.");

    // Index of the first vertex that has NOT been handled by a vectorized path.
    int ri = 0;
    if (Avx.IsSupported)
    {
        // Each iteration reads 8 PointF (two Vector256<float>) and writes 8 rounded Y values
        // (one Vector256<float>). Only the largest multiple of 8 vertices is processed here.
        int pointsPerIteration = Vector256<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

            Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);

            // NOTE(review): multiplying by the reciprocal matches the scalar division exactly when
            // the ratio is a power of two; for other ratios the last bit may differ - confirm acceptable.
            Vector256<float> inverseSsRatio = Vector256.Create(1F / subsamplingRatio);

            // Round half away from zero as trunc(x + copysign(h, x)), where h is the largest
            // float below 0.5. Using exactly 0.5 would wrongly round values just under a midpoint up.
            Vector256<float> signMask = Vector256.Create(-0.0F);
            Vector256<float> almostHalf = Vector256.Create(0.49999997F);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 8 PointF
                Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties (odd lanes), preserving vertex order.
                Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
                Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
                Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

                // Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
                Vector256<float> scaled = Avx.Multiply(pointsY, ssRatio);
                Vector256<float> signedHalf = Avx.Or(Avx.And(scaled, signMask), almostHalf);
                Vector256<float> rounded = Avx.RoundToZero(Avx.Add(scaled, signedHalf));
                Unsafe.Add(ref destinationBase, i) = Avx.Multiply(rounded, inverseSsRatio);
            }
        }
    }
    else if (Sse41.IsSupported)
    {
        // Each iteration reads 4 PointF (two Vector128<float>) and writes 4 rounded Y values
        // (one Vector128<float>). Only the largest multiple of 4 vertices is processed here.
        int pointsPerIteration = Vector128<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

            Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
            Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

            // Same away-from-zero rounding scheme as the AVX path: trunc(x + copysign(h, x)).
            Vector128<float> signMask = Vector128.Create(-0.0F);
            Vector128<float> almostHalf = Vector128.Create(0.49999997F);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 4 PointF
                Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> pointsY = Sse.Shuffle(points1, points2, 0b11_01_11_01);

                // Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
                Vector128<float> scaled = Sse.Multiply(pointsY, ssRatio);
                Vector128<float> signedHalf = Sse.Or(Sse.And(scaled, signMask), almostHalf);
                Vector128<float> rounded = Sse41.RoundToZero(Sse.Add(scaled, signedHalf));
                Unsafe.Add(ref destinationBase, i) = Sse.Multiply(rounded, inverseSsRatio);
            }
        }
    }
    else if (AdvSimd.IsSupported)
    {
        // Each iteration reads 4 PointF (two Vector128<float>) and writes 4 rounded Y values
        // (one Vector128<float>). Only the largest multiple of 4 vertices is processed here.
        int pointsPerIteration = Vector128<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

            Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
            Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 4 PointF
                Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> pointsY = AdvSimdShuffle(points1, points2, 0b11_01_11_01);

                // Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
                Vector128<float> rounded = AdvSimd.RoundAwayFromZero(AdvSimd.Multiply(pointsY, ssRatio));
                Unsafe.Add(ref destinationBase, i) = AdvSimd.Multiply(rounded, inverseSsRatio);
            }
        }
    }

    // Scalar path: handles the tail left over by a vectorized path, or everything when
    // no supported instruction set is available.
    for (; ri < vertices.Length; ri++)
    {
        destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
    }
}
93204

94205
return new ScanEdgeCollection(buffer, walker.EdgeCounter);
95206
}
96207

208+
/// <summary>
/// Builds a vector from two lanes of <paramref name="a"/> followed by two lanes of
/// <paramref name="b"/>, chosen by the four 2-bit selectors packed into <paramref name="control"/>.
/// </summary>
/// <param name="a">Source of result lanes 0 and 1.</param>
/// <param name="b">Source of result lanes 2 and 3.</param>
/// <param name="control">
/// Lane selectors, two bits each, lowest bits first: bits 0-1 and 2-3 index into
/// <paramref name="a"/>; bits 4-5 and 6-7 index into <paramref name="b"/>.
/// </param>
/// <returns>The vector assembled from the selected lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<float> AdvSimdShuffle(Vector128<float> a, Vector128<float> b, byte control)
{
    // Decode the four lane selectors up front.
    byte lane0 = (byte)(control & 0x3);
    byte lane1 = (byte)((control >> 2) & 0x3);
    byte lane2 = (byte)((control >> 4) & 0x3);
    byte lane3 = (byte)((control >> 6) & 0x3);

    // Seed every lane with the first selected element, then overwrite lanes 1..3.
    Vector128<float> shuffled = Vector128.Create(AdvSimd.Extract(a, lane0));
    shuffled = AdvSimd.Insert(shuffled, 1, AdvSimd.Extract(a, lane1));
    shuffled = AdvSimd.Insert(shuffled, 2, AdvSimd.Extract(b, lane2));
    shuffled = AdvSimd.Insert(shuffled, 3, AdvSimd.Extract(b, lane3));

    return shuffled;
}
218+
97219
/// <summary>
/// Combines the categories of the two edges meeting at a vertex into a single
/// <see cref="VertexCategory"/> value.
/// </summary>
/// <param name="previousCategory">Category of the edge before the vertex; stored shifted left by two bits.</param>
/// <param name="currentCategory">Category of the edge after the vertex; stored in the low bits.</param>
/// <returns>The packed value, after it has been passed to <c>VerifyVertexCategory</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory, EdgeCategory currentCategory)
{
    // Pack: previous category in the upper bit pair, current category in the lower one.
    int packed = ((int)previousCategory << 2) | (int)currentCategory;
    VertexCategory category = (VertexCategory)packed;

    // Rejects values outside 0 <= value < 16.
    VerifyVertexCategory(category);
    return category;
}
@@ -106,7 +228,7 @@ private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory
106228
private static void VerifyVertexCategory(VertexCategory vertexCategory)
107229
{
108230
int value = (int)vertexCategory;
109-
if (value < 0 || value >= 16)
231+
if (value is < 0 or >= 16)
110232
{
111233
throw new ArgumentOutOfRangeException(nameof(vertexCategory), "EdgeCategoryPair value shall be: 0 <= value < 16");
112234
}
@@ -151,7 +273,7 @@ public EdgeData(float startX, float endX, float startYRounded, float endYRounded
151273

152274
public void EmitScanEdge(Span<ScanEdge> edges, ref int edgeCounter)
153275
{
154-
if (this.EdgeCategory == EdgeCategory.Left || this.EdgeCategory == EdgeCategory.Right)
276+
if (this.EdgeCategory is EdgeCategory.Left or EdgeCategory.Right)
155277
{
156278
return;
157279
}

tests/Directory.Build.targets

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
<ItemGroup>
2020
<!-- Test Dependencies -->
2121
<PackageReference Update="BenchmarkDotNet" Version="0.13.1" />
22-
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="8.3.3" />
22+
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="12.2.2" />
23+
<PackageReference Update="Microsoft.DotNet.RemoteExecutor" Version="6.0.0-beta.21311.3" />
24+
<PackageReference Update="Microsoft.DotNet.XUnitExtensions" Version="6.0.0-beta.21311.3" />
2325
<PackageReference Update="Moq" Version="4.16.1" />
2426
<PackageReference Include="runtime.osx.10.10-x64.CoreCompat.System.Drawing" Version="5.8.64" Condition="'$(IsOSX)'=='true'" />
2527
<PackageReference Update="System.Drawing.Common" Version="5.0.2" />

0 commit comments

Comments
 (0)