Skip to content

Commit 775c95e

Browse files
Add Sse41 rounding
1 parent ce4d2ac commit 775c95e

File tree

2 files changed

+97
-8
lines changed

2 files changed

+97
-8
lines changed

src/ImageSharp.Drawing/Shapes/Rasterization/ScanEdgeCollection.Build.cs

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,17 @@ private enum VertexCategory
4545
RightRight,
4646
}
4747

48-
internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon, MemoryAllocator allocator, int subsampling)
48+
internal static ScanEdgeCollection Create(TessellatedMultipolygon multiPolygon, MemoryAllocator allocator, int subsampling)
4949
{
5050
// We allocate more than we need, since we don't know how many horizontal edges we have:
51-
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multipolygon.TotalVertexCount);
51+
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multiPolygon.TotalVertexCount);
5252

53-
RingWalker walker = new RingWalker(buffer.Memory.Span);
53+
RingWalker walker = new(buffer.Memory.Span);
5454

55-
using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multipolygon.Max(r => r.Vertices.Length));
55+
using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multiPolygon.Max(r => r.Vertices.Length));
5656
Span<float> roundedY = roundedYBuffer.Memory.Span;
5757

58-
foreach (TessellatedMultipolygon.Ring ring in multipolygon)
58+
foreach (TessellatedMultipolygon.Ring ring in multiPolygon)
5959
{
6060
if (ring.VertexCount < 3)
6161
{
@@ -123,6 +123,43 @@ static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float
123123
}
124124
}
125125
}
126+
else if (Sse41.IsSupported)
127+
{
128+
// Vectorize the largest prefix of the input whose length as a float array is a multiple of 8; the remaining vertices are left for the loop that follows this block:
129+
int verticesLengthInFloats = vertices.Length * 2;
130+
int vector128FloatCount_x2 = Vector128<float>.Count * 2;
131+
int remainder = verticesLengthInFloats % vector128FloatCount_x2;
132+
int verticesLength = verticesLengthInFloats - remainder;
133+
134+
if (verticesLength > 0)
135+
{
136+
ri = vertices.Length - (remainder / 2);
137+
float maxIterations = verticesLength / (Vector128<float>.Count * 2);
138+
ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
139+
ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));
140+
141+
Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
142+
Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);
143+
144+
// For every 1 vector we add to the destination we read 2 from the vertices.
145+
for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
146+
{
147+
// Load 4 PointF
148+
Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
149+
Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);
150+
151+
// Shuffle the points to group the Y properties
152+
Vector128<float> points1Y = Sse.Shuffle(points1, points1, 0b11_01_11_01);
153+
Vector128<float> points2Y = Sse.Shuffle(points2, points2, 0b11_01_11_01);
154+
Vector128<float> pointsY = Vector128.Create(points1Y.GetLower(), points2Y.GetLower());
155+
156+
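// Multiply by the subsampling ratio, round toward positive infinity (ceiling), then multiply by the inverted subsampling ratio and assign.
// NOTE(review): this is a ceiling, not round-to-nearest; confirm it agrees with the non-vectorized path.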
157+
// https://www.ocf.berkeley.edu/~horie/rounding.html
158+
Vector128<float> rounded = Sse41.RoundToPositiveInfinity(Sse.Multiply(pointsY, ssRatio));
159+
Unsafe.Add(ref destinationBase, i) = Sse.Multiply(rounded, inverseSsRatio);
160+
}
161+
}
162+
}
126163

127164
for (; ri < vertices.Length; ri++)
128165
{
@@ -136,7 +173,7 @@ static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float
136173
[MethodImpl(MethodImplOptions.AggressiveInlining)]
137174
private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory, EdgeCategory currentCategory)
138175
{
139-
var value = (VertexCategory)(((int)previousCategory << 2) | (int)currentCategory);
176+
VertexCategory value = (VertexCategory)(((int)previousCategory << 2) | (int)currentCategory);
140177
VerifyVertexCategory(value);
141178
return value;
142179
}
@@ -145,7 +182,7 @@ private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory
145182
private static void VerifyVertexCategory(VertexCategory vertexCategory)
146183
{
147184
int value = (int)vertexCategory;
148-
if (value < 0 || value >= 16)
185+
if (value is < 0 or >= 16)
149186
{
150187
throw new ArgumentOutOfRangeException(nameof(vertexCategory), "EdgeCategoryPair value shall be: 0 <= value < 16");
151188
}
@@ -190,7 +227,7 @@ public EdgeData(float startX, float endX, float startYRounded, float endYRounded
190227

191228
public void EmitScanEdge(Span<ScanEdge> edges, ref int edgeCounter)
192229
{
193-
if (this.EdgeCategory == EdgeCategory.Left || this.EdgeCategory == EdgeCategory.Right)
230+
if (this.EdgeCategory is EdgeCategory.Left or EdgeCategory.Right)
194231
{
195232
return;
196233
}

tests/ImageSharp.Drawing.Benchmarks/Drawing/Rounding.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ public class Rounding
1212
{
1313
private PointF[] vertices;
1414
private float[] destination;
15+
private float[] destinationSse41;
1516
private float[] destinationAvx;
1617

1718
[GlobalSetup]
1819
public void Setup()
1920
{
2021
this.vertices = new PointF[1000];
2122
this.destination = new float[this.vertices.Length];
23+
this.destinationSse41 = new float[this.vertices.Length];
2224
this.destinationAvx = new float[this.vertices.Length];
2325
Random r = new(42);
2426
for (int i = 0; i < this.vertices.Length; i++)
@@ -30,6 +32,9 @@ public void Setup()
3032
[Benchmark]
3133
public void RoundYAvx() => RoundYAvx(this.vertices, this.destinationAvx, 16);
3234

35+
[Benchmark]
36+
public void RoundYSse41() => RoundYSse41(this.vertices, this.destinationSse41, 16);
37+
3338
[Benchmark(Baseline = true)]
3439
public void RoundY() => RoundY(this.vertices, this.destination, 16);
3540

@@ -80,6 +85,53 @@ private static void RoundYAvx(ReadOnlySpan<PointF> vertices, Span<float> destina
8085
}
8186
}
8287

88+
/// <summary>
/// Rounds the Y coordinate of every vertex to the nearest multiple of <c>1 / subsamplingRatio</c>
/// (midpoints round away from zero) and writes the results to <paramref name="destination"/>.
/// Uses SSE4.1 for the bulk of the input when available and a scalar loop for the remainder.
/// </summary>
/// <param name="vertices">The source points; only their Y components are read.</param>
/// <param name="destination">Receives one rounded Y value per vertex. Must be at least as long as <paramref name="vertices"/>.</param>
/// <param name="subsamplingRatio">The number of subsampling steps per unit.</param>
/// <exception cref="ArgumentException"><paramref name="destination"/> is shorter than <paramref name="vertices"/>.</exception>
private static void RoundYSse41(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
{
    // The vectorized path writes through Unsafe references without bounds checks,
    // so the destination length has to be validated up front.
    if (destination.Length < vertices.Length)
    {
        throw new ArgumentException("Destination must be at least as long as vertices.", nameof(destination));
    }

    int ri = 0;
    if (Sse41.IsSupported)
    {
        // Every iteration reads 4 PointF (8 floats, two Vector128 loads) and writes 4 floats (one Vector128 store).
        // Vectorize the largest prefix that is a whole number of iterations; the scalar loop below handles the rest.
        int verticesPerIteration = Vector128<float>.Count;
        nint maxIterations = vertices.Length / verticesPerIteration;

        if (maxIterations > 0)
        {
            ri = (int)maxIterations * verticesPerIteration;

            ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

            Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
            Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

            // Constants for round-half-away-from-zero: trunc(x + copysign(0.5 - 1ulp, x)).
            // The largest float below 0.5 is used instead of 0.5 so that values just under a midpoint
            // are not pushed over it by the addition.
            Vector128<float> signMask = Vector128.Create(-0.0F);
            Vector128<float> almostHalf = Vector128.Create(0.49999997F);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
            {
                // Load 4 PointF: (x0, y0, x1, y1) and (x2, y2, x3, y3).
                Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // A single shuffle gathers the Y components: (y0, y1) from points1, (y2, y3) from points2.
                Vector128<float> pointsY = Sse.Shuffle(points1, points2, 0b11_01_11_01);

                // Multiply by the subsampling ratio, round half away from zero (matching the scalar
                // MathF.Round call below), then multiply by the inverted subsampling ratio and assign.
                // https://www.ocf.berkeley.edu/~horie/rounding.html
                Vector128<float> scaled = Sse.Multiply(pointsY, ssRatio);
                Vector128<float> bias = Sse.Or(Sse.And(scaled, signMask), almostHalf);
                Vector128<float> rounded = Sse41.RoundToZero(Sse.Add(scaled, bias));

                // NOTE: multiplying by the reciprocal can differ from the scalar division below by
                // one ULP when the ratio is not a power of two.
                Unsafe.Add(ref destinationBase, i) = Sse.Multiply(rounded, inverseSsRatio);
            }
        }
    }

    // Scalar path: the whole input when SSE4.1 is unavailable, otherwise the remaining 0-3 vertices.
    for (; ri < vertices.Length; ri++)
    {
        destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
    }
}
134+
83135
private static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
84136
{
85137
int ri = 0;

0 commit comments

Comments
 (0)