44using System . Buffers ;
55using System . Diagnostics ;
66using System . Runtime . CompilerServices ;
7+ using System . Runtime . InteropServices ;
8+ using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . Arm ;
10+ using System . Runtime . Intrinsics . X86 ;
711using SixLabors . ImageSharp . Memory ;
812
913namespace SixLabors . ImageSharp . Drawing . Shapes . Rasterization ;
@@ -42,17 +46,17 @@ private enum VertexCategory
4246 RightRight ,
4347 }
4448
45- internal static ScanEdgeCollection Create ( TessellatedMultipolygon multipolygon , MemoryAllocator allocator , int subsampling )
49+ internal static ScanEdgeCollection Create ( TessellatedMultipolygon multiPolygon , MemoryAllocator allocator , int subsampling )
4650 {
4751 // We allocate more than we need, since we don't know how many horizontal edges do we have:
48- IMemoryOwner < ScanEdge > buffer = allocator . Allocate < ScanEdge > ( multipolygon . TotalVertexCount ) ;
52+ IMemoryOwner < ScanEdge > buffer = allocator . Allocate < ScanEdge > ( multiPolygon . TotalVertexCount ) ;
4953
50- RingWalker walker = new RingWalker ( buffer . Memory . Span ) ;
54+ RingWalker walker = new ( buffer . Memory . Span ) ;
5155
52- using IMemoryOwner < float > roundedYBuffer = allocator . Allocate < float > ( multipolygon . Max ( r => r . Vertices . Length ) ) ;
56+ using IMemoryOwner < float > roundedYBuffer = allocator . Allocate < float > ( multiPolygon . Max ( r => r . Vertices . Length ) ) ;
5357 Span < float > roundedY = roundedYBuffer . Memory . Span ;
5458
55- foreach ( TessellatedMultipolygon . Ring ring in multipolygon )
59+ foreach ( TessellatedMultipolygon . Ring ring in multiPolygon )
5660 {
5761 if ( ring . VertexCount < 3 )
5862 {
@@ -82,22 +86,140 @@ internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon,
8286
// Snaps the Y coordinate of every vertex to the subsampling grid:
//   destination[i] = Round(vertices[i].Y * subsamplingRatio, AwayFromZero) / subsamplingRatio
//
// vertices:         the points whose Y component is read (X is ignored).
// destination:      receives one float per vertex; must be at least vertices.Length long.
// subsamplingRatio: the scale applied before rounding and removed again afterwards.
//
// The vectorized paths must produce the same value as the scalar loop at the bottom,
// otherwise the same Y would snap differently depending on its index in the span or on
// the CPU the code runs on. For that reason midpoints are rounded away from zero in every
// path (a plain ceil(x - 0.5) would round +2.5 down to 2).
static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
{
    // The SIMD loops below store through raw references without bounds checks.
    Debug.Assert(destination.Length >= vertices.Length, "destination must hold one value per vertex.");

    // Index of the first vertex that is left for the scalar loop.
    int ri = 0;

    if (Avx.IsSupported)
    {
        // One iteration reads 2 vectors (8 PointF = 16 floats) and writes 1 vector (8 Y values).
        int pointsPerIteration = Vector256<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

            Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);
            Vector256<float> half = Vector256.Create(.5F);
            Vector256<float> one = Vector256.Create(1F);
            Vector256<float> signMask = Vector256.Create(int.MinValue).AsSingle();

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 8 PointF
                Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
                Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
                Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

                // Round half away from zero: work on the magnitude, add 1 where the
                // fractional part is >= 0.5, then restore the sign.
                // magnitude - floor(magnitude) is exact in floating point, so the comparison is too.
                Vector256<float> scaled = Avx.Multiply(pointsY, ssRatio);
                Vector256<float> sign = Avx.And(scaled, signMask);
                Vector256<float> magnitude = Avx.AndNot(signMask, scaled);
                Vector256<float> floored = Avx.Floor(magnitude);
                Vector256<float> fraction = Avx.Subtract(magnitude, floored);
                Vector256<float> roundsUp = Avx.Compare(fraction, half, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
                Vector256<float> rounded = Avx.Or(Avx.Add(floored, Avx.And(roundsUp, one)), sign);

                // Divide (rather than multiply by the reciprocal) to match the scalar loop exactly.
                Unsafe.Add(ref destinationBase, i) = Avx.Divide(rounded, ssRatio);
            }
        }
    }
    else if (Sse41.IsSupported)
    {
        // One iteration reads 2 vectors (4 PointF = 8 floats) and writes 1 vector (4 Y values).
        int pointsPerIteration = Vector128<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

            Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
            Vector128<float> half = Vector128.Create(.5F);
            Vector128<float> one = Vector128.Create(1F);
            Vector128<float> signMask = Vector128.Create(int.MinValue).AsSingle();

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 4 PointF
                Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> pointsY = Sse.Shuffle(points1, points2, 0b11_01_11_01);

                // Round half away from zero; see the AVX path for the reasoning.
                Vector128<float> scaled = Sse.Multiply(pointsY, ssRatio);
                Vector128<float> sign = Sse.And(scaled, signMask);
                Vector128<float> magnitude = Sse.AndNot(signMask, scaled);
                Vector128<float> floored = Sse41.Floor(magnitude);
                Vector128<float> fraction = Sse.Subtract(magnitude, floored);
                Vector128<float> roundsUp = Sse.CompareGreaterThanOrEqual(fraction, half);
                Vector128<float> rounded = Sse.Or(Sse.Add(floored, Sse.And(roundsUp, one)), sign);

                // Divide (rather than multiply by the reciprocal) to match the scalar loop exactly.
                Unsafe.Add(ref destinationBase, i) = Sse.Divide(rounded, ssRatio);
            }
        }
    }
    else if (AdvSimd.IsSupported)
    {
        // One iteration reads 2 vectors (4 PointF = 8 floats) and writes 1 vector (4 Y values).
        int pointsPerIteration = Vector128<float>.Count;
        int iterations = vertices.Length / pointsPerIteration;

        if (iterations > 0)
        {
            ri = iterations * pointsPerIteration;
            ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
            ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

            Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
            Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

            // For every 1 vector we add to the destination we read 2 from the vertices.
            for (nint i = 0, j = 0; i < iterations; i++, j += 2)
            {
                // Load 4 PointF
                Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
                Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

                // Shuffle the points to group the Y properties
                Vector128<float> pointsY = AdvSimdShuffle(points1, points2, 0b11_01_11_01);

                // AdvSimd has a native round-half-away-from-zero instruction.
                Vector128<float> rounded = AdvSimd.RoundAwayFromZero(AdvSimd.Multiply(pointsY, ssRatio));

                // Vector division only exists on ARM64.
                // NOTE(review): on 32-bit ARM the reciprocal multiply can differ from the scalar
                // division by 1 ulp when the ratio is not a power of two - confirm this is acceptable.
                Unsafe.Add(ref destinationBase, i) = AdvSimd.Arm64.IsSupported
                    ? AdvSimd.Arm64.Divide(rounded, ssRatio)
                    : AdvSimd.Multiply(rounded, inverseSsRatio);
            }
        }
    }

    // Scalar path: handles the tail left by the SIMD loops, or everything when no SIMD path ran.
    for (; ri < vertices.Length; ri++)
    {
        destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
    }
}
93204
94205 return new ScanEdgeCollection ( buffer , walker . EdgeCounter ) ;
95206 }
96207
/// <summary>
/// Builds a vector from four lanes picked out of two source vectors.
/// The two low result lanes are taken from <paramref name="a"/> and the two high
/// result lanes from <paramref name="b"/>.
/// </summary>
/// <param name="a">The vector supplying result lanes 0 and 1.</param>
/// <param name="b">The vector supplying result lanes 2 and 3.</param>
/// <param name="control">
/// Four 2-bit lane indices, lowest bits first: bits 0-1 and 2-3 index into <paramref name="a"/>,
/// bits 4-5 and 6-7 index into <paramref name="b"/>.
/// </param>
/// <returns>The vector assembled from the selected lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<float> AdvSimdShuffle(Vector128<float> a, Vector128<float> b, byte control)
{
    // Decode the four 2-bit selectors packed into the control byte.
    byte lane0 = (byte)(control & 0x3);
    byte lane1 = (byte)((control >> 2) & 0x3);
    byte lane2 = (byte)((control >> 4) & 0x3);
    byte lane3 = (byte)((control >> 6) & 0x3);

    return Vector128.Create(
        AdvSimd.Extract(a, lane0),
        AdvSimd.Extract(a, lane1),
        AdvSimd.Extract(b, lane2),
        AdvSimd.Extract(b, lane3));
}
218+
/// <summary>
/// Packs the categories of the two edges meeting at a vertex into a single
/// <see cref="VertexCategory"/>: the previous edge category is shifted left by two bits
/// and the current edge category is OR-ed into the low bits.
/// </summary>
/// <param name="previousCategory">The category of the edge leading into the vertex.</param>
/// <param name="currentCategory">The category of the edge leaving the vertex.</param>
/// <returns>The combined, verified vertex category.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory, EdgeCategory currentCategory)
{
    int highBits = (int)previousCategory << 2;
    int lowBits = (int)currentCategory;
    VertexCategory combined = (VertexCategory)(highBits | lowBits);

    // Validate the packed value before handing it back.
    VerifyVertexCategory(combined);
    return combined;
}
@@ -106,7 +228,7 @@ private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory
106228 private static void VerifyVertexCategory ( VertexCategory vertexCategory )
107229 {
108230 int value = ( int ) vertexCategory ;
109- if ( value < 0 || value >= 16 )
231+ if ( value is < 0 or >= 16 )
110232 {
111233 throw new ArgumentOutOfRangeException ( nameof ( vertexCategory ) , "EdgeCategoryPair value shall be: 0 <= value < 16" ) ;
112234 }
@@ -151,7 +273,7 @@ public EdgeData(float startX, float endX, float startYRounded, float endYRounded
151273
152274 public void EmitScanEdge ( Span < ScanEdge > edges , ref int edgeCounter )
153275 {
154- if ( this . EdgeCategory == EdgeCategory . Left || this . EdgeCategory == EdgeCategory . Right )
276+ if ( this . EdgeCategory is EdgeCategory . Left or EdgeCategory . Right )
155277 {
156278 return ;
157279 }
0 commit comments