Skip to content

Commit f95d4d1

Browse files
Merge branch 'main' into sn/nullable/various
2 parents 29317da + ae0a51c commit f95d4d1

File tree

22 files changed

+10256
-727
lines changed

22 files changed

+10256
-727
lines changed

.github/workflows/build-and-test.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,13 @@ jobs:
114114
if: ${{ matrix.options.sdk-preview != true }}
115115
uses: actions/setup-dotnet@v3
116116
with:
117-
include-prerelease: true
118117
dotnet-version: |
119118
6.0.x
120119
121120
- name: DotNet Setup Preview
122121
if: ${{ matrix.options.sdk-preview == true }}
123122
uses: actions/setup-dotnet@v3
124123
with:
125-
include-prerelease: true
126124
dotnet-version: |
127125
7.0.x
128126

ci-pack.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ dotnet clean -c Release
33
$repositoryUrl = "https://github.com/$env:GITHUB_REPOSITORY"
44

55
# Building for packing and publishing.
6-
dotnet pack -c Release --output "$PSScriptRoot/artifacts" /p:RepositoryUrl=$repositoryUrl
6+
dotnet pack -c Release -p:PackageOutputPath="$PSScriptRoot/artifacts" -p:RepositoryUrl=$repositoryUrl

src/ImageSharp/Common/Constants.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

44
namespace SixLabors.ImageSharp;

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Runtime.CompilerServices;
66
using System.Runtime.InteropServices;
77
using System.Runtime.Intrinsics;
8+
using System.Runtime.Intrinsics.Arm;
89
using System.Runtime.Intrinsics.X86;
910

1011
namespace SixLabors.ImageSharp;
@@ -808,6 +809,25 @@ public static int ReduceSum(Vector256<int> accumulator)
808809
return Sse2.ConvertToInt32(vsum);
809810
}
810811

812+
/// <summary>
/// Horizontally adds the four 32-bit lanes of a vector using Arm AdvSimd instructions.
/// </summary>
/// <param name="accumulator">The vector whose lanes are added together.</param>
/// <returns>The total of all lanes, cast to <see cref="int"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int ReduceSumArm(Vector128<uint> accumulator)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        // No across-vector add available here: add neighbouring lanes into two
        // 64-bit totals, then add the low 32 bits of both totals together.
        Vector128<ulong> pairTotals = AdvSimd.AddPairwiseWidening(accumulator);
        Vector64<uint> lowerTotal = pairTotals.GetLower().AsUInt32();
        Vector64<uint> upperTotal = pairTotals.GetUpper().AsUInt32();
        Vector64<uint> combined = AdvSimd.Add(lowerTotal, upperTotal);
        return (int)AdvSimd.Extract(combined, 0);
    }

    // Arm64 adds every lane with a single instruction; the result sits in lane 0.
    Vector64<uint> total = AdvSimd.Arm64.AddAcross(accumulator);
    return (int)AdvSimd.Extract(total, 0);
}
830+
811831
/// <summary>
812832
/// Reduces even elements of the vector into one sum.
813833
/// </summary>

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,8 @@ private static void Shuffle4Slice3(
532532
}
533533

534534
/// <summary>
535-
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
535+
/// Performs a multiplication and an addition of the <see cref="Vector256{Single}"/>.
536+
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
536537
/// </summary>
537538
/// <remarks>ret = (vm0 * vm1) + va</remarks>
538539
/// <param name="va">The vector to add to the intermediate result.</param>
@@ -549,22 +550,21 @@ public static Vector256<float> MultiplyAdd(
549550
{
550551
return Fma.MultiplyAdd(vm1, vm0, va);
551552
}
552-
else
553-
{
554-
return Avx.Add(Avx.Multiply(vm0, vm1), va);
555-
}
553+
554+
return Avx.Add(Avx.Multiply(vm0, vm1), va);
556555
}
557556

558557
/// <summary>
559-
/// Performs a multiplication and a substraction of the <see cref="Vector256{T}"/>.
558+
/// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
559+
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
560560
/// </summary>
561561
/// <remarks>ret = (vm0 * vm1) - vs</remarks>
562-
/// <param name="vs">The vector to substract from the intermediate result.</param>
562+
/// <param name="vs">The vector to subtract from the intermediate result.</param>
563563
/// <param name="vm0">The first vector to multiply.</param>
564564
/// <param name="vm1">The second vector to multiply.</param>
565565
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
566566
[MethodImpl(InliningOptions.ShortMethod)]
567-
public static Vector256<float> MultiplySubstract(
567+
public static Vector256<float> MultiplySubtract(
568568
in Vector256<float> vs,
569569
in Vector256<float> vm0,
570570
in Vector256<float> vm1)
@@ -573,10 +573,30 @@ public static Vector256<float> MultiplySubstract(
573573
{
574574
return Fma.MultiplySubtract(vm1, vm0, vs);
575575
}
576-
else
576+
577+
return Avx.Subtract(Avx.Multiply(vm0, vm1), vs);
578+
}
579+
580+
/// <summary>
581+
/// Performs a multiplication and a negated addition of the <see cref="Vector256{Single}"/>.
582+
/// </summary>
583+
/// <remarks>ret = c - (a * b)</remarks>
584+
/// <param name="a">The first vector to multiply.</param>
585+
/// <param name="b">The second vector to multiply.</param>
586+
/// <param name="c">The vector to which the negated product is added.</param>
587+
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
588+
[MethodImpl(InliningOptions.ShortMethod)]
589+
public static Vector256<float> MultiplyAddNegated(
590+
in Vector256<float> a,
591+
in Vector256<float> b,
592+
in Vector256<float> c)
593+
{
594+
if (Fma.IsSupported)
577595
{
578-
return Avx.Subtract(Avx.Multiply(vm0, vm1), vs);
596+
return Fma.MultiplyAddNegated(a, b, c);
579597
}
598+
599+
return Avx.Subtract(c, Avx.Multiply(a, b));
580600
}
581601

582602
/// <summary>

src/ImageSharp/Formats/DecoderOptions.cs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,25 @@ public sealed class DecoderOptions
1515

1616
private uint maxFrames = int.MaxValue;
1717

18+
// Used by the FileProvider in the unit tests to set the configuration on the fly.
19+
#pragma warning disable IDE0032 // Use auto property
20+
private Configuration configuration = Configuration.Default;
21+
#pragma warning restore IDE0032 // Use auto property
22+
1823
/// <summary>
1924
/// Gets the shared default general decoder options instance.
25+
/// Used internally to reduce allocations for default decoding operations.
2026
/// </summary>
2127
internal static DecoderOptions Default { get; } = LazyOptions.Value;
2228

2329
/// <summary>
2430
/// Gets a custom configuration instance to be used by the image processing pipeline.
2531
/// </summary>
26-
public Configuration Configuration { get; internal set; } = Configuration.Default;
32+
#pragma warning disable IDE0032 // Use auto property
33+
#pragma warning disable RCS1085 // Use auto-implemented property.
34+
public Configuration Configuration { get => this.configuration; init => this.configuration = value; }
35+
#pragma warning restore RCS1085 // Use auto-implemented property.
36+
#pragma warning restore IDE0032 // Use auto property
2737

2838
/// <summary>
2939
/// Gets the target size to decode the image into. Scaling should use an operation equivalent to <see cref="ResizeMode.Max"/>.
@@ -44,4 +54,6 @@ public sealed class DecoderOptions
4454
/// Gets the maximum number of image frames to decode, inclusive.
4555
/// </summary>
4656
public uint MaxFrames { get => this.maxFrames; init => this.maxFrames = Math.Clamp(value, 1, int.MaxValue); }
57+
58+
internal void SetConfiguration(Configuration configuration) => this.configuration = configuration;
4759
}

src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
9999

100100
var mm256_F_1_4142 = Vector256.Create(1.414213562f);
101101
Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
102-
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
102+
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubtract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
103103

104104
tmp0 = Avx.Add(tmp10, tmp13);
105105
tmp3 = Avx.Subtract(tmp10, tmp13);

src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Runtime.CompilerServices;
66
using System.Runtime.InteropServices;
77
using System.Runtime.Intrinsics;
8+
using System.Runtime.Intrinsics.Arm;
89
using System.Runtime.Intrinsics.X86;
910

1011
// ReSharper disable InconsistentNaming
@@ -26,6 +27,11 @@ public static int Vp8_Sse16x16(Span<byte> a, Span<byte> b)
2627
return Vp8_Sse16xN_Sse2(a, b, 8);
2728
}
2829

30+
if (AdvSimd.IsSupported)
31+
{
32+
return Vp8_Sse16x16_Neon(a, b);
33+
}
34+
2935
return Vp8_SseNxN(a, b, 16, 16);
3036
}
3137

@@ -43,6 +49,11 @@ public static int Vp8_Sse16x8(Span<byte> a, Span<byte> b)
4349
return Vp8_Sse16xN_Sse2(a, b, 4);
4450
}
4551

52+
if (AdvSimd.IsSupported)
53+
{
54+
return Vp8_Sse16x8_Neon(a, b);
55+
}
56+
4657
return Vp8_SseNxN(a, b, 16, 8);
4758
}
4859

@@ -119,6 +130,11 @@ public static int Vp8_Sse4x4(Span<byte> a, Span<byte> b)
119130
return Numerics.ReduceSum(sum);
120131
}
121132

133+
if (AdvSimd.IsSupported)
134+
{
135+
return Vp8_Sse4x4_Neon(a, b);
136+
}
137+
122138
return Vp8_SseNxN(a, b, 4, 4);
123139
}
124140

@@ -199,6 +215,106 @@ private static int Vp8_Sse16xN_Avx2(Span<byte> a, Span<byte> b, int numPairs)
199215
return Numerics.ReduceSum(sum);
200216
}
201217

218+
/// <summary>
/// Computes the sum of squared differences between two blocks of 16 rows by 16 bytes
/// using Arm AdvSimd instructions.
/// </summary>
/// <param name="a">The first block; consecutive rows start <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="b">The second block, laid out like <paramref name="a"/>.</param>
/// <returns>The sum of the squared byte differences.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse16x16_Neon(Span<byte> a, Span<byte> b) => Vp8_Sse16xN_Neon(a, b, 16);

/// <summary>
/// Computes the sum of squared differences between two blocks of 8 rows by 16 bytes
/// using Arm AdvSimd instructions.
/// </summary>
/// <param name="a">The first block; consecutive rows start <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="b">The second block, laid out like <paramref name="a"/>.</param>
/// <returns>The sum of the squared byte differences.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse16x8_Neon(Span<byte> a, Span<byte> b) => Vp8_Sse16xN_Neon(a, b, 8);

/// <summary>
/// Shared implementation for the 16-byte-wide AdvSimd sum of squared differences.
/// </summary>
/// <param name="a">The first block; consecutive rows start <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="b">The second block, laid out like <paramref name="a"/>.</param>
/// <param name="rows">The number of 16-byte rows to process.</param>
/// <returns>The sum of the squared byte differences over all rows.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static unsafe int Vp8_Sse16xN_Neon(Span<byte> a, Span<byte> b, int rows)
{
    Vector128<uint> sum = Vector128<uint>.Zero;

    // Pin both spans for the duration of the loop so raw pointers can be handed to the row helper.
    fixed (byte* aRef = &MemoryMarshal.GetReference(a), bRef = &MemoryMarshal.GetReference(b))
    {
        for (int y = 0; y < rows; y++)
        {
            sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum);
        }
    }

    // Collapse the four partial sums into a single scalar.
#if NET7_0_OR_GREATER
    return (int)Vector128.Sum(sum);
#else
    return Numerics.ReduceSumArm(sum);
#endif
}
261+
262+
/// <summary>
/// Computes the sum of squared differences between two 4x4 byte blocks
/// using Arm AdvSimd instructions.
/// </summary>
/// <param name="a">The first block; consecutive rows start <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <param name="b">The second block, laid out like <paramref name="a"/>.</param>
/// <returns>The sum of the squared byte differences.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse4x4_Neon(Span<byte> a, Span<byte> b)
{
    // Each load packs the four 4-byte rows of a block into one 16-byte vector.
    Vector128<byte> pixelsA = Load4x4Neon(a).AsByte();
    Vector128<byte> pixelsB = Load4x4Neon(b).AsByte();
    Vector128<byte> delta = AdvSimd.AbsoluteDifference(pixelsA, pixelsB);

    // Square each half of the differences, widening bytes to 16-bit products.
    Vector64<byte> deltaLow = delta.GetLower();
    Vector64<byte> deltaHigh = delta.GetUpper();
    Vector128<ushort> squaresLow = AdvSimd.MultiplyWideningLower(deltaLow, deltaLow);
    Vector128<ushort> squaresHigh = AdvSimd.MultiplyWideningLower(deltaHigh, deltaHigh);

    // Add neighbouring products while widening to 32 bits, then merge both halves.
    Vector128<uint> sum = AdvSimd.Add(
        AdvSimd.AddPairwiseWidening(squaresLow),
        AdvSimd.AddPairwiseWidening(squaresHigh));

#if NET7_0_OR_GREATER
    return (int)Vector128.Sum(sum);
#else
    return Numerics.ReduceSumArm(sum);
#endif
}
284+
285+
/// <summary>
/// Loads a 4x4 block of bytes into a single <see cref="Vector128{UInt32}"/>, one 4-byte row per lane.
/// </summary>
/// <param name="src">The source pixels; consecutive rows start <see cref="WebpConstants.Bps"/> bytes apart.</param>
/// <returns>A vector whose lanes 0 to 3 hold rows 0 to 3 of the block.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static unsafe Vector128<uint> Load4x4Neon(Span<byte> src)
{
    fixed (byte* basePtr = &MemoryMarshal.GetReference(src))
    {
        // Resolve the start of each row up front; every row contributes 4 bytes.
        uint* row0 = (uint*)basePtr;
        uint* row1 = (uint*)(basePtr + WebpConstants.Bps);
        uint* row2 = (uint*)(basePtr + (WebpConstants.Bps * 2));
        uint* row3 = (uint*)(basePtr + (WebpConstants.Bps * 3));

        Vector128<uint> rows = Vector128<uint>.Zero;
        rows = AdvSimd.LoadAndInsertScalar(rows, 0, row0);
        rows = AdvSimd.LoadAndInsertScalar(rows, 1, row1);
        rows = AdvSimd.LoadAndInsertScalar(rows, 2, row2);
        rows = AdvSimd.LoadAndInsertScalar(rows, 3, row3);
        return rows;
    }
}
299+
300+
/// <summary>
/// Adds the squared differences of 16 byte pairs to a running sum.
/// </summary>
/// <param name="a">Pointer to the first 16 bytes.</param>
/// <param name="b">Pointer to the second 16 bytes.</param>
/// <param name="sum">The running sum held as four 32-bit partial totals.</param>
/// <returns><paramref name="sum"/> with the squared differences of this row added in.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static unsafe Vector128<uint> AccumulateSSE16Neon(byte* a, byte* b, Vector128<uint> sum)
{
    Vector128<byte> rowA = AdvSimd.LoadVector128(a);
    Vector128<byte> rowB = AdvSimd.LoadVector128(b);
    Vector128<byte> delta = AdvSimd.AbsoluteDifference(rowA, rowB);

    // Square each half of the differences, widening bytes to 16-bit products.
    Vector64<byte> deltaLow = delta.GetLower();
    Vector64<byte> deltaHigh = delta.GetUpper();
    Vector128<ushort> squaresLow = AdvSimd.MultiplyWideningLower(deltaLow, deltaLow);
    Vector128<ushort> squaresHigh = AdvSimd.MultiplyWideningLower(deltaHigh, deltaHigh);

    // Add neighbouring products while widening to 32 bits, then fold into the running sum.
    Vector128<uint> rowTotal = AdvSimd.Add(
        AdvSimd.AddPairwiseWidening(squaresLow),
        AdvSimd.AddPairwiseWidening(squaresHigh));
    return AdvSimd.Add(sum, rowTotal);
}
317+
202318
[MethodImpl(InliningOptions.ShortMethod)]
203319
private static Vector128<int> SubtractAndAccumulate(Vector128<byte> a, Vector128<byte> b)
204320
{

0 commit comments

Comments
 (0)