Skip to content

Commit eda0869

Browse files
Update benchmarks and add more tests
1 parent b9e4810 commit eda0869

File tree

12 files changed

+500
-42
lines changed

12 files changed

+500
-42
lines changed

src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
6464
ref byte sBase = ref MemoryMarshal.GetReference(source);
6565
ref byte dBase = ref MemoryMarshal.GetReference(dest);
6666

67-
Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
67+
Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
6868

6969
for (int i = 0; i < source.Length; i += 4)
7070
{

src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
3232
ref byte sBase = ref MemoryMarshal.GetReference(source);
3333
ref byte dBase = ref MemoryMarshal.GetReference(dest);
3434

35-
Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
35+
Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
3636

3737
Span<byte> temp = stackalloc byte[4];
3838
ref byte t = ref MemoryMarshal.GetReference(temp);

src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
3232
ref byte sBase = ref MemoryMarshal.GetReference(source);
3333
ref byte dBase = ref MemoryMarshal.GetReference(dest);
3434

35-
Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0);
35+
Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0);
3636

3737
for (int i = 0; i < source.Length; i += 3)
3838
{

src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
3232
ref byte sBase = ref MemoryMarshal.GetReference(source);
3333
ref byte dBase = ref MemoryMarshal.GetReference(dest);
3434

35-
Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0);
35+
Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0);
3636

3737
for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4)
3838
{

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ private static void Shuffle4(
297297
// shuffle controls to add to the library.
298298
// We can add static ROS instances if need be in the future.
299299
Span<byte> bytes = stackalloc byte[Vector256<byte>.Count];
300-
Shuffle.MmShuffleSpan(ref bytes, control);
300+
Shuffle.MMShuffleSpan(ref bytes, control);
301301
Vector256<byte> vshuffle = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
302302

303303
ref Vector256<byte> sourceBase =
@@ -333,7 +333,7 @@ private static void Shuffle4(
333333
{
334334
// Ssse3
335335
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
336-
Shuffle.MmShuffleSpan(ref bytes, control);
336+
Shuffle.MMShuffleSpan(ref bytes, control);
337337
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
338338

339339
ref Vector128<byte> sourceBase =
@@ -382,7 +382,7 @@ private static void Shuffle3(
382382
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
383383

384384
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
385-
Shuffle.MmShuffleSpan(ref bytes, control);
385+
Shuffle.MMShuffleSpan(ref bytes, control);
386386
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
387387

388388
ref Vector128<byte> sourceBase =
@@ -445,7 +445,7 @@ private static void Pad3Shuffle4(
445445
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
446446

447447
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
448-
Shuffle.MmShuffleSpan(ref bytes, control);
448+
Shuffle.MMShuffleSpan(ref bytes, control);
449449
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
450450

451451
ref Vector128<byte> sourceBase =
@@ -489,7 +489,7 @@ private static void Shuffle4Slice3(
489489
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
490490

491491
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
492-
Shuffle.MmShuffleSpan(ref bytes, control);
492+
Shuffle.MMShuffleSpan(ref bytes, control);
493493
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
494494

495495
ref Vector128<byte> sourceBase =

src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ private static void Shuffle4Remainder(
145145
{
146146
ref float sBase = ref MemoryMarshal.GetReference(source);
147147
ref float dBase = ref MemoryMarshal.GetReference(dest);
148-
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
148+
Shuffle.InverseMMShuffle(control, out int p3, out int p2, out int p1, out int p0);
149149

150150
for (int i = 0; i < source.Length; i += 4)
151151
{
@@ -484,13 +484,13 @@ public static class Shuffle
484484
public const byte MMShuffle3333 = 0b11111111;
485485

486486
[MethodImpl(InliningOptions.ShortMethod)]
487-
public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
487+
public static byte MMShuffle(byte p3, byte p2, byte p1, byte p0)
488488
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);
489489

490490
[MethodImpl(InliningOptions.ShortMethod)]
491-
public static void MmShuffleSpan(ref Span<byte> span, byte control)
491+
public static void MMShuffleSpan(ref Span<byte> span, byte control)
492492
{
493-
InverseMmShuffle(
493+
InverseMMShuffle(
494494
control,
495495
out int p3,
496496
out int p2,
@@ -509,7 +509,7 @@ public static void MmShuffleSpan(ref Span<byte> span, byte control)
509509
}
510510

511511
[MethodImpl(InliningOptions.ShortMethod)]
512-
public static void InverseMmShuffle(
512+
public static void InverseMMShuffle(
513513
byte control,
514514
out int p3,
515515
out int p2,

tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
88
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
99
public class Pad3Shuffle4Channel
1010
{
11-
private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2);
12-
private static readonly XYZWPad3Shuffle4 ControlFast = default;
11+
private static readonly DefaultPad3Shuffle4 Control = new(SimdUtils.Shuffle.MMShuffle1032);
1312
private byte[] source;
1413
private byte[] destination;
1514

@@ -26,15 +25,11 @@ public void Setup()
2625

2726
[Benchmark]
2827
public void Pad3Shuffle4()
29-
{
30-
SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
31-
}
28+
=> SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
3229

3330
[Benchmark]
3431
public void Pad3Shuffle4FastFallback()
35-
{
36-
SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast);
37-
}
32+
=> SimdUtils.Pad3Shuffle4(this.source, this.destination, default(XYZWPad3Shuffle4));
3833
}
3934

4035
// 2020-10-30
@@ -83,3 +78,50 @@ public void Pad3Shuffle4FastFallback()
8378
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
8479
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
8580
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
81+
82+
// 2023-02-21
83+
// ##########
84+
//
85+
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
86+
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
87+
// .NET SDK= 7.0.103
88+
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
89+
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
90+
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
91+
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
92+
93+
// Runtime=.NET 6.0
94+
95+
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
96+
// |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
97+
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - |
98+
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - |
99+
// | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - |
100+
// | | | | | | | | | | | | |
101+
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - |
102+
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - |
103+
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - |
104+
// | | | | | | | | | | | | |
105+
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - |
106+
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - |
107+
// | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - |
108+
// | | | | | | | | | | | | |
109+
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - |
110+
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - |
111+
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - |
112+
// | | | | | | | | | | | | |
113+
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - |
114+
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - |
115+
// | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - |
116+
// | | | | | | | | | | | | |
117+
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - |
118+
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - |
119+
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - |
120+
// | | | | | | | | | | | | |
121+
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - |
122+
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - |
123+
// | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - |
124+
// | | | | | | | | | | | | |
125+
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - |
126+
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - |
127+
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - |

tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

4+
using System.Runtime.InteropServices;
45
using BenchmarkDotNet.Attributes;
6+
using Iced.Intel;
57

68
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
79

810
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
911
public class Shuffle3Channel
1012
{
11-
private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2);
13+
private static readonly DefaultShuffle3 Control = new(SimdUtils.Shuffle.MMShuffle3102);
1214
private byte[] source;
1315
private byte[] destination;
1416

@@ -25,9 +27,7 @@ public void Setup()
2527

2628
[Benchmark]
2729
public void Shuffle3()
28-
{
29-
SimdUtils.Shuffle3(this.source, this.destination, Control);
30-
}
30+
=> SimdUtils.Shuffle3(this.source, this.destination, Control);
3131
}
3232

3333
// 2020-11-02
@@ -60,3 +60,34 @@ public void Shuffle3()
6060
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - |
6161
// | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - |
6262
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - |
63+
64+
// 2023-02-21
65+
// ##########
66+
//
67+
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
68+
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
69+
// .NET SDK= 7.0.103
70+
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
71+
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
72+
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
73+
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
74+
75+
// Runtime=.NET 6.0
76+
77+
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
78+
// |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
79+
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - |
80+
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - |
81+
// | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - |
82+
// | | | | | | | | | | | | |
83+
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - |
84+
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - |
85+
// | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - |
86+
// | | | | | | | | | | | | |
87+
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - |
88+
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - |
89+
// | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - |
90+
// | | | | | | | | | | | | |
91+
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - |
92+
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - |
93+
// | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - |

0 commit comments

Comments
 (0)