@@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
8
8
[ Config ( typeof ( Config . HwIntrinsics_SSE_AVX ) ) ]
9
9
public class Pad3Shuffle4Channel
10
10
{
11
// Shuffle control handed to SimdUtils.Pad3Shuffle4 by the Pad3Shuffle4 benchmark.
// The removed declaration built it from the literal order ( 1 , 0 , 3 , 2 ); the added
// declaration uses a target-typed new and passes SimdUtils.Shuffle.MMShuffle1032 instead.
// NOTE(review): presumably MMShuffle1032 encodes the same 1,0,3,2 order - confirm against SimdUtils.Shuffle.
// The removed ControlFast field ( a default XYZWPad3Shuffle4 ) is superseded by an inline
// default ( XYZWPad3Shuffle4 ) at the Pad3Shuffle4FastFallback call site.
- private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4 ( 1 , 0 , 3 , 2 ) ;
12
- private static readonly XYZWPad3Shuffle4 ControlFast = default ;
11
+ private static readonly DefaultPad3Shuffle4 Control = new ( SimdUtils . Shuffle . MMShuffle1032 ) ;
13
12
// Buffer passed as the first argument of SimdUtils.Pad3Shuffle4 in both benchmarks.
// NOTE(review): initialised outside this view (presumably in Setup) - confirm size and contents there.
private byte [ ] source ;
14
13
// Buffer passed as the second argument of SimdUtils.Pad3Shuffle4 in both benchmarks.
// NOTE(review): initialised outside this view (presumably in Setup) - confirm size there.
private byte [ ] destination ;
15
14
@@ -26,15 +25,11 @@ public void Setup()
26
25
27
26
/// <summary>
/// Benchmark that invokes <c>SimdUtils.Pad3Shuffle4</c> with <c>this.source</c>, <c>this.destination</c>
/// and the shared static <c>Control</c> shuffle instance.
/// </summary>
/// <remarks>
/// The change shown here only converts the block body into an expression body; the call and its
/// arguments are the same on both sides.
/// </remarks>
[ Benchmark ]
28
27
public void Pad3Shuffle4 ( )
29
- {
30
- SimdUtils . Pad3Shuffle4 ( this . source , this . destination , Control ) ;
31
- }
28
+ => SimdUtils . Pad3Shuffle4 ( this . source , this . destination , Control ) ;
32
29
33
30
/// <summary>
/// Benchmark that invokes <c>SimdUtils.Pad3Shuffle4</c> with <c>this.source</c>, <c>this.destination</c>
/// and a default-valued <c>XYZWPad3Shuffle4</c> shuffle.
/// </summary>
/// <remarks>
/// The removed body passed the static <c>ControlFast</c> field (itself declared as <c>default</c>);
/// the added expression body passes <c>default ( XYZWPad3Shuffle4 )</c> inline, so the argument value
/// is the same on both sides.
/// NOTE(review): the method name suggests this exercises a specialised "fast fallback" path for
/// XYZWPad3Shuffle4 - that path is not visible here; confirm in SimdUtils.
/// </remarks>
[ Benchmark ]
34
31
public void Pad3Shuffle4FastFallback ( )
35
- {
36
- SimdUtils . Pad3Shuffle4 ( this . source , this . destination , ControlFast ) ;
37
- }
32
+ => SimdUtils . Pad3Shuffle4 ( this . source , this . destination , default ( XYZWPad3Shuffle4 ) ) ;
38
33
}
39
34
40
35
// 2020-10-30
@@ -83,3 +78,50 @@ public void Pad3Shuffle4FastFallback()
83
78
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
84
79
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
85
80
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
81
+
82
+ // 2023-02-21
83
+ // ##########
84
+ //
85
+ // BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
86
+ // 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
87
+ // .NET SDK= 7.0.103
88
+ // [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
89
+ // 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
90
+ // 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
91
+ // 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
92
+
93
+ // Runtime=.NET 6.0
94
+
95
+ // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
96
+ // |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
97
+ // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - |
98
+ // | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - |
99
+ // | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - |
100
+ // | | | | | | | | | | | | |
101
+ // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - |
102
+ // | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - |
103
+ // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - |
104
+ // | | | | | | | | | | | | |
105
+ // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - |
106
+ // | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - |
107
+ // | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - |
108
+ // | | | | | | | | | | | | |
109
+ // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - |
110
+ // | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - |
111
+ // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - |
112
+ // | | | | | | | | | | | | |
113
+ // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - |
114
+ // | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - |
115
+ // | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - |
116
+ // | | | | | | | | | | | | |
117
+ // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - |
118
+ // | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - |
119
+ // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - |
120
+ // | | | | | | | | | | | | |
121
+ // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - |
122
+ // | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - |
123
+ // | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - |
124
+ // | | | | | | | | | | | | |
125
+ // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - |
126
+ // | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - |
127
+ // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - |
0 commit comments