5
5
using System . Numerics ;
6
6
using System . Runtime . CompilerServices ;
7
7
using System . Runtime . InteropServices ;
8
+ #if SUPPORTS_RUNTIME_INTRINSICS
9
+ using System . Runtime . Intrinsics ;
10
+ using System . Runtime . Intrinsics . X86 ;
11
+ #endif
8
12
9
13
namespace SixLabors . ImageSharp . ColorSpaces . Companding
10
14
{
@@ -18,49 +22,119 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
18
22
/// </remarks>
19
23
public static class SRgbCompanding
20
24
{
25
/// <summary>
/// The highest table index that corresponds to an input sample: 16 bits of precision (65535).
/// </summary>
private const int Scale = (1 << 16) - 1;

/// <summary>
/// The number of entries in each lookup table (65537 floats, roughly 256 KB per table).
/// Indices 0..<see cref="Scale"/> need <c>Scale + 1</c> entries; the extra entry keeps the
/// <c>index + 1</c> read used for interpolation in bounds when the index equals <see cref="Scale"/>.
/// </summary>
private const int Length = Scale + 2;
27
+
28
/// <summary>
/// Lazily built lookup table holding the compressed (linear to nonlinear) value for every
/// index in <c>[0, Length)</c>, where an index represents the input <c>index / Scale</c>.
/// The values are computed in double precision and stored as floats.
/// </summary>
private static readonly Lazy<float[]> LazyCompressTable = new Lazy<float[]>(
    () =>
    {
        // Inputs at or below this breakpoint use the linear segment of the curve.
        double linearLimit = 0.04045 / 12.92;
        double exponent = 1.0 / 2.4;

        float[] table = new float[Length];

        for (int index = 0; index < table.Length; index++)
        {
            double linear = (double)index / Scale;

            double companded = linear <= linearLimit
                ? linear * 12.92
                : (1.055 * Math.Pow(linear, exponent)) - 0.055;

            table[index] = (float)companded;
        }

        return table;
    },
    true);
51
+
52
/// <summary>
/// Lazily built lookup table holding the expanded (nonlinear to linear) value for every
/// index in <c>[0, Length)</c>, where an index represents the input <c>index / Scale</c>.
/// The values are computed in double precision and stored as floats.
/// </summary>
private static readonly Lazy<float[]> LazyExpandTable = new Lazy<float[]>(
    () =>
    {
        float[] table = new float[Length];

        for (int index = 0; index < table.Length; index++)
        {
            double companded = (double)index / Scale;

            // Inputs at or below 0.04045 use the linear segment; the rest use the power segment.
            double linear = companded <= 0.04045
                ? companded / 12.92
                : Math.Pow((companded + 0.055) / 1.055, 2.4);

            table[index] = (float)linear;
        }

        return table;
    },
    true);
75
+
76
/// <summary>
/// Gets the expansion lookup table, building it on first access.
/// </summary>
private static float[] ExpandTable
{
    get { return LazyExpandTable.Value; }
}

/// <summary>
/// Gets the compression lookup table, building it on first access.
/// </summary>
private static float[] CompressTable
{
    get { return LazyCompressTable.Value; }
}
79
+
21
80
/// <summary>
/// Expands every companded vector in the span to its linear equivalent, in place.
/// Uses the AVX2 table lookup when it is available and the span holds at least two vectors;
/// otherwise falls back to the scalar table lookup.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Expand(Span<Vector4> vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        CompandAvx2(vectors, ExpandTable);

        // The AVX2 path consumes vectors two at a time, so an odd length leaves exactly
        // one trailing vector. It is handled by the single-vector overload.
        bool hasTrailingVector = Numerics.Modulo2(vectors.Length) != 0;
        if (hasTrailingVector)
        {
            Expand(ref vectors[vectors.Length - 1]);
        }

        return;
    }
#endif

    CompandScalar(vectors, ExpandTable);
}
38
104
39
105
/// <summary>
/// Compresses every uncompanded (linear) vector in the span to its nonlinear equivalent, in place.
/// Uses the AVX2 table lookup when it is available and the span holds at least two vectors;
/// otherwise falls back to the scalar table lookup.
/// </summary>
/// <remarks>
/// The method performs no pointer operations itself, so it is not marked <c>unsafe</c>;
/// this matches the sibling <c>Expand(Span&lt;Vector4&gt;)</c> overload.
/// </remarks>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Compress(Span<Vector4> vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        CompandAvx2(vectors, CompressTable);

        if (Numerics.Modulo2(vectors.Length) != 0)
        {
            // The AVX2 path consumes vectors in pairs, so an odd length leaves exactly
            // one trailing vector. It is handled by the single-vector overload.
            Compress(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
        }
    }
    else
#endif
    {
        CompandScalar(vectors, CompressTable);
    }
}
56
129
57
130
/// <summary>
58
131
/// Expands a companded vector to its linear equivalent with respect to the energy.
59
132
/// </summary>
60
133
/// <param name="vector">The vector.</param>
61
- [ MethodImpl ( InliningOptions . ShortMethod ) ]
134
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
62
135
public static void Expand ( ref Vector4 vector )
63
136
{
137
+ // Alpha is already a linear representation of opacity so we do not want to convert it.
64
138
vector . X = Expand ( vector . X ) ;
65
139
vector . Y = Expand ( vector . Y ) ;
66
140
vector . Z = Expand ( vector . Z ) ;
@@ -70,9 +144,10 @@ public static void Expand(ref Vector4 vector)
70
144
/// Compresses an uncompanded vector (linear) to its nonlinear equivalent.
71
145
/// </summary>
72
146
/// <param name="vector">The vector.</param>
73
- [ MethodImpl ( InliningOptions . ShortMethod ) ]
147
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
74
148
public static void Compress ( ref Vector4 vector )
75
149
{
150
+ // Alpha is already a linear representation of opacity so we do not want to convert it.
76
151
vector . X = Compress ( vector . X ) ;
77
152
vector . Y = Compress ( vector . Y ) ;
78
153
vector . Z = Compress ( vector . Z ) ;
@@ -83,15 +158,84 @@ public static void Compress(ref Vector4 vector)
83
158
/// </summary>
84
159
/// <param name="channel">The channel value.</param>
85
160
/// <returns>The <see cref="float"/> representing the linear channel value.</returns>
86
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Expand(float channel)
{
    // Values at or below the 0.04045 breakpoint sit on the linear segment of the curve.
    if (channel <= 0.04045F)
    {
        return channel / 12.92F;
    }

    // Everything else (including NaN, which fails the comparison) takes the power segment.
    return MathF.Pow((channel + 0.055F) / 1.055F, 2.4F);
}
88
164
89
165
/// <summary>
/// Converts a single linear (uncompanded) channel value to its nonlinear equivalent.
/// </summary>
/// <param name="channel">The channel value.</param>
/// <returns>The <see cref="float"/> representing the nonlinear channel value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Compress(float channel)
{
    // Values at or below the 0.0031308 breakpoint sit on the linear segment of the curve.
    if (channel <= 0.0031308F)
    {
        return 12.92F * channel;
    }

    // Everything else (including NaN, which fails the comparison) takes the power segment.
    return (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
}
173
+
174
#if SUPPORTS_RUNTIME_INTRINSICS

/// <summary>
/// Compands the vectors in place, two at a time, by interpolating between adjacent entries
/// of <paramref name="table"/>. Only whole pairs are processed; a trailing odd vector is
/// left untouched for the caller to handle.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
/// <param name="table">The lookup table to sample; indices up to <c>Scale + 1</c> are read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandAvx2(Span<Vector4> vectors, float[] table)
{
    fixed (float* lookup = &table[0])
    {
        Vector256<float> lower = Vector256<float>.Zero;
        Vector256<float> upper = Vector256.Create((float)Scale);
        Vector256<int> one = Vector256.Create(1);

        // A Vector256<float> holds 8 floats, i.e. two Vector4 values, hence the division by 2.
        int pairCount = (int)((uint)vectors.Length / 2u);
        ref Vector256<float> first = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));

        for (int i = 0; i < pairCount; i++)
        {
            ref Vector256<float> pair = ref Unsafe.Add(ref first, i);

            // Scale into table-index space and clamp to [0, Scale].
            Vector256<float> scaled = Avx.Multiply(upper, pair);
            scaled = Avx.Max(lower, scaled);
            scaled = Avx.Min(scaled, upper);

            // Split into the integer table index and the fractional remainder.
            Vector256<int> index = Avx.ConvertToVector256Int32WithTruncation(scaled);
            Vector256<float> whole = Avx.ConvertToVector256Single(index);
            Vector256<float> fraction = Avx.Subtract(scaled, whole);

            // Fetch the two neighbouring table entries for every lane.
            Vector256<float> low = Avx2.GatherVector256(lookup, index, sizeof(float));
            Vector256<float> high = Avx2.GatherVector256(lookup, Avx2.Add(index, one), sizeof(float));

            // Alpha is already a linear representation of opacity so we do not want to convert it:
            // the blend keeps the lanes selected by Numerics.BlendAlphaControl from the input.
            Vector256<float> companded = Numerics.Lerp(low, high, fraction);
            pair = Avx.Blend(companded, pair, Numerics.BlendAlphaControl);
        }
    }
}
#endif
208
+
209
/// <summary>
/// Compands the X, Y and Z components of every vector in place by interpolating between
/// adjacent entries of <paramref name="table"/>. The W component is never written.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
/// <param name="table">The lookup table to sample; indices up to <c>Scale + 1</c> are read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandScalar(Span<Vector4> vectors, float[] table)
{
    fixed (float* lookup = &table[0])
    {
        Vector4 lower = Vector4.Zero;
        var upper = new Vector4(Scale);
        ref Vector4 first = ref MemoryMarshal.GetReference(vectors);

        for (int i = 0; i < vectors.Length; i++)
        {
            ref Vector4 current = ref Unsafe.Add(ref first, i);

            // Scale into table-index space and clamp to [0, Scale].
            Vector4 scaled = Numerics.Clamp(current * Scale, lower, upper);

            float x = scaled.X;
            float y = scaled.Y;
            float z = scaled.Z;

            // Truncate to get the lower table index for each channel.
            uint xi = (uint)x;
            uint yi = (uint)y;
            uint zi = (uint)z;

            // Alpha is already a linear representation of opacity so we do not want to convert it.
            current.X = Numerics.Lerp(lookup[xi], lookup[xi + 1], x - (int)xi);
            current.Y = Numerics.Lerp(lookup[yi], lookup[yi + 1], y - (int)yi);
            current.Z = Numerics.Lerp(lookup[zi], lookup[zi + 1], z - (int)zi);
        }
    }
}
96
240
}
97
241
}
0 commit comments