Skip to content

Commit 5ab593f

Browse files
Merge pull request #1481 from SixLabors/js/faster-resize
Much Faster sRGB Companding
2 parents 611ff85 + 5659148 commit 5ab593f

File tree

8 files changed

+317
-117
lines changed

8 files changed

+317
-117
lines changed

src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs

Lines changed: 165 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
using System.Numerics;
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
8+
#if SUPPORTS_RUNTIME_INTRINSICS
9+
using System.Runtime.Intrinsics;
10+
using System.Runtime.Intrinsics.X86;
11+
#endif
812

913
namespace SixLabors.ImageSharp.ColorSpaces.Companding
1014
{
@@ -18,49 +22,119 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
1822
/// </remarks>
1923
public static class SRgbCompanding
2024
{
25+
private const int Length = Scale + 2; // 256kb @ 16bit precision.
26+
private const int Scale = (1 << 16) - 1;
27+
28+
// Lookup table for linear -> sRGB compression, built once on first access.
// Entry i holds the companded value of the linear input (i / Scale). The table has
// Scale + 2 entries so that an interpolating lookup may read entry (index + 1)
// even when index == Scale.
private static readonly Lazy<float[]> LazyCompressTable = new Lazy<float[]>(
    () =>
    {
        // Linear inputs at or below this cut-off lie on the straight-line segment of the curve.
        const double LinearCutoff = 0.04045 / 12.92;
        const double InverseGamma = 1.0 / 2.4;

        var table = new float[Length];
        int index = 0;
        while (index < table.Length)
        {
            double linear = (double)index / Scale;
            double companded = linear <= LinearCutoff
                ? linear * 12.92
                : (1.055 * Math.Pow(linear, InverseGamma)) - 0.055;

            table[index] = (float)companded;
            index++;
        }

        return table;
    },
    true);
51+
52+
// Lookup table for sRGB -> linear expansion, built once on first access.
// Entry i holds the linear value of the companded input (i / Scale). The table has
// Scale + 2 entries so that an interpolating lookup may read entry (index + 1)
// even when index == Scale.
private static readonly Lazy<float[]> LazyExpandTable = new Lazy<float[]>(
    () =>
    {
        // Companded inputs at or below this cut-off lie on the straight-line segment of the curve.
        const double CompandedCutoff = 0.04045;

        var table = new float[Length];
        int index = 0;
        while (index < table.Length)
        {
            double companded = (double)index / Scale;
            double linear = companded <= CompandedCutoff
                ? companded / 12.92
                : Math.Pow((companded + 0.055) / 1.055, 2.4);

            table[index] = (float)linear;
            index++;
        }

        return table;
    },
    true);
75+
76+
/// <summary>
/// Gets the sRGB to linear lookup table, creating it on first access.
/// </summary>
private static float[] ExpandTable
{
    get { return LazyExpandTable.Value; }
}

/// <summary>
/// Gets the linear to sRGB lookup table, creating it on first access.
/// </summary>
private static float[] CompressTable
{
    get { return LazyCompressTable.Value; }
}
79+
2180
/// <summary>
/// Expands the companded vectors to their linear equivalents with respect to the energy.
/// </summary>
/// <remarks>
/// The conversion happens in place. When AVX2 is available and at least two vectors are
/// supplied, whole pairs go through the table-driven AVX2 routine and a single left-over
/// vector is converted with <see cref="Expand(ref Vector4)"/>; otherwise every vector goes
/// through the table-driven scalar routine.
/// </remarks>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Expand(Span<Vector4> vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        // Two Vector4 values fill one 256-bit register, so this handles whole pairs only.
        CompandAvx2(vectors, ExpandTable);

        bool hasUnpairedTail = Numerics.Modulo2(vectors.Length) != 0;
        if (hasUnpairedTail)
        {
            // Vector4 fits neatly in pairs, so the remainder can only be the final element.
            Expand(ref vectors[vectors.Length - 1]);
        }

        return;
    }
#endif

    CompandScalar(vectors, ExpandTable);
}
38104

39105
/// <summary>
/// Compresses the uncompanded vectors to their nonlinear equivalents with respect to the energy.
/// </summary>
/// <remarks>
/// The conversion happens in place. When AVX2 is available and at least two vectors are
/// supplied, whole pairs go through the table-driven AVX2 routine and a single left-over
/// vector is converted with <see cref="Compress(ref Vector4)"/>; otherwise every vector goes
/// through the table-driven scalar routine.
/// </remarks>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Compress(Span<Vector4> vectors)
{
    // This method uses no pointers itself, so it does not need an unsafe context;
    // the pointer work is confined to the private companding helpers.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        // Two Vector4 values fill one 256-bit register, so this handles whole pairs only.
        CompandAvx2(vectors, CompressTable);

        if (Numerics.Modulo2(vectors.Length) != 0)
        {
            // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
            Compress(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
        }
    }
    else
#endif
    {
        CompandScalar(vectors, CompressTable);
    }
}
56129

57130
/// <summary>
58131
/// Expands a companded vector to its linear equivalent with respect to the energy.
59132
/// </summary>
60133
/// <param name="vector">The vector.</param>
61-
[MethodImpl(InliningOptions.ShortMethod)]
134+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
62135
public static void Expand(ref Vector4 vector)
63136
{
137+
// Alpha is already a linear representation of opacity so we do not want to convert it.
64138
vector.X = Expand(vector.X);
65139
vector.Y = Expand(vector.Y);
66140
vector.Z = Expand(vector.Z);
@@ -70,9 +144,10 @@ public static void Expand(ref Vector4 vector)
70144
/// Compresses an uncompanded vector (linear) to its nonlinear equivalent.
71145
/// </summary>
72146
/// <param name="vector">The vector.</param>
73-
[MethodImpl(InliningOptions.ShortMethod)]
147+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
74148
public static void Compress(ref Vector4 vector)
75149
{
150+
// Alpha is already a linear representation of opacity so we do not want to convert it.
76151
vector.X = Compress(vector.X);
77152
vector.Y = Compress(vector.Y);
78153
vector.Z = Compress(vector.Z);
@@ -83,15 +158,84 @@ public static void Compress(ref Vector4 vector)
83158
/// </summary>
84159
/// <param name="channel">The channel value.</param>
85160
/// <returns>The <see cref="float"/> representing the linear channel value.</returns>
86-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Expand(float channel)
{
    // Values on the linear toe of the sRGB curve are simply rescaled.
    if (channel <= 0.04045F)
    {
        return channel / 12.92F;
    }

    // Everything above the toe follows the power segment of the curve.
    float shifted = (channel + 0.055F) / 1.055F;
    return MathF.Pow(shifted, 2.4F);
}
88164

89165
/// <summary>
90166
/// Compresses an uncompanded channel (linear) to its nonlinear equivalent.
91167
/// </summary>
92168
/// <param name="channel">The channel value.</param>
93169
/// <returns>The <see cref="float"/> representing the nonlinear channel value.</returns>
94-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Compress(float channel)
{
    // Values on the linear toe of the sRGB curve are simply rescaled.
    if (channel <= 0.0031308F)
    {
        return 12.92F * channel;
    }

    // Everything above the toe follows the power segment of the curve.
    float powered = MathF.Pow(channel, 0.416666666666667F);
    return (1.055F * powered) - 0.055F;
}
173+
174+
#if SUPPORTS_RUNTIME_INTRINSICS
175+
176+
/// <summary>
/// Compands pairs of vectors in place by interpolating between entries of a lookup table,
/// using AVX2 gather instructions.
/// </summary>
/// <remarks>
/// Only whole pairs are processed: when <paramref name="vectors"/> has an odd length the
/// final element is not touched and must be handled by the caller.
/// </remarks>
/// <param name="vectors">The span of vectors to convert in place.</param>
/// <param name="table">The lookup table; entries 0 through Scale + 1 may be read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandAvx2(Span<Vector4> vectors, float[] table)
{
    fixed (float* lut = &table[0])
    {
        Vector256<float> lower = Vector256<float>.Zero;
        Vector256<float> upper = Vector256.Create((float)Scale);
        Vector256<int> one = Vector256.Create(1);

        // A Vector256<float> holds 8 floats, i.e. two Vector4 values, so walk the span in pairs.
        ref Vector256<float> first = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
        int pairs = vectors.Length / 2;

        for (int i = 0; i < pairs; i++)
        {
            ref Vector256<float> current = ref Unsafe.Add(ref first, i);

            // Map the channels into table space and clamp to [0, Scale] so that both
            // gathers below (index and index + 1) stay inside the table.
            Vector256<float> scaled = Avx.Multiply(upper, current);
            scaled = Avx.Min(Avx.Max(lower, scaled), upper);

            Vector256<int> index = Avx.ConvertToVector256Int32WithTruncation(scaled);
            Vector256<float> fraction = Avx.Subtract(scaled, Avx.ConvertToVector256Single(index));

            Vector256<float> floor = Avx2.GatherVector256(lut, index, sizeof(float));
            Vector256<float> ceiling = Avx2.GatherVector256(lut, Avx2.Add(index, one), sizeof(float));
            Vector256<float> companded = Numerics.Lerp(floor, ceiling, fraction);

            // Alpha is already a linear representation of opacity so we do not want to convert it:
            // the blend keeps the original W lanes and takes X, Y, Z from the companded result.
            current = Avx.Blend(companded, current, Numerics.BlendAlphaControl);
        }
    }
}
207+
#endif
208+
209+
/// <summary>
/// Compands every vector in place by interpolating between entries of a lookup table.
/// </summary>
/// <param name="vectors">The span of vectors to convert in place.</param>
/// <param name="table">The lookup table; entries 0 through Scale + 1 may be read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandScalar(Span<Vector4> vectors, float[] table)
{
    fixed (float* lut = &table[0])
    {
        Vector4 lower = Vector4.Zero;
        var upper = new Vector4(Scale);
        ref Vector4 first = ref MemoryMarshal.GetReference(vectors);

        for (int i = 0; i < vectors.Length; i++)
        {
            ref Vector4 current = ref Unsafe.Add(ref first, i);

            // Map the channels into table space and clamp to [0, Scale] so that both
            // table reads below (index and index + 1) stay inside the table.
            Vector4 scaled = Numerics.Clamp(current * Scale, lower, upper);

            float x = scaled.X;
            float y = scaled.Y;
            float z = scaled.Z;

            uint xi = (uint)x;
            uint yi = (uint)y;
            uint zi = (uint)z;

            // Alpha is already a linear representation of opacity so we do not want to convert it;
            // only X, Y and Z are written back.
            current.X = Numerics.Lerp(lut[xi], lut[xi + 1], x - (int)xi);
            current.Y = Numerics.Lerp(lut[yi], lut[yi + 1], y - (int)yi);
            current.Z = Numerics.Lerp(lut[zi], lut[zi + 1], z - (int)zi);
        }
    }
}
96240
}
97241
}

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace SixLabors.ImageSharp
1919
internal static class Numerics
2020
{
2121
#if SUPPORTS_RUNTIME_INTRINSICS
22-
private const int BlendAlphaControl = 0b_10_00_10_00;
22+
public const int BlendAlphaControl = 0b_10_00_10_00;
2323
private const int ShuffleAlphaControl = 0b_11_11_11_11;
2424
#endif
2525

@@ -710,5 +710,43 @@ public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
710710
}
711711
}
712712
}
713+
714+
#if SUPPORTS_RUNTIME_INTRINSICS
715+
716+
/// <summary>
/// Linearly interpolates, lane by lane, between two vectors using the given weights.
/// </summary>
/// <param name="value1">The first value.</param>
/// <param name="value2">The second value.</param>
/// <param name="amount">Values between 0 and 1 that indicate the weight of <paramref name="value2"/>.</param>
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> Lerp(
    in Vector256<float> value1,
    in Vector256<float> value2,
    in Vector256<float> amount)
{
    Vector256<float> delta = Avx.Subtract(value2, value1);

    // Use the fused multiply-add when the CPU offers it; otherwise multiply then add.
    return Fma.IsSupported
        ? Fma.MultiplyAdd(delta, amount, value1)
        : Avx.Add(Avx.Multiply(delta, amount), value1);
}
739+
#endif
740+
741+
/// <summary>
/// Linearly interpolates between two values using the given weight.
/// </summary>
/// <param name="value1">The first value.</param>
/// <param name="value2">The second value.</param>
/// <param name="amount">A value between 0 and 1 that indicates the weight of <paramref name="value2"/>.</param>
/// <returns>The <see cref="float"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Lerp(float value1, float value2, float amount)
{
    float delta = value2 - value1;
    return (delta * amount) + value1;
}
713751
}
714752
}

src/ImageSharp/PixelFormats/Utils/Vector4Converters.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
@@ -45,4 +45,4 @@ internal static void ApplyBackwardConversionModifiers(Span<Vector4> vectors, Pix
4545
}
4646
}
4747
}
48-
}
48+
}

src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,7 @@ public Span<float> Values
6161
/// <returns>The weighted sum</returns>
6262
[MethodImpl(InliningOptions.ShortMethod)]
public Vector4 Convolve(Span<Vector4> rowSpan)
{
    // The kernel is applied starting at its own start index within the row.
    ref Vector4 kernelStart = ref rowSpan[this.StartIndex];
    return this.ConvolveCore(ref kernelStart);
}
6765

6866
[MethodImpl(InliningOptions.ShortMethod)]
6967
public Vector4 ConvolveCore(ref Vector4 rowStartRef)
@@ -91,9 +89,7 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
9189
/// </summary>
9290
[MethodImpl(InliningOptions.ShortMethod)]
internal ResizeKernel AlterLeftValue(int left)
{
    // Same buffer pointer and length as this kernel; only the first constructor argument changes.
    var shifted = new ResizeKernel(left, this.bufferPtr, this.Length);
    return shifted;
}
9793

9894
internal void Fill(Span<double> values)
9995
{

src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,10 @@ public void Dispose()
105105

106106
[MethodImpl(InliningOptions.ShortMethod)]
public Span<Vector4> GetColumnSpan(int x, int startY)
{
    // The buffer is transposed, so source column x is stored as row x.
    Span<Vector4> column = this.transposedFirstPassBuffer.GetRowSpan(x);

    // Rows are stored relative to the start of the current window.
    int offset = startY - this.currentWindow.Min;
    return column.Slice(offset);
}
111109

112110
/// <summary>
/// Calculates the first-pass values for the current window.
/// </summary>
public void Initialize()
{
    this.CalculateFirstPassValues(this.currentWindow);
}
116112

117113
public void FillDestinationPixels(RowInterval rowInterval, Buffer2D<TPixel> destination)
118114
{

0 commit comments

Comments
 (0)