Skip to content

Commit d8b464b

Browse files
Merge pull request #2918 from SixLabors/js/block8x8-simd
Improve JPEG Block8x8F Intrinsics for Vector128 paths.
2 parents 62da42d + 4c1ecfa commit d8b464b

35 files changed

+1104
-970
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 114 additions & 122 deletions
Large diffs are not rendered by default.

src/ImageSharp/Common/Helpers/Vector128Utilities.cs

Lines changed: 77 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
using System.Diagnostics;
55
using System.Diagnostics.CodeAnalysis;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
78
using System.Runtime.Intrinsics;
89
using System.Runtime.Intrinsics.Arm;
10+
using System.Runtime.Intrinsics.Wasm;
911
using System.Runtime.Intrinsics.X86;
1012

1113
namespace SixLabors.ImageSharp.Common.Helpers;
@@ -18,30 +20,36 @@ namespace SixLabors.ImageSharp.Common.Helpers;
1820
/// </list>
1921
/// Should only be used if the intrinsics are available.
2022
/// </summary>
21-
internal static class Vector128Utilities
23+
#pragma warning disable SA1649 // File name should match first type name
24+
internal static class Vector128_
25+
#pragma warning restore SA1649 // File name should match first type name
2226
{
2327
/// <summary>
2428
/// Gets a value indicating whether native byte shuffle operations are supported.
2529
/// </summary>
26-
public static bool SupportsShuffleFloat
30+
public static bool SupportsShuffleNativeByte
2731
{
2832
[MethodImpl(MethodImplOptions.AggressiveInlining)]
29-
get => Sse.IsSupported;
30-
}
33+
get
34+
{
35+
if (Vector128.IsHardwareAccelerated)
36+
{
37+
if (RuntimeInformation.ProcessArchitecture is Architecture.X86 or Architecture.X64)
38+
{
39+
return Ssse3.IsSupported;
40+
}
3141

32-
/// <summary>
33-
/// Gets a value indicating whether shuffle operations are supported.
34-
/// </summary>
35-
public static bool SupportsShuffleByte
36-
{
37-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
38-
get => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported;
42+
return true;
43+
}
44+
45+
return false;
46+
}
3947
}
4048

4149
/// <summary>
4250
/// Gets a value indicating whether right align operations are supported.
4351
/// </summary>
44-
public static bool SupportsRightAlign
52+
public static bool SupportsAlignRight
4553
{
4654
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4755
get => Ssse3.IsSupported || AdvSimd.IsSupported;
@@ -63,15 +71,21 @@ public static bool SupportsShiftByte
6371
/// <param name="control">The shuffle control byte.</param>
6472
/// <returns>The <see cref="Vector128{Single}"/>.</returns>
6573
[MethodImpl(MethodImplOptions.AggressiveInlining)]
66-
public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control)
74+
public static Vector128<float> ShuffleNative(Vector128<float> vector, [ConstantExpected] byte control)
6775
{
6876
if (Sse.IsSupported)
6977
{
7078
return Sse.Shuffle(vector, vector, control);
7179
}
7280

73-
ThrowUnreachableException();
74-
return default;
81+
// Don't use InverseMMShuffle here as we want to avoid the cast.
82+
Vector128<int> indices = Vector128.Create(
83+
control & 0x3,
84+
(control >> 2) & 0x3,
85+
(control >> 4) & 0x3,
86+
(control >> 6) & 0x3);
87+
88+
return Vector128.Shuffle(vector, indices);
7589
}
7690

7791
/// <summary>
@@ -86,20 +100,18 @@ public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpecte
86100
/// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
87101
/// </returns>
88102
[MethodImpl(MethodImplOptions.AggressiveInlining)]
89-
public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
103+
public static Vector128<byte> ShuffleNative(Vector128<byte> vector, Vector128<byte> indices)
90104
{
105+
// On x86/x64 we use the SSSE3 shuffle intrinsic directly to avoid additional instructions (1 instruction instead of 3).
91106
if (Ssse3.IsSupported)
92107
{
93108
return Ssse3.Shuffle(vector, indices);
94109
}
95110

96-
if (AdvSimd.Arm64.IsSupported)
97-
{
98-
return AdvSimd.Arm64.VectorTableLookup(vector, indices);
99-
}
100-
101-
ThrowUnreachableException();
102-
return default;
111+
// For ARM and WASM, codegen will be optimal.
112+
// We don't throw on x86/x64 when SSSE3 is missing, so callers must never use this method without
113+
// checking for support.
114+
return Vector128.Shuffle(vector, indices);
103115
}
104116

105117
/// <summary>
@@ -193,6 +205,11 @@ public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector)
193205
return AdvSimd.ConvertToInt32RoundToEven(vector);
194206
}
195207

208+
if (PackedSimd.IsSupported)
209+
{
210+
return PackedSimd.ConvertToInt32Saturate(PackedSimd.RoundToNearest(vector));
211+
}
212+
196213
Vector128<float> sign = vector & Vector128.Create(-0F);
197214
Vector128<float> val_2p23_f32 = sign | Vector128.Create(8388608F);
198215

@@ -218,6 +235,11 @@ public static Vector128<float> RoundToNearestInteger(Vector128<float> vector)
218235
return AdvSimd.RoundToNearest(vector);
219236
}
220237

238+
if (PackedSimd.IsSupported)
239+
{
240+
return PackedSimd.RoundToNearest(vector);
241+
}
242+
221243
Vector128<float> sign = vector & Vector128.Create(-0F);
222244
Vector128<float> val_2p23_f32 = sign | Vector128.Create(8388608F);
223245

@@ -270,8 +292,16 @@ public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector
270292
return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(AdvSimd.ExtractNarrowingSaturateUnsignedLower(left), right);
271293
}
272294

273-
ThrowUnreachableException();
274-
return default;
295+
if (PackedSimd.IsSupported)
296+
{
297+
return PackedSimd.ConvertNarrowingSaturateUnsigned(left, right);
298+
}
299+
300+
Vector128<short> min = Vector128.Create((short)byte.MinValue);
301+
Vector128<short> max = Vector128.Create((short)byte.MaxValue);
302+
Vector128<ushort> lefClamped = Clamp(left, min, max).AsUInt16();
303+
Vector128<ushort> rightClamped = Clamp(right, min, max).AsUInt16();
304+
return Vector128.Narrow(lefClamped, rightClamped);
275305
}
276306

277307
/// <summary>
@@ -293,10 +323,30 @@ public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128
293323
return AdvSimd.ExtractNarrowingSaturateUpper(AdvSimd.ExtractNarrowingSaturateLower(left), right);
294324
}
295325

296-
ThrowUnreachableException();
297-
return default;
326+
if (PackedSimd.IsSupported)
327+
{
328+
return PackedSimd.ConvertNarrowingSaturateSigned(left, right);
329+
}
330+
331+
Vector128<int> min = Vector128.Create((int)short.MinValue);
332+
Vector128<int> max = Vector128.Create((int)short.MaxValue);
333+
Vector128<int> lefClamped = Clamp(left, min, max);
334+
Vector128<int> rightClamped = Clamp(right, min, max);
335+
return Vector128.Narrow(lefClamped, rightClamped);
298336
}
299337

338+
/// <summary>
339+
/// Restricts a vector between a minimum and a maximum value.
340+
/// </summary>
341+
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
342+
/// <param name="value">The vector to restrict.</param>
343+
/// <param name="min">The minimum value.</param>
344+
/// <param name="max">The maximum value.</param>
345+
/// <returns>The restricted <see cref="Vector128{T}"/>.</returns>
346+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
347+
public static Vector128<T> Clamp<T>(Vector128<T> value, Vector128<T> min, Vector128<T> max)
348+
=> Vector128.Min(Vector128.Max(value, min), max);
349+
300350
[DoesNotReturn]
301351
private static void ThrowUnreachableException() => throw new UnreachableException();
302352
}

src/ImageSharp/Common/Helpers/Vector256Utilities.cs

Lines changed: 78 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,23 @@ namespace SixLabors.ImageSharp.Common.Helpers;
1717
/// </list>
1818
/// Should only be used if the intrinsics are available.
1919
/// </summary>
20-
internal static class Vector256Utilities
20+
#pragma warning disable SA1649 // File name should match first type name
21+
internal static class Vector256_
22+
#pragma warning restore SA1649 // File name should match first type name
2123
{
2224
/// <summary>
2325
/// Gets a value indicating whether shuffle float operations are supported.
2426
/// </summary>
25-
public static bool SupportsShuffleFloat
27+
public static bool SupportsShuffleNativeFloat
2628
{
2729
[MethodImpl(MethodImplOptions.AggressiveInlining)]
28-
get => Avx.IsSupported || Sse.IsSupported;
30+
get => Avx.IsSupported;
2931
}
3032

3133
/// <summary>
3234
/// Gets a value indicating whether shuffle byte operations are supported.
3335
/// </summary>
34-
public static bool SupportsShuffleByte
36+
public static bool SupportsShuffleNativeByte
3537
{
3638
[MethodImpl(MethodImplOptions.AggressiveInlining)]
3739
get => Avx2.IsSupported;
@@ -44,20 +46,13 @@ public static bool SupportsShuffleByte
4446
/// <param name="control">The shuffle control byte.</param>
4547
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
4648
[MethodImpl(MethodImplOptions.AggressiveInlining)]
47-
public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpected] byte control)
49+
public static Vector256<float> ShuffleNative(Vector256<float> vector, [ConstantExpected] byte control)
4850
{
4951
if (Avx.IsSupported)
5052
{
5153
return Avx.Shuffle(vector, vector, control);
5254
}
5355

54-
if (Sse.IsSupported)
55-
{
56-
Vector128<float> lower = vector.GetLower();
57-
Vector128<float> upper = vector.GetUpper();
58-
return Vector256.Create(Sse.Shuffle(lower, lower, control), Sse.Shuffle(upper, upper, control));
59-
}
60-
6156
ThrowUnreachableException();
6257
return default;
6358
}
@@ -71,7 +66,7 @@ public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpecte
7166
/// </param>
7267
/// <returns>The <see cref="Vector256{Byte}"/>.</returns>
7368
[MethodImpl(MethodImplOptions.AggressiveInlining)]
74-
public static Vector256<byte> Shuffle(Vector256<byte> vector, Vector256<byte> indices)
69+
public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<byte> indices)
7570
{
7671
if (Avx2.IsSupported)
7772
{
@@ -96,13 +91,6 @@ public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
9691
return Avx.ConvertToVector256Int32(vector);
9792
}
9893

99-
if (Sse2.IsSupported)
100-
{
101-
Vector128<int> lower = Sse2.ConvertToVector128Int32(vector.GetLower());
102-
Vector128<int> upper = Sse2.ConvertToVector128Int32(vector.GetUpper());
103-
return Vector256.Create(lower, upper);
104-
}
105-
10694
Vector256<float> sign = vector & Vector256.Create(-0F);
10795
Vector256<float> val_2p23_f32 = sign | Vector256.Create(8388608F);
10896

@@ -152,6 +140,76 @@ public static Vector256<float> MultiplyAdd(
152140
return va + (vm0 * vm1);
153141
}
154142

143+
/// <summary>
144+
/// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
145+
/// </summary>
146+
/// <remarks>ret = (vm0 * vm1) - vs</remarks>
147+
/// <param name="vs">The vector to subtract from the intermediate result.</param>
148+
/// <param name="vm0">The first vector to multiply.</param>
149+
/// <param name="vm1">The second vector to multiply.</param>
150+
/// <returns>The <see cref="Vector256{T}"/>.</returns>
151+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
152+
public static Vector256<float> MultiplySubtract(
153+
Vector256<float> vs,
154+
Vector256<float> vm0,
155+
Vector256<float> vm1)
156+
{
157+
if (Fma.IsSupported)
158+
{
159+
return Fma.MultiplySubtract(vm1, vm0, vs);
160+
}
161+
162+
return (vm0 * vm1) - vs;
163+
}
164+
165+
/// <summary>
166+
/// Packs signed 32-bit integers to signed 16-bit integers and saturates.
167+
/// </summary>
168+
/// <param name="left">The left hand source vector.</param>
169+
/// <param name="right">The right hand source vector.</param>
170+
/// <returns>The <see cref="Vector256{Int16}"/>.</returns>
171+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
172+
public static Vector256<short> PackSignedSaturate(Vector256<int> left, Vector256<int> right)
173+
{
174+
if (Avx2.IsSupported)
175+
{
176+
return Avx2.PackSignedSaturate(left, right);
177+
}
178+
179+
Vector256<int> min = Vector256.Create((int)short.MinValue);
180+
Vector256<int> max = Vector256.Create((int)short.MaxValue);
181+
Vector256<int> lefClamped = Clamp(left, min, max);
182+
Vector256<int> rightClamped = Clamp(right, min, max);
183+
return Vector256.Narrow(lefClamped, rightClamped);
184+
}
185+
186+
/// <summary>
187+
/// Restricts a vector between a minimum and a maximum value.
188+
/// </summary>
189+
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
190+
/// <param name="value">The vector to restrict.</param>
191+
/// <param name="min">The minimum value.</param>
192+
/// <param name="max">The maximum value.</param>
193+
/// <returns>The restricted <see cref="Vector256{T}"/>.</returns>
194+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
195+
public static Vector256<T> Clamp<T>(Vector256<T> value, Vector256<T> min, Vector256<T> max)
196+
=> Vector256.Min(Vector256.Max(value, min), max);
197+
198+
/// <summary>
199+
/// Widens a <see cref="Vector128{Int16}"/> to a <see cref="Vector256{Int32}"/>.
200+
/// </summary>
201+
/// <param name="value">The vector to widen.</param>
202+
/// <returns>The widened <see cref="Vector256{Int32}"/>.</returns>
203+
public static Vector256<int> Widen(Vector128<short> value)
204+
{
205+
if (Avx2.IsSupported)
206+
{
207+
return Avx2.ConvertToVector256Int32(value);
208+
}
209+
210+
return Vector256.WidenLower(value.ToVector256());
211+
}
212+
155213
[DoesNotReturn]
156214
private static void ThrowUnreachableException() => throw new UnreachableException();
157215
}

0 commit comments

Comments
 (0)