Skip to content

Commit 8a23d42

Browse files
Port more V256 code
1 parent 038f047 commit 8a23d42

File tree

8 files changed

+164
-178
lines changed

8 files changed

+164
-178
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 62 additions & 62 deletions
Large diffs are not rendered by default.

src/ImageSharp/Common/Helpers/Vector128Utilities.cs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ internal static class Vector128_
2626
/// <summary>
2727
/// Gets a value indicating whether shuffle float operations are supported.
2828
/// </summary>
29-
public static bool SupportsShuffleFloat
29+
public static bool SupportsShuffleNativeFloat
3030
{
3131
[MethodImpl(MethodImplOptions.AggressiveInlining)]
3232
get => Sse.IsSupported;
@@ -35,10 +35,10 @@ public static bool SupportsShuffleFloat
3535
/// <summary>
3636
/// Gets a value indicating whether shuffle byte operations are supported.
3737
/// </summary>
38-
public static bool SupportsShuffleByte
38+
public static bool SupportsShuffleNativeByte
3939
{
4040
[MethodImpl(MethodImplOptions.AggressiveInlining)]
41-
get => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported;
41+
get => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported;
4242
}
4343

4444
/// <summary>
@@ -66,7 +66,7 @@ public static bool SupportsShiftByte
6666
/// <param name="control">The shuffle control byte.</param>
6767
/// <returns>The <see cref="Vector128{Single}"/>.</returns>
6868
[MethodImpl(MethodImplOptions.AggressiveInlining)]
69-
public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control)
69+
public static Vector128<float> ShuffleNative(Vector128<float> vector, [ConstantExpected] byte control)
7070
{
7171
if (Sse.IsSupported)
7272
{
@@ -89,7 +89,7 @@ public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpecte
8989
/// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
9090
/// </returns>
9191
[MethodImpl(MethodImplOptions.AggressiveInlining)]
92-
public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
92+
public static Vector128<byte> ShuffleNative(Vector128<byte> vector, Vector128<byte> indices)
9393
{
9494
if (Ssse3.IsSupported)
9595
{
@@ -101,6 +101,11 @@ public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> in
101101
return AdvSimd.Arm64.VectorTableLookup(vector, indices);
102102
}
103103

104+
if (PackedSimd.IsSupported)
105+
{
106+
return PackedSimd.Swizzle(vector, indices);
107+
}
108+
104109
ThrowUnreachableException();
105110
return default;
106111
}

src/ImageSharp/Common/Helpers/Vector256Utilities.cs

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ internal static class Vector256_
2424
/// <summary>
2525
/// Gets a value indicating whether shuffle float operations are supported.
2626
/// </summary>
27-
public static bool SupportsShuffleFloat
27+
public static bool SupportsShuffleNativeFloat
2828
{
2929
[MethodImpl(MethodImplOptions.AggressiveInlining)]
30-
get => Avx.IsSupported || Sse.IsSupported;
30+
get => Avx.IsSupported;
3131
}
3232

3333
/// <summary>
@@ -46,20 +46,13 @@ public static bool SupportsShuffleByte
4646
/// <param name="control">The shuffle control byte.</param>
4747
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
4848
[MethodImpl(MethodImplOptions.AggressiveInlining)]
49-
public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpected] byte control)
49+
public static Vector256<float> ShuffleNative(Vector256<float> vector, [ConstantExpected] byte control)
5050
{
5151
if (Avx.IsSupported)
5252
{
5353
return Avx.Shuffle(vector, vector, control);
5454
}
5555

56-
if (Sse.IsSupported)
57-
{
58-
Vector128<float> lower = vector.GetLower();
59-
Vector128<float> upper = vector.GetUpper();
60-
return Vector256.Create(Sse.Shuffle(lower, lower, control), Sse.Shuffle(upper, upper, control));
61-
}
62-
6356
ThrowUnreachableException();
6457
return default;
6558
}
@@ -73,7 +66,7 @@ public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpecte
7366
/// </param>
7467
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
7568
[MethodImpl(MethodImplOptions.AggressiveInlining)]
76-
public static Vector256<byte> Shuffle(Vector256<byte> vector, Vector256<byte> indices)
69+
public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<byte> indices)
7770
{
7871
if (Avx2.IsSupported)
7972
{
@@ -98,13 +91,6 @@ public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
9891
return Avx.ConvertToVector256Int32(vector);
9992
}
10093

101-
if (Sse2.IsSupported)
102-
{
103-
Vector128<int> lower = Sse2.ConvertToVector128Int32(vector.GetLower());
104-
Vector128<int> upper = Sse2.ConvertToVector128Int32(vector.GetUpper());
105-
return Vector256.Create(lower, upper);
106-
}
107-
10894
Vector256<float> sign = vector & Vector256.Create(-0F);
10995
Vector256<float> val_2p23_f32 = sign | Vector256.Create(8388608F);
11096

@@ -154,6 +140,27 @@ public static Vector256<float> MultiplyAdd(
154140
return va + (vm0 * vm1);
155141
}
156142

143+
/// <summary>
144+
/// Packs signed 32-bit integers to signed 16-bit integers and saturates.
145+
/// </summary>
146+
/// <param name="left">The left hand source vector.</param>
147+
/// <param name="right">The right hand source vector.</param>
148+
/// <returns>The <see cref="Vector256{Int16}"/>.</returns>
149+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
150+
public static Vector256<short> PackSignedSaturate(Vector256<int> left, Vector256<int> right)
151+
{
152+
if (Avx2.IsSupported)
153+
{
154+
return Avx2.PackSignedSaturate(left, right);
155+
}
156+
157+
Vector256<int> min = Vector256.Create((int)short.MinValue);
158+
Vector256<int> max = Vector256.Create((int)short.MaxValue);
159+
Vector256<int> lefClamped = Clamp(left, min, max);
160+
Vector256<int> rightClamped = Clamp(right, min, max);
161+
return Vector256.Narrow(lefClamped, rightClamped);
162+
}
163+
157164
/// <summary>
158165
/// Restricts a vector between a minimum and a maximum value.
159166
/// </summary>
@@ -166,6 +173,21 @@ public static Vector256<float> MultiplyAdd(
166173
public static Vector256<T> Clamp<T>(Vector256<T> value, Vector256<T> min, Vector256<T> max)
167174
=> Vector256.Min(Vector256.Max(value, min), max);
168175

176+
/// <summary>
177+
/// Widens a <see cref="Vector128{Int16}"/> to a <see cref="Vector256{Int32}"/>.
178+
/// </summary>
179+
/// <param name="value">The vector to widen.</param>
180+
/// <returns>The widened <see cref="Vector256{Int32}"/>.</returns>
181+
public static Vector256<int> Widen(Vector128<short> value)
182+
{
183+
if (Avx2.IsSupported)
184+
{
185+
return Avx2.ConvertToVector256Int32(value);
186+
}
187+
188+
return Vector256.WidenLower(value.ToVector256());
189+
}
190+
169191
[DoesNotReturn]
170192
private static void ThrowUnreachableException() => throw new UnreachableException();
171193
}

src/ImageSharp/Common/Helpers/Vector512Utilities.cs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@ internal static class Vector512_
2424
/// <summary>
2525
/// Gets a value indicating whether shuffle float operations are supported.
2626
/// </summary>
27-
public static bool SupportsShuffleFloat
27+
public static bool SupportsShuffleNativeFloat
2828
{
2929
[MethodImpl(MethodImplOptions.AggressiveInlining)]
30-
get => Avx512F.IsSupported || Avx.IsSupported;
30+
get => Avx512F.IsSupported;
3131
}
3232

3333
/// <summary>
3434
/// Gets a value indicating whether shuffle byte operations are supported.
3535
/// </summary>
36-
public static bool SupportsShuffleByte
36+
public static bool SupportsShuffleNativeByte
3737
{
3838
[MethodImpl(MethodImplOptions.AggressiveInlining)]
3939
get => Avx512BW.IsSupported;
@@ -46,20 +46,13 @@ public static bool SupportsShuffleByte
4646
/// <param name="control">The shuffle control byte.</param>
4747
/// <returns>The <see cref="Vector512{Single}"/>.</returns>
4848
[MethodImpl(MethodImplOptions.AggressiveInlining)]
49-
public static Vector512<float> Shuffle(Vector512<float> vector, [ConstantExpected] byte control)
49+
public static Vector512<float> ShuffleNative(Vector512<float> vector, [ConstantExpected] byte control)
5050
{
5151
if (Avx512F.IsSupported)
5252
{
5353
return Avx512F.Shuffle(vector, vector, control);
5454
}
5555

56-
if (Avx.IsSupported)
57-
{
58-
Vector256<float> lower = vector.GetLower();
59-
Vector256<float> upper = vector.GetUpper();
60-
return Vector512.Create(Avx.Shuffle(lower, lower, control), Avx.Shuffle(upper, upper, control));
61-
}
62-
6356
ThrowUnreachableException();
6457
return default;
6558
}
@@ -73,7 +66,7 @@ public static Vector512<float> Shuffle(Vector512<float> vector, [ConstantExpecte
7366
/// </param>
7467
/// <returns>The <see cref="Vector512{Byte}"/>.</returns>
7568
[MethodImpl(MethodImplOptions.AggressiveInlining)]
76-
public static Vector512<byte> Shuffle(Vector512<byte> vector, Vector512<byte> indices)
69+
public static Vector512<byte> ShuffleNative(Vector512<byte> vector, Vector512<byte> indices)
7770
{
7871
if (Avx512BW.IsSupported)
7972
{

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector256.cs

Lines changed: 39 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

4-
using System.Numerics;
54
using System.Runtime.CompilerServices;
65
using System.Runtime.InteropServices;
76
using System.Runtime.Intrinsics;
@@ -60,109 +59,76 @@ public void NormalizeColorsAndRoundInPlaceVector256(float maximum)
6059
}
6160

6261
/// <summary>
63-
/// Loads values from <paramref name="source"/> using extended AVX2 intrinsics.
62+
/// Loads values from <paramref name="source"/> using <see cref="Vector256{T}"/> intrinsics.
6463
/// </summary>
6564
/// <param name="source">The source <see cref="Block8x8"/></param>
66-
public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
65+
public void LoadFromInt16ExtendedVector256(ref Block8x8 source)
6766
{
6867
DebugGuard.IsTrue(
69-
Avx2.IsSupported,
70-
"LoadFromUInt16ExtendedAvx2 only works on AVX2 compatible architecture!");
68+
Vector256.IsHardwareAccelerated,
69+
"LoadFromInt16ExtendedVector256 only works on Vector256 compatible architecture!");
7170

7271
ref short sRef = ref Unsafe.As<Block8x8, short>(ref source);
7372
ref Vector256<float> dRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this);
7473

75-
// Vector256<ushort>.Count == 16 on AVX2
74+
// Vector256<ushort>.Count == 16
7675
// We can process 2 block rows in a single step
77-
Vector256<int> top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef));
78-
Vector256<int> bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)Vector256<int>.Count));
79-
dRef = Avx.ConvertToVector256Single(top);
80-
Unsafe.Add(ref dRef, 1) = Avx.ConvertToVector256Single(bottom);
81-
82-
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 2)));
83-
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 3)));
84-
Unsafe.Add(ref dRef, 2) = Avx.ConvertToVector256Single(top);
85-
Unsafe.Add(ref dRef, 3) = Avx.ConvertToVector256Single(bottom);
86-
87-
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 4)));
88-
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 5)));
89-
Unsafe.Add(ref dRef, 4) = Avx.ConvertToVector256Single(top);
90-
Unsafe.Add(ref dRef, 5) = Avx.ConvertToVector256Single(bottom);
91-
92-
top = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 6)));
93-
bottom = Avx2.ConvertToVector256Int32(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 7)));
94-
Unsafe.Add(ref dRef, 6) = Avx.ConvertToVector256Single(top);
95-
Unsafe.Add(ref dRef, 7) = Avx.ConvertToVector256Single(bottom);
76+
Vector256<int> top = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef));
77+
Vector256<int> bottom = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)Vector256<int>.Count));
78+
dRef = Vector256.ConvertToSingle(top);
79+
Unsafe.Add(ref dRef, 1) = Vector256.ConvertToSingle(bottom);
80+
81+
top = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 2)));
82+
bottom = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 3)));
83+
Unsafe.Add(ref dRef, 2) = Vector256.ConvertToSingle(top);
84+
Unsafe.Add(ref dRef, 3) = Vector256.ConvertToSingle(bottom);
85+
86+
top = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 4)));
87+
bottom = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 5)));
88+
Unsafe.Add(ref dRef, 4) = Vector256.ConvertToSingle(top);
89+
Unsafe.Add(ref dRef, 5) = Vector256.ConvertToSingle(bottom);
90+
91+
top = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 6)));
92+
bottom = Vector256_.Widen(Vector128.LoadUnsafe(ref sRef, (nuint)(Vector256<int>.Count * 7)));
93+
Unsafe.Add(ref dRef, 6) = Vector256.ConvertToSingle(top);
94+
Unsafe.Add(ref dRef, 7) = Vector256.ConvertToSingle(bottom);
9695
}
9796

9897
[MethodImpl(InliningOptions.ShortMethod)]
9998
private static Vector256<float> NormalizeAndRoundVector256(Vector256<float> value, Vector256<float> off, Vector256<float> max)
10099
=> Vector256_.RoundToNearestInteger(Vector256_.Clamp(value + off, Vector256<float>.Zero, max));
101100

102-
private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
101+
private static unsafe void MultiplyIntoInt16Vector256(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
103102
{
104-
DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
103+
DebugGuard.IsTrue(Vector256.IsHardwareAccelerated, "Vector256 support is required to run this operation!");
105104

106105
ref Vector256<float> aBase = ref a.V256_0;
107106
ref Vector256<float> bBase = ref b.V256_0;
108-
109107
ref Vector256<short> destRef = ref dest.V01;
110-
Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
111108

112109
for (nuint i = 0; i < 8; i += 2)
113110
{
114-
Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
115-
Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));
111+
Vector256<int> row0 = Vector256_.ConvertToInt32RoundToEven(Unsafe.Add(ref aBase, i + 0) * Unsafe.Add(ref bBase, i + 0));
112+
Vector256<int> row1 = Vector256_.ConvertToInt32RoundToEven(Unsafe.Add(ref aBase, i + 1) * Unsafe.Add(ref bBase, i + 1));
116113

117-
Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
118-
row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();
114+
Vector256<short> row = Vector256_.PackSignedSaturate(row0, row1);
115+
row = Vector256.Shuffle(row.AsInt32(), Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7)).AsInt16();
119116

120117
Unsafe.Add(ref destRef, i / 2) = row;
121118
}
122119
}
123120

124-
private void TransposeInPlace_Avx()
121+
private void TransposeInPlaceVector256()
125122
{
126123
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
127-
Vector256<float> r0 = Avx.InsertVector128(
128-
this.V256_0,
129-
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
130-
1);
131-
132-
Vector256<float> r1 = Avx.InsertVector128(
133-
this.V256_1,
134-
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
135-
1);
136-
137-
Vector256<float> r2 = Avx.InsertVector128(
138-
this.V256_2,
139-
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
140-
1);
141-
142-
Vector256<float> r3 = Avx.InsertVector128(
143-
this.V256_3,
144-
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
145-
1);
146-
147-
Vector256<float> r4 = Avx.InsertVector128(
148-
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
149-
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
150-
1);
151-
152-
Vector256<float> r5 = Avx.InsertVector128(
153-
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
154-
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
155-
1);
156-
157-
Vector256<float> r6 = Avx.InsertVector128(
158-
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
159-
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
160-
1);
161-
162-
Vector256<float> r7 = Avx.InsertVector128(
163-
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
164-
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
165-
1);
124+
Vector256<float> r0 = this.V256_0.WithUpper(this.V4L.AsVector128());
125+
Vector256<float> r1 = this.V256_1.WithUpper(this.V5L.AsVector128());
126+
Vector256<float> r2 = this.V256_2.WithUpper(this.V6L.AsVector128());
127+
Vector256<float> r3 = this.V256_3.WithUpper(this.V7L.AsVector128());
128+
Vector256<float> r4 = this.V0R.AsVector128().ToVector256().WithUpper(this.V4R.AsVector128());
129+
Vector256<float> r5 = this.V1R.AsVector128().ToVector256().WithUpper(this.V5R.AsVector128());
130+
Vector256<float> r6 = this.V2R.AsVector128().ToVector256().WithUpper(this.V6R.AsVector128());
131+
Vector256<float> r7 = this.V3R.AsVector128().ToVector256().WithUpper(this.V7R.AsVector128());
166132

167133
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
168134
Vector256<float> t2 = Avx.UnpackLow(r2, r3);

0 commit comments

Comments
 (0)