Skip to content

Commit e553807

Browse files
Port LosslessUtils V128
1 parent 1a63729 commit e553807

File tree

4 files changed

+144
-177
lines changed

4 files changed

+144
-177
lines changed

src/ImageSharp/Common/Helpers/Vector128Utilities.cs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,33 @@ public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpe
126126
return Vector128.Create(value.GetLower(), Vector64.Shuffle(value.GetUpper(), indices));
127127
}
128128

129+
/// <summary>
130+
/// Shuffle 16-bit integers in the low 64 bits of <paramref name="value"/> using the control in <paramref name="control"/>.
131+
/// Store the results in the low 64 bits of the destination, with the high 64 bits being copied from <paramref name="value"/>.
132+
/// </summary>
133+
/// <param name="value">The input vector containing packed 16-bit integers to shuffle.</param>
134+
/// <param name="control">The shuffle control byte.</param>
135+
/// <returns>
136+
/// A vector containing the shuffled 16-bit integers in the low 64 bits, with the high 64 bits copied from <paramref name="value"/>.
137+
/// </returns>
138+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
139+
public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control)
140+
{
141+
if (Sse2.IsSupported)
142+
{
143+
return Sse2.ShuffleLow(value, control);
144+
}
145+
146+
// Don't use InverseMMShuffle here as we want to avoid the cast.
147+
Vector64<short> indices = Vector64.Create(
148+
(short)(control & 0x3),
149+
(short)((control >> 2) & 0x3),
150+
(short)((control >> 4) & 0x3),
151+
(short)((control >> 6) & 0x3));
152+
153+
return Vector128.Create(Vector64.Shuffle(value.GetLower(), indices), value.GetUpper());
154+
}
155+
129156
/// <summary>
130157
/// Creates a new vector by selecting values from an input vector using a set of indices.
131158
/// </summary>
@@ -198,6 +225,42 @@ public static Vector128<byte> ShiftLeftBytesInVector(Vector128<byte> value, [Con
198225
return Vector128.Shuffle(value, Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) - Vector128.Create(numBytes));
199226
}
200227

228+
/// <summary>
229+
/// Shift packed 16-bit integers in <paramref name="value"/> left by <paramref name="count"/> while
230+
/// shifting in zeros, and store the results in the returned vector.
231+
/// </summary>
232+
/// <param name="value">The vector containing packed 16-bit integers to shift.</param>
233+
/// <param name="count">The number of bits to shift left.</param>
234+
/// <returns>
235+
/// A vector containing the packed 16-bit integers shifted left by <paramref name="count"/>, with zeros shifted in.
236+
/// </returns>
237+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
238+
public static Vector128<short> ShiftLeftLogical(Vector128<short> value, [ConstantExpected] byte count)
239+
{
240+
if (Sse2.IsSupported)
241+
{
242+
return Sse2.ShiftLeftLogical(value, count);
243+
}
244+
245+
// Zero lanes where count >= 16 to match SSE2
246+
if (count >= 16)
247+
{
248+
return Vector128<short>.Zero;
249+
}
250+
251+
if (AdvSimd.IsSupported)
252+
{
253+
return AdvSimd.ShiftLogical(value, Vector128.Create((short)count));
254+
}
255+
256+
if (PackedSimd.IsSupported)
257+
{
258+
return PackedSimd.ShiftLeft(value, count);
259+
}
260+
261+
return Vector128.ShiftLeft(value, count);
262+
}
263+
201264
/// <summary>
202265
/// Right aligns elements of two source 128-bit values depending on bits in a mask.
203266
/// </summary>

src/ImageSharp/Common/Helpers/Vector256Utilities.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<by
4646
return Avx2.Shuffle(vector, indices);
4747
}
4848

49-
return Vector256.Create(
50-
Vector128_.ShuffleNative(vector.GetLower(), indices.GetLower()),
51-
Vector128_.ShuffleNative(vector.GetUpper(), indices.GetUpper()));
49+
return Vector256.Shuffle(vector, indices);
5250
}
5351

5452
/// <summary>

src/ImageSharp/Common/Helpers/Vector512Utilities.cs

Lines changed: 7 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ public static Vector512<byte> ShuffleNative(Vector512<byte> vector, Vector512<by
4646
return Avx512BW.Shuffle(vector, indices);
4747
}
4848

49-
return Vector512.Create(
50-
Vector256_.ShuffleNative(vector.GetLower(), indices.GetLower()),
51-
Vector256_.ShuffleNative(vector.GetUpper(), indices.GetUpper()));
49+
return Vector512.Shuffle(vector, indices);
5250
}
5351

5452
/// <summary>
@@ -59,25 +57,7 @@ public static Vector512<byte> ShuffleNative(Vector512<byte> vector, Vector512<by
5957
/// <returns>The <see cref="Vector512{Int32}"/>.</returns>
6058
[MethodImpl(MethodImplOptions.AggressiveInlining)]
6159
public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
62-
{
63-
if (Avx512F.IsSupported)
64-
{
65-
return Avx512F.ConvertToVector512Int32(vector);
66-
}
67-
68-
if (Avx.IsSupported)
69-
{
70-
Vector256<int> lower = Avx.ConvertToVector256Int32(vector.GetLower());
71-
Vector256<int> upper = Avx.ConvertToVector256Int32(vector.GetUpper());
72-
return Vector512.Create(lower, upper);
73-
}
74-
75-
Vector512<float> sign = vector & Vector512.Create(-0.0f);
76-
Vector512<float> val_2p23_f32 = sign | Vector512.Create(8388608.0f);
77-
78-
val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
79-
return Vector512.ConvertToInt32(val_2p23_f32 | sign);
80-
}
60+
=> Avx512F.ConvertToVector512Int32(vector);
8161

8262
/// <summary>
8363
/// Rounds all values in <paramref name="vector"/> to the nearest integer
@@ -86,28 +66,11 @@ public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
8666
/// <param name="vector">The vector</param>
8767
[MethodImpl(MethodImplOptions.AggressiveInlining)]
8868
public static Vector512<float> RoundToNearestInteger(Vector512<float> vector)
89-
{
90-
if (Avx512F.IsSupported)
91-
{
92-
// imm8 = 0b1000:
93-
// imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers)
94-
// imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions)
95-
return Avx512F.RoundScale(vector, 0b0000_1000);
96-
}
9769

98-
if (Avx.IsSupported)
99-
{
100-
Vector256<float> lower = Avx.RoundToNearestInteger(vector.GetLower());
101-
Vector256<float> upper = Avx.RoundToNearestInteger(vector.GetUpper());
102-
return Vector512.Create(lower, upper);
103-
}
104-
105-
Vector512<float> sign = vector & Vector512.Create(-0F);
106-
Vector512<float> val_2p23_f32 = sign | Vector512.Create(8388608F);
107-
108-
val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
109-
return val_2p23_f32 | sign;
110-
}
70+
// imm8 = 0b1000:
71+
// imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers)
72+
// imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions)
73+
=> Avx512F.RoundScale(vector, 0b0000_1000);
11174

11275
/// <summary>
11376
/// Performs a multiplication and an addition of the <see cref="Vector512{Single}"/>.
@@ -122,21 +85,7 @@ public static Vector512<float> MultiplyAdd(
12285
Vector512<float> va,
12386
Vector512<float> vm0,
12487
Vector512<float> vm1)
125-
{
126-
if (Avx512F.IsSupported)
127-
{
128-
return Avx512F.FusedMultiplyAdd(vm0, vm1, va);
129-
}
130-
131-
if (Fma.IsSupported)
132-
{
133-
Vector256<float> lower = Fma.MultiplyAdd(vm0.GetLower(), vm1.GetLower(), va.GetLower());
134-
Vector256<float> upper = Fma.MultiplyAdd(vm0.GetUpper(), vm1.GetUpper(), va.GetUpper());
135-
return Vector512.Create(lower, upper);
136-
}
137-
138-
return va + (vm0 * vm1);
139-
}
88+
=> Avx512F.FusedMultiplyAdd(vm0, vm1, va);
14089

14190
/// <summary>
14291
/// Restricts a vector between a minimum and a maximum value.

0 commit comments

Comments
 (0)