Port LosslessUtils V128

JimBobSquarePants · JimBobSquarePants · commit e553807429ff · 2025-06-04T12:10:45.000+10:00
diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
@@ -126,6 +126,33 @@ public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpe
         return Vector128.Create(value.GetLower(), Vector64.Shuffle(value.GetUpper(), indices));
     }
 
+    /// <summary>
+    /// Shuffle 16-bit integers in the low 64 bits of <paramref name="value"/> using the control in <paramref name="control"/>.
+    /// Store the results in the low 64 bits of the destination, with the high 64 bits being copied from <paramref name="value"/>.
+    /// </summary>
+    /// <param name="value">The input vector containing packed 16-bit integers to shuffle.</param>
+    /// <param name="control">The shuffle control byte; each 2-bit field, lowest bits first, is the source index for one of the four low elements.</param>
+    /// <returns>
+    /// A vector containing the shuffled 16-bit integers in the low 64 bits, with the high 64 bits copied from <paramref name="value"/>.
+    /// </returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control)
+    {
+        if (Sse2.IsSupported)
+        {
+            return Sse2.ShuffleLow(value, control);
+        }
+
+        // Unpack the four 2-bit selectors by hand; don't use InverseMMShuffle here as we want to avoid the cast.
+        Vector64<short> indices = Vector64.Create(
+            (short)(control & 0x3),
+            (short)((control >> 2) & 0x3),
+            (short)((control >> 4) & 0x3),
+            (short)((control >> 6) & 0x3));
+
+        return Vector128.Create(Vector64.Shuffle(value.GetLower(), indices), value.GetUpper());
+    }
+
     /// <summary>
     /// Creates a new vector by selecting values from an input vector using a set of indices.
     /// </summary>
@@ -198,6 +225,42 @@ public static Vector128<byte> ShiftLeftBytesInVector(Vector128<byte> value, [Con
         return Vector128.Shuffle(value, Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) - Vector128.Create(numBytes));
     }
 
+    /// <summary>
+    /// Shift packed 16-bit integers in <paramref name="value"/> left by <paramref name="count"/> while
+    /// shifting in zeros, and store the results.
+    /// </summary>
+    /// <param name="value">The vector containing packed 16-bit integers to shift.</param>
+    /// <param name="count">The number of bits to shift left.</param>
+    /// <returns>
+    /// A vector containing the packed 16-bit integers shifted left by <paramref name="count"/>, with zeros shifted in.
+    /// </returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector128<short> ShiftLeftLogical(Vector128<short> value, [ConstantExpected] byte count)
+    {
+        if (Sse2.IsSupported)
+        {
+            return Sse2.ShiftLeftLogical(value, count);
+        }
+
+        // Zero all lanes when count >= 16 to match SSE2; the paths below are only reached with count < 16.
+        if (count >= 16)
+        {
+            return Vector128<short>.Zero;
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            return AdvSimd.ShiftLogical(value, Vector128.Create((short)count));
+        }
+
+        if (PackedSimd.IsSupported)
+        {
+            return PackedSimd.ShiftLeft(value, count);
+        }
+
+        return Vector128.ShiftLeft(value, count);
+    }
+
     /// <summary>
     /// Right aligns elements of two source 128-bit values depending on bits in a mask.
     /// </summary>
diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs
@@ -46,9 +46,7 @@ public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<by
             return Avx2.Shuffle(vector, indices);
         }
 
-        return Vector256.Create(
-            Vector128_.ShuffleNative(vector.GetLower(), indices.GetLower()),
-            Vector128_.ShuffleNative(vector.GetUpper(), indices.GetUpper()));
+        return Vector256.Shuffle(vector, indices);
     }
 
     /// <summary>
diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs
@@ -46,9 +46,7 @@ public static Vector512<byte> ShuffleNative(Vector512<byte> vector, Vector512<by
             return Avx512BW.Shuffle(vector, indices);
         }
 
-        return Vector512.Create(
-            Vector256_.ShuffleNative(vector.GetLower(), indices.GetLower()),
-            Vector256_.ShuffleNative(vector.GetUpper(), indices.GetUpper()));
+        return Vector512.Shuffle(vector, indices);
     }
 
     /// <summary>
@@ -59,25 +57,7 @@ public static Vector512<byte> ShuffleNative(Vector512<byte> vector, Vector512<by
     /// <returns>The <see cref="Vector128{Int32}"/>.</returns>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
-    {
-        if (Avx512F.IsSupported)
-        {
-            return Avx512F.ConvertToVector512Int32(vector);
-        }
-
-        if (Avx.IsSupported)
-        {
-            Vector256<int> lower = Avx.ConvertToVector256Int32(vector.GetLower());
-            Vector256<int> upper = Avx.ConvertToVector256Int32(vector.GetUpper());
-            return Vector512.Create(lower, upper);
-        }
-
-        Vector512<float> sign = vector & Vector512.Create(-0.0f);
-        Vector512<float> val_2p23_f32 = sign | Vector512.Create(8388608.0f);
-
-        val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
-        return Vector512.ConvertToInt32(val_2p23_f32 | sign);
-    }
+        => Avx512F.ConvertToVector512Int32(vector);
 
     /// <summary>
     /// Rounds all values in <paramref name="vector"/> to the nearest integer
@@ -86,28 +66,11 @@ public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
     /// <param name="vector">The vector</param>
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     public static Vector512<float> RoundToNearestInteger(Vector512<float> vector)
-    {
-        if (Avx512F.IsSupported)
-        {
-            // imm8 = 0b1000:
-            //   imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers)
-            //   imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions)
-            return Avx512F.RoundScale(vector, 0b0000_1000);
-        }
 
-        if (Avx.IsSupported)
-        {
-            Vector256<float> lower = Avx.RoundToNearestInteger(vector.GetLower());
-            Vector256<float> upper = Avx.RoundToNearestInteger(vector.GetUpper());
-            return Vector512.Create(lower, upper);
-        }
-
-        Vector512<float> sign = vector & Vector512.Create(-0F);
-        Vector512<float> val_2p23_f32 = sign | Vector512.Create(8388608F);
-
-        val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
-        return val_2p23_f32 | sign;
-    }
+          // imm8 = 0b1000:
+          //   imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers)
+          //   imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions)
+          => Avx512F.RoundScale(vector, 0b0000_1000);
 
     /// <summary>
     /// Performs a multiplication and an addition of the <see cref="Vector512{Single}"/>.
@@ -122,21 +85,7 @@ public static Vector512<float> MultiplyAdd(
         Vector512<float> va,
         Vector512<float> vm0,
         Vector512<float> vm1)
-    {
-        if (Avx512F.IsSupported)
-        {
-            return Avx512F.FusedMultiplyAdd(vm0, vm1, va);
-        }
-
-        if (Fma.IsSupported)
-        {
-            Vector256<float> lower = Fma.MultiplyAdd(vm0.GetLower(), vm1.GetLower(), va.GetLower());
-            Vector256<float> upper = Fma.MultiplyAdd(vm0.GetUpper(), vm1.GetUpper(), va.GetUpper());
-            return Vector512.Create(lower, upper);
-        }
-
-        return va + (vm0 * vm1);
-    }
+        => Avx512F.FusedMultiplyAdd(vm0, vm1, va);
 
     /// <summary>
     /// Restricts a vector between a minimum and a maximum value.
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

Original file line number	Diff line number	Diff line change
`@@ -46,9 +46,7 @@ public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<by`
`46`	`46`	`return Avx2.Shuffle(vector, indices);`
`47`	`47`	`}`
`48`	`48`
`49`		`- return Vector256.Create(`
`50`		`- Vector128_.ShuffleNative(vector.GetLower(), indices.GetLower()),`
`51`		`- Vector128_.ShuffleNative(vector.GetUpper(), indices.GetUpper()));`
	`49`	`+ return Vector256.Shuffle(vector, indices);`
`52`	`50`	`}`
`53`	`51`
`54`	`52`	`/// <summary>`