Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/ImageSharp/Common/Helpers/Numerics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1097,4 +1097,83 @@ public static nuint Vector512Count<TVector>(this Span<float> span)
public static nuint Vector512Count<TVector>(int length)
    where TVector : struct
{
    // Number of TVector elements held by a single Vector512<TVector>.
    uint elementsPerVector = (uint)Vector512<TVector>.Count;

    // Unsigned integer division: any partial trailing vector is discarded.
    return (uint)length / elementsPerVector;
}

/// <summary>
/// Divides every value in the given <see cref="Span{T}"/> by <paramref name="sum"/>, in place.
/// </summary>
/// <param name="span">The sequence of <see cref="float"/> values to normalize. Its contents are overwritten.</param>
/// <param name="sum">The divisor applied to each element; expected to be the sum of the values in <paramref name="span"/>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Normalize(Span<float> span, float sum)
{
    int length = span.Length;
    ref float cursor = ref MemoryMarshal.GetReference(span);

    // Number of elements left for the scalar loop at the bottom.
    // Without hardware acceleration that is the whole span.
    int scalarCount = length;

    if (Vector512.IsHardwareAccelerated)
    {
        Vector512<float> divisor512 = Vector512.Create(sum);
        ref float wideEnd = ref Unsafe.Add(ref cursor, length & ~15);

        // Whole blocks of 16 floats.
        while (Unsafe.IsAddressLessThan(ref cursor, ref wideEnd))
        {
            ref Vector512<float> block512 = ref Unsafe.As<float, Vector512<float>>(ref cursor);
            block512 /= divisor512;
            cursor = ref Unsafe.Add(ref cursor, (nuint)16);
        }

        // Bit 3 of the length is set exactly when 8 or more elements remain.
        if ((length & 8) != 0)
        {
            ref Vector256<float> block256 = ref Unsafe.As<float, Vector256<float>>(ref cursor);
            block256 /= divisor512.GetLower();
            cursor = ref Unsafe.Add(ref cursor, (nuint)8);
        }

        // Bit 2 of the length is set exactly when 4 or more elements remain after the step above.
        if ((length & 4) != 0)
        {
            ref Vector128<float> block128 = ref Unsafe.As<float, Vector128<float>>(ref cursor);
            block128 /= divisor512.GetLower().GetLower();
            cursor = ref Unsafe.Add(ref cursor, (nuint)4);
        }

        scalarCount = length & 3;
    }
    else if (Vector256.IsHardwareAccelerated)
    {
        Vector256<float> divisor256 = Vector256.Create(sum);
        ref float wideEnd = ref Unsafe.Add(ref cursor, length & ~7);

        // Whole blocks of 8 floats.
        while (Unsafe.IsAddressLessThan(ref cursor, ref wideEnd))
        {
            ref Vector256<float> block256 = ref Unsafe.As<float, Vector256<float>>(ref cursor);
            block256 /= divisor256;
            cursor = ref Unsafe.Add(ref cursor, (nuint)8);
        }

        // Bit 2 of the length is set exactly when 4 or more elements remain.
        if ((length & 4) != 0)
        {
            ref Vector128<float> block128 = ref Unsafe.As<float, Vector128<float>>(ref cursor);
            block128 /= divisor256.GetLower();
            cursor = ref Unsafe.Add(ref cursor, (nuint)4);
        }

        scalarCount = length & 3;
    }

    // Scalar loop: the 0-3 element tail of a vectorized path, or the entire span otherwise.
    ref float scalarEnd = ref Unsafe.Add(ref cursor, scalarCount);

    while (Unsafe.IsAddressLessThan(ref cursor, ref scalarEnd))
    {
        cursor /= sum;
        cursor = ref Unsafe.Add(ref cursor, (nuint)1);
    }
}
}
38 changes: 38 additions & 0 deletions src/ImageSharp/Common/Helpers/Vector128Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,44 @@ public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128
return default;
}

/// <summary>
/// Computes <c>(a * b) + c</c> element-wise over three vectors of single-precision floating-point numbers.
/// When the CPU supports FMA (Fused Multiply-Add) instructions the computation is issued as one fused operation.
/// </summary>
/// <param name="a">The first vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="b">The second vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="c">The vector of single-precision floating-point numbers added to the product of
/// <paramref name="a"/> and <paramref name="b"/>.</param>
/// <returns>
/// A <see cref="Vector128{Single}"/> whose elements are the products of the corresponding elements of
/// <paramref name="a"/> and <paramref name="b"/> plus the corresponding element of <paramref name="c"/>.
/// </returns>
/// <remarks>
/// With FMA support the result comes from
/// <see cref="Fma.MultiplyAdd(Vector128{float}, Vector128{float}, Vector128{float})"/>; otherwise a separate
/// multiplication and addition are performed.
/// <para>
/// The two paths handle floating-point rounding differently, so their results may differ slightly. Callers
/// for which numerical accuracy is critical should not rely on bit-identical output across machines.
/// </para>
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MultiplyAddEstimate(Vector128<float> a, Vector128<float> b, Vector128<float> c)
    => Fma.IsSupported
        ? Fma.MultiplyAdd(a, b, c)
        : (a * b) + c;

/// <summary>
/// Throws an <see cref="UnreachableException"/>. Never returns normally.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}
38 changes: 38 additions & 0 deletions src/ImageSharp/Common/Helpers/Vector256Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,44 @@ public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
return Vector256.ConvertToInt32(val_2p23_f32 | sign);
}

/// <summary>
/// Computes <c>(a * b) + c</c> element-wise over three vectors of single-precision floating-point numbers.
/// When the CPU supports FMA (Fused Multiply-Add) instructions the computation is issued as one fused operation.
/// </summary>
/// <param name="a">The first vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="b">The second vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="c">The vector of single-precision floating-point numbers added to the product of
/// <paramref name="a"/> and <paramref name="b"/>.</param>
/// <returns>
/// A <see cref="Vector256{Single}"/> whose elements are the products of the corresponding elements of
/// <paramref name="a"/> and <paramref name="b"/> plus the corresponding element of <paramref name="c"/>.
/// </returns>
/// <remarks>
/// With FMA support the result comes from
/// <see cref="Fma.MultiplyAdd(Vector256{float}, Vector256{float}, Vector256{float})"/>; otherwise a separate
/// multiplication and addition are performed.
/// <para>
/// The two paths handle floating-point rounding differently, so their results may differ slightly. Callers
/// for which numerical accuracy is critical should not rely on bit-identical output across machines.
/// </para>
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> MultiplyAddEstimate(Vector256<float> a, Vector256<float> b, Vector256<float> c)
    => Fma.IsSupported
        ? Fma.MultiplyAdd(a, b, c)
        : (a * b) + c;

/// <summary>
/// Throws an <see cref="UnreachableException"/>. Never returns normally.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}
33 changes: 33 additions & 0 deletions src/ImageSharp/Common/Helpers/Vector512Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,39 @@ public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
return Vector512.ConvertToInt32(val_2p23_f32 | sign);
}

/// <summary>
/// Performs a multiply-add operation on three vectors, where each element of the resulting vector is the
/// product of corresponding elements in <paramref name="a"/> and <paramref name="b"/> added to the
/// corresponding element in <paramref name="c"/>.
/// </summary>
/// <param name="a">The first vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="b">The second vector of single-precision floating-point numbers to be multiplied.</param>
/// <param name="c">The vector of single-precision floating-point numbers to be added to the product of
/// <paramref name="a"/> and <paramref name="b"/>.</param>
/// <returns>
/// A <see cref="Vector512{Single}"/> where each element is the result of multiplying the corresponding elements
/// of <paramref name="a"/> and <paramref name="b"/>, and then adding the corresponding element from <paramref name="c"/>.
/// </returns>
/// <remarks>
/// The operation is always performed as a separate multiplication and addition, even when the FMA
/// (Fused Multiply-Add) instruction set is supported by the CPU. Using
/// <see cref="Fma.MultiplyAdd(Vector256{float}, Vector256{float}, Vector256{float})"/> would mean running it
/// against the upper and lower halves of each vector and then recombining them, which costs more instructions
/// than it saves.
/// <para>
/// Because the product is rounded before the addition, results might differ slightly in precision from a
/// fused operation, particularly in cases where numerical accuracy is critical.
/// </para>
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<float> MultiplyAddEstimate(Vector512<float> a, Vector512<float> b, Vector512<float> c)
{
    // Don't actually use FMA as it requires many more instructions to extract the
    // upper and lower parts of the vector and then recombine them.
    return (a * b) + c;
}

/// <summary>
/// Throws an <see cref="UnreachableException"/>. Never returns normally.
/// </summary>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}
Loading