Skip to content

Commit a666372

Browse files
Use FMA where possible.
1 parent 8ffec30 commit a666372

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ private static void Shuffle4Slice3(
533533

534534
/// <summary>
535535
/// Performs a multiplication and an addition of the <see cref="Vector256{Single}"/>.
536+
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
536537
/// </summary>
537538
/// <remarks>ret = (vm0 * vm1) + va</remarks>
538539
/// <param name="va">The vector to add to the intermediate result.</param>
@@ -555,6 +556,7 @@ public static Vector256<float> MultiplyAdd(
555556

556557
/// <summary>
557558
/// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
559+
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
558560
/// </summary>
559561
/// <remarks>ret = (vm0 * vm1) - vs</remarks>
560562
/// <param name="vs">The vector to subtract from the intermediate result.</param>
@@ -575,6 +577,28 @@ public static Vector256<float> MultiplySubtract(
575577
return Avx.Subtract(Avx.Multiply(vm0, vm1), vs);
576578
}
577579

580+
/// <summary>
581+
/// Multiplies two <see cref="Vector256{Single}"/> values and subtracts the product from a third.
582+
/// </summary>
583+
/// <remarks>ret = c - (a * b). Uses the FMA intrinsic when <c>Fma.IsSupported</c> is true; otherwise falls back to <c>Avx.Multiply</c> followed by <c>Avx.Subtract</c>.</remarks>
584+
/// <param name="a">The first vector to multiply.</param>
585+
/// <param name="b">The second vector to multiply.</param>
586+
/// <param name="c">The vector from which the product of <paramref name="a"/> and <paramref name="b"/> is subtracted.</param>
587+
/// <returns>The <see cref="Vector256{T}"/> holding c - (a * b).</returns>
588+
[MethodImpl(InliningOptions.ShortMethod)]
589+
public static Vector256<float> MultiplyAddNegated(
590+
in Vector256<float> a,
591+
in Vector256<float> b,
592+
in Vector256<float> c)
593+
{
594+
if (Fma.IsSupported)
595+
{
596+
return Fma.MultiplyAddNegated(a, b, c);
597+
}
598+
599+
return Avx.Subtract(c, Avx.Multiply(a, b));
600+
}
601+
578602
/// <summary>
579603
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
580604
/// </summary>

src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public static Vector4 Screen(Vector4 backdrop, Vector4 source)
124124
public static Vector256<float> Screen(Vector256<float> backdrop, Vector256<float> source)
125125
{
126126
Vector256<float> vOne = Vector256.Create(1F);
127-
return Avx.Subtract(vOne, Avx.Multiply(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source)));
127+
return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne);
128128
}
129129

130130
/// <summary>
@@ -244,10 +244,10 @@ private static float OverlayValueFunction(float backdrop, float source)
244244
public static Vector256<float> OverlayValueFunction(Vector256<float> backdrop, Vector256<float> source)
245245
{
246246
Vector256<float> vOne = Vector256.Create(1F);
247-
Vector256<float> vTwo = Vector256.Create(2F);
248247
Vector256<float> left = Avx.Multiply(Avx.Add(backdrop, backdrop), source);
249-
Vector256<float> right = Avx.Subtract(vOne, Avx.Multiply(Avx.Multiply(vTwo, Avx.Subtract(vOne, source)), Avx.Subtract(vOne, backdrop)));
250248

249+
Vector256<float> vOneMinusSource = Avx.Subtract(vOne, source);
250+
Vector256<float> right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne);
251251
Vector256<float> cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F));
252252
return Avx.BlendVariable(left, right, cmp);
253253
}
@@ -430,9 +430,7 @@ public static Vector4 Out(Vector4 destination, Vector4 source)
430430
public static Vector256<float> Out(Vector256<float> destination, Vector256<float> source)
431431
{
432432
// calculate alpha
433-
Vector256<float> sW = Avx.Shuffle(source, source, ShuffleAlphaControl);
434-
Vector256<float> dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl);
435-
Vector256<float> alpha = Avx.Multiply(Avx.Subtract(Vector256.Create(1F), dW), sW);
433+
Vector256<float> alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl);
436434

437435
// premultiply
438436
Vector256<float> color = Avx.Multiply(source, alpha);

0 commit comments

Comments
 (0)