Skip to content

Commit cac0960

Browse files
Merge pull request #2369 from SixLabors/js/premultiply
Fix and normalize Vector4 UnPremultiply
2 parents a2429cc + aab9955 commit cac0960

17 files changed

+276
-244
lines changed

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 92 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -474,21 +474,10 @@ private static void ClampImpl<T>(Span<T> span, T min, T max)
474474
[MethodImpl(MethodImplOptions.AggressiveInlining)]
475475
public static void Premultiply(ref Vector4 source)
476476
{
477-
float w = source.W;
478-
source *= w;
479-
source.W = w;
480-
}
481-
482-
/// <summary>
483-
/// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
484-
/// </summary>
485-
/// <param name="source">The <see cref="Vector4"/> to premultiply</param>
486-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
487-
public static void UnPremultiply(ref Vector4 source)
488-
{
489-
float w = source.W;
490-
source /= w;
491-
source.W = w;
477+
// Load into a local variable to prevent accessing the source from memory multiple times.
478+
Vector4 src = source;
479+
Vector4 alpha = PermuteW(src);
480+
source = WithW(src * alpha, alpha);
492481
}
493482

494483
/// <summary>
@@ -498,7 +487,7 @@ public static void UnPremultiply(ref Vector4 source)
498487
[MethodImpl(MethodImplOptions.AggressiveInlining)]
499488
public static void Premultiply(Span<Vector4> vectors)
500489
{
501-
if (Avx2.IsSupported && vectors.Length >= 2)
490+
if (Avx.IsSupported && vectors.Length >= 2)
502491
{
503492
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
504493
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
@@ -507,8 +496,8 @@ public static void Premultiply(Span<Vector4> vectors)
507496
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
508497
{
509498
Vector256<float> source = vectorsBase;
510-
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
511-
vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
499+
Vector256<float> alpha = Avx.Permute(source, ShuffleAlphaControl);
500+
vectorsBase = Avx.Blend(Avx.Multiply(source, alpha), source, BlendAlphaControl);
512501
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
513502
}
514503

@@ -532,24 +521,49 @@ public static void Premultiply(Span<Vector4> vectors)
532521
}
533522
}
534523

524+
/// <summary>
/// Undoes <see cref="Premultiply(ref Vector4)"/> by dividing the vector by its own W (alpha) component.
/// The W component is preserved, and a vector whose W is zero is left untouched.
/// </summary>
/// <param name="source">The premultiplied <see cref="Vector4"/> to restore in place.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void UnPremultiply(ref Vector4 source)
    => UnPremultiply(ref source, PermuteW(source));
534+
535+
/// <summary>
/// Reverses premultiplication of <paramref name="source"/> using a precomputed alpha vector.
/// </summary>
/// <param name="source">The vector to update in place.</param>
/// <param name="alpha">The divisor; its W component becomes the W of the result.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void UnPremultiply(ref Vector4 source, Vector4 alpha)
{
    // When alpha is the zero vector there is nothing sensible to divide by,
    // so the source is left exactly as it was.
    if (alpha != Vector4.Zero)
    {
        // Divide every component by alpha, then write alpha's W into the W slot
        // so the alpha component is not replaced by the quotient.
        Vector4 quotient = source / alpha;
        source = WithW(quotient, alpha);
    }
}
547+
535548
/// <summary>
536549
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
537550
/// </summary>
538551
/// <param name="vectors">The span of vectors</param>
539552
[MethodImpl(MethodImplOptions.AggressiveInlining)]
540553
public static void UnPremultiply(Span<Vector4> vectors)
541554
{
542-
if (Avx2.IsSupported && vectors.Length >= 2)
555+
if (Avx.IsSupported && vectors.Length >= 2)
543556
{
544557
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
545558
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
546559
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
560+
Vector256<float> epsilon = Vector256.Create(Constants.Epsilon);
547561

548562
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
549563
{
550564
Vector256<float> source = vectorsBase;
551-
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
552-
vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
565+
Vector256<float> alpha = Avx.Permute(source, ShuffleAlphaControl);
566+
vectorsBase = UnPremultiply(source, alpha);
553567
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
554568
}
555569

@@ -573,6 +587,61 @@ public static void UnPremultiply(Span<Vector4> vectors)
573587
}
574588
}
575589

590+
/// <summary>
/// Reverses premultiplication for the values packed in a <see cref="Vector256{T}"/>.
/// </summary>
/// <param name="source">The premultiplied values.</param>
/// <param name="alpha">The per-lane divisor.</param>
/// <returns>The resulting <see cref="Vector256{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> UnPremultiply(Vector256<float> source, Vector256<float> alpha)
{
    // Flag the lanes where alpha is zero so the result of dividing by zero is never used for them.
    Vector256<float> alphaIsZero = Avx.CompareEqual(alpha, Vector256<float>.Zero);
    Vector256<float> quotient = Avx.Divide(source, alpha);

    // Flagged lanes keep the source value; every other lane takes source / alpha.
    Vector256<float> unpremultiplied = Avx.BlendVariable(quotient, source, alphaIsZero);

    // Lanes selected by BlendAlphaControl are taken from alpha so the alpha component is unchanged.
    return Avx.Blend(unpremultiplied, alpha, BlendAlphaControl);
}
602+
603+
/// <summary>
/// Permutes the given vector, returning a new instance with all the values set to <see cref="Vector4.W"/>.
/// </summary>
/// <param name="value">The vector whose W component is broadcast.</param>
/// <returns>The <see cref="Vector4"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4 PermuteW(Vector4 value)
{
    if (!Sse.IsSupported)
    {
        // Software fallback: build the vector directly from the W component.
        return new Vector4(value.W);
    }

    // Hardware path: shuffle the vector with itself using the shared alpha shuffle control.
    Vector128<float> lanes = value.AsVector128();
    return Sse.Shuffle(lanes, lanes, ShuffleAlphaControl).AsVector4();
}
618+
619+
/// <summary>
/// Returns a copy of <paramref name="value"/> whose W component is replaced by the W component of <paramref name="w"/>.
/// </summary>
/// <param name="value">The vector providing the X, Y and Z components.</param>
/// <param name="w">The vector providing the W component.</param>
/// <returns>The combined <see cref="Vector4"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4 WithW(Vector4 value, Vector4 w)
{
    if (Sse41.IsSupported)
    {
        // Control 0b11_11_0000: take element 3 of w, write it to element 3 of value, zero nothing.
        return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4();
    }

    if (Sse.IsSupported)
    {
        Vector128<float> target = value.AsVector128();
        Vector128<float> donor = w.AsVector128();

        // First shuffle builds mixed = <w[3], w[0], value[2], value[0]>.
        Vector128<float> mixed = Sse.Shuffle(donor, target, 0b00_10_00_11);

        // Second shuffle yields <value[0], value[1], mixed[2], mixed[0]>,
        // which is <value[0], value[1], value[2], w[3]>.
        return Sse.Shuffle(target, mixed, 0b00_10_01_00).AsVector4();
    }

    // Software fallback: copy X, Y, Z from value and W from w.
    return new Vector4(value.X, value.Y, value.Z, w.W);
}
644+
576645
/// <summary>
577646
/// Calculates the cube pow of all the XYZ channels of the input vectors.
578647
/// </summary>
@@ -586,7 +655,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
586655
while (Unsafe.IsAddressLessThan(ref baseRef, ref endRef))
587656
{
588657
Vector4 v = baseRef;
589-
float a = v.W;
658+
Vector4 a = PermuteW(v);
590659

591660
// Fast path for the default gamma exposure, which is 3. In this case we can skip
592661
// calling Math.Pow 3 times (one per component), as the method is an internal call and
@@ -595,7 +664,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
595664
// back to the target index in the temporary span. The whole iteration will get completely
596665
// inlined and translated into vectorized instructions, with much better performance.
597666
v = v * v * v;
598-
v.W = a;
667+
v = WithW(v, a);
599668

600669
baseRef = v;
601670
baseRef = ref Unsafe.Add(ref baseRef, 1);

src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,7 @@ internal interface IShuffle4 : IComponentShuffle
4747
internal readonly struct DefaultShuffle4 : IShuffle4
4848
{
4949
public DefaultShuffle4(byte control)
50-
{
51-
DebugGuard.MustBeBetweenOrEqualTo<byte>(control, 0, 3, nameof(control));
52-
this.Control = control;
53-
}
50+
=> this.Control = control;
5451

5552
public byte Control { get; }
5653

src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,7 @@ internal interface IPad3Shuffle4 : IComponentShuffle
1515
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
1616
{
1717
public DefaultPad3Shuffle4(byte control)
18-
{
19-
DebugGuard.MustBeBetweenOrEqualTo<byte>(control, 0, 3, nameof(control));
20-
this.Control = control;
21-
}
18+
=> this.Control = control;
2219

2320
public byte Control { get; }
2421

src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,7 @@ internal interface IShuffle3 : IComponentShuffle
1515
internal readonly struct DefaultShuffle3 : IShuffle3
1616
{
1717
public DefaultShuffle3(byte control)
18-
{
19-
DebugGuard.MustBeBetweenOrEqualTo<byte>(control, 0, 3, nameof(control));
20-
this.Control = control;
21-
}
18+
=> this.Control = control;
2219

2320
public byte Control { get; }
2421

src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,7 @@ internal interface IShuffle4Slice3 : IComponentShuffle
1515
internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
1616
{
1717
public DefaultShuffle4Slice3(byte control)
18-
{
19-
DebugGuard.MustBeBetweenOrEqualTo<byte>(control, 0, 3, nameof(control));
20-
this.Control = control;
21-
}
18+
=> this.Control = control;
2219

2320
public byte Control { get; }
2421

0 commit comments

Comments
 (0)