Skip to content

Commit 2c30c90

Browse files
Fix pre/unpremultiply methods
1 parent 75121b2 commit 2c30c90

File tree

9 files changed

+235
-205
lines changed

9 files changed

+235
-205
lines changed

.gitattributes

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,19 @@
6464
# Set explicit file behavior to:
6565
# treat as text
6666
# normalize to Unix-style line endings and
67-
# use a union merge when resoling conflicts
67+
# use a union merge when resolving conflicts
6868
###############################################################################
6969
*.csproj text eol=lf merge=union
7070
*.dbproj text eol=lf merge=union
7171
*.fsproj text eol=lf merge=union
7272
*.ncrunchproject text eol=lf merge=union
7373
*.vbproj text eol=lf merge=union
74+
*.shproj text eol=lf merge=union
7475
###############################################################################
7576
# Set explicit file behavior to:
7677
# treat as text
7778
# normalize to Windows-style line endings and
78-
# use a union merge when resoling conflicts
79+
# use a union merge when resolving conflicts
7980
###############################################################################
8081
*.sln text eol=crlf merge=union
8182
###############################################################################

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 87 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -474,21 +474,8 @@ private static void ClampImpl<T>(Span<T> span, T min, T max)
474474
[MethodImpl(MethodImplOptions.AggressiveInlining)]
475475
public static void Premultiply(ref Vector4 source)
476476
{
477-
float w = source.W;
478-
source *= w;
479-
source.W = w;
480-
}
481-
482-
/// <summary>
483-
/// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
484-
/// </summary>
485-
/// <param name="source">The <see cref="Vector4"/> to premultiply</param>
486-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
487-
public static void UnPremultiply(ref Vector4 source)
488-
{
489-
float w = source.W;
490-
source /= w;
491-
source.W = w;
477+
Vector4 alpha = PermuteW(source);
478+
source = WithW(source * alpha, alpha);
492479
}
493480

494481
/// <summary>
@@ -498,7 +485,7 @@ public static void UnPremultiply(ref Vector4 source)
498485
[MethodImpl(MethodImplOptions.AggressiveInlining)]
499486
public static void Premultiply(Span<Vector4> vectors)
500487
{
501-
if (Avx2.IsSupported && vectors.Length >= 2)
488+
if (Avx.IsSupported && vectors.Length >= 2)
502489
{
503490
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
504491
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
@@ -507,7 +494,7 @@ public static void Premultiply(Span<Vector4> vectors)
507494
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
508495
{
509496
Vector256<float> source = vectorsBase;
510-
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
497+
Vector256<float> multiply = Avx.Permute(source, ShuffleAlphaControl);
511498
vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
512499
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
513500
}
@@ -532,24 +519,47 @@ public static void Premultiply(Span<Vector4> vectors)
532519
}
533520
}
534521

522+
/// <summary>
523+
/// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
524+
/// </summary>
525+
/// <param name="source">The <see cref="Vector4"/> to reverse the premultiplication of.</param>
526+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
527+
public static void UnPremultiply(ref Vector4 source)
528+
{
529+
Vector4 alpha = PermuteW(source);
530+
UnPremultiply(ref source, alpha);
531+
}
532+
533+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
534+
public static void UnPremultiply(ref Vector4 source, Vector4 alpha)
535+
{
536+
if (alpha == Vector4.Zero)
537+
{
538+
return;
539+
}
540+
541+
source = WithW(source / alpha, alpha);
542+
}
543+
535544
/// <summary>
536545
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
537546
/// </summary>
538547
/// <param name="vectors">The span of vectors</param>
539548
[MethodImpl(MethodImplOptions.AggressiveInlining)]
540549
public static void UnPremultiply(Span<Vector4> vectors)
541550
{
542-
if (Avx2.IsSupported && vectors.Length >= 2)
551+
if (Avx.IsSupported && vectors.Length >= 2)
543552
{
544553
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
545554
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
546555
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
556+
Vector256<float> epsilon = Vector256.Create(Constants.Epsilon);
547557

548558
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
549559
{
550560
Vector256<float> source = vectorsBase;
551-
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
552-
vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
561+
Vector256<float> alpha = Avx.Permute(source, ShuffleAlphaControl);
562+
vectorsBase = UnPremultiply(source, alpha);
553563
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
554564
}
555565

@@ -573,6 +583,61 @@ public static void UnPremultiply(Span<Vector4> vectors)
573583
}
574584
}
575585

586+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
587+
public static Vector256<float> UnPremultiply(Vector256<float> source, Vector256<float> alpha)
588+
{
589+
// Check if alpha is zero to avoid division by zero
590+
Vector256<float> zeroMask = Avx.CompareEqual(alpha, Vector256<float>.Zero);
591+
592+
// Divide source by alpha where alpha is nonzero; where alpha is zero, keep the source components unchanged
593+
Vector256<float> result = Avx.BlendVariable(Avx.Divide(source, alpha), source, zeroMask);
594+
595+
// Blend the result with the alpha vector to ensure that the alpha component is unchanged
596+
return Avx.Blend(result, alpha, BlendAlphaControl);
597+
}
598+
599+
/// <summary>
600+
/// Permutes the given vector, returning a new instance with all the values set to <see cref="Vector4.W"/>.
601+
/// </summary>
602+
/// <param name="value">The vector.</param>
603+
/// <returns>The <see cref="Vector4"/>.</returns>
604+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
605+
public static Vector4 PermuteW(Vector4 value)
606+
{
607+
if (Sse.IsSupported)
608+
{
609+
return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4();
610+
}
611+
612+
return new(value.W);
613+
}
614+
615+
/// <summary>
616+
/// Sets the W component of the given vector <paramref name="value"/> to the given value from <paramref name="w"/>.
617+
/// </summary>
618+
/// <param name="value">The vector to set.</param>
619+
/// <param name="w">The vector containing the W value.</param>
620+
/// <returns>The <see cref="Vector4"/>.</returns>
621+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
622+
public static Vector4 WithW(Vector4 value, Vector4 w)
623+
{
624+
if (Sse41.IsSupported)
625+
{
626+
return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4();
627+
}
628+
629+
if (Sse.IsSupported)
630+
{
631+
// Create tmp as <w[3], w[0], value[2], value[0]>
632+
// Then return <value[0], value[1], tmp[2], tmp[0]> (which is <value[0], value[1], value[2], w[3]>)
633+
Vector128<float> tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11);
634+
return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4();
635+
}
636+
637+
value.W = w.W;
638+
return value;
639+
}
640+
576641
/// <summary>
577642
/// Calculates the cube pow of all the XYZ channels of the input vectors.
578643
/// </summary>
@@ -586,7 +651,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
586651
while (Unsafe.IsAddressLessThan(ref baseRef, ref endRef))
587652
{
588653
Vector4 v = baseRef;
589-
float a = v.W;
654+
Vector4 a = PermuteW(v);
590655

591656
// Fast path for the default gamma exposure, which is 3. In this case we can skip
592657
// calling Math.Pow 3 times (one per component), as the method is an internal call and
@@ -595,7 +660,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
595660
// back to the target index in the temporary span. The whole iteration will get completely
596661
// inlined and translated into vectorized instructions, with much better performance.
597662
v = v * v * v;
598-
v.W = a;
663+
v = WithW(v, a);
599664

600665
baseRef = v;
601666
baseRef = ref Unsafe.Add(ref baseRef, 1);

0 commit comments

Comments
 (0)