@@ -474,21 +474,10 @@ private static void ClampImpl<T>(Span<T> span, T min, T max)
474
474
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
475
475
public static void Premultiply ( ref Vector4 source )
476
476
{
477
- float w = source . W ;
478
- source *= w ;
479
- source . W = w ;
480
- }
481
-
482
- /// <summary>
483
- /// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
484
- /// </summary>
485
- /// <param name="source">The <see cref="Vector4"/> to premultiply</param>
486
- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
487
- public static void UnPremultiply ( ref Vector4 source )
488
- {
489
- float w = source . W ;
490
- source /= w ;
491
- source . W = w ;
477
+ // Load into a local variable to prevent accessing the source from memory multiple times; alpha then carries W in every component, so the product scales all components and WithW puts the original W back.
478
+ Vector4 src = source ;
479
+ Vector4 alpha = PermuteW ( src ) ;
480
+ source = WithW ( src * alpha , alpha ) ;
492
481
}
493
482
494
483
/// <summary>
@@ -498,7 +487,7 @@ public static void UnPremultiply(ref Vector4 source)
498
487
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
499
488
public static void Premultiply ( Span < Vector4 > vectors )
500
489
{
501
- if ( Avx2 . IsSupported && vectors . Length >= 2 )
490
+ if ( Avx . IsSupported && vectors . Length >= 2 )
502
491
{
503
492
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
504
493
ref Vector256 < float > vectorsBase = ref Unsafe . As < Vector4 , Vector256 < float > > ( ref MemoryMarshal . GetReference ( vectors ) ) ;
@@ -507,8 +496,8 @@ public static void Premultiply(Span<Vector4> vectors)
507
496
while ( Unsafe . IsAddressLessThan ( ref vectorsBase , ref vectorsLast ) )
508
497
{
509
498
Vector256 < float > source = vectorsBase ;
510
- Vector256 < float > multiply = Avx . Shuffle ( source , source , ShuffleAlphaControl ) ;
511
- vectorsBase = Avx . Blend ( Avx . Multiply ( source , multiply ) , source , BlendAlphaControl ) ;
499
+ Vector256 < float > alpha = Avx . Permute ( source , ShuffleAlphaControl ) ;
500
+ vectorsBase = Avx . Blend ( Avx . Multiply ( source , alpha ) , source , BlendAlphaControl ) ;
512
501
vectorsBase = ref Unsafe . Add ( ref vectorsBase , 1 ) ;
513
502
}
514
503
@@ -532,24 +521,49 @@ public static void Premultiply(Span<Vector4> vectors)
532
521
}
533
522
}
534
523
524
+ /// <summary>
525
+ /// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/> by dividing it by its own W component; W itself is preserved, and a zero W leaves the vector untouched.
526
+ /// </summary>
527
+ /// <param name="source">The <see cref="Vector4"/> to un-premultiply; it is modified in place.</param>
528
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
529
+ public static void UnPremultiply ( ref Vector4 source )
530
+ {
531
+ Vector4 alpha = PermuteW ( source ) ;
532
+ UnPremultiply ( ref source , alpha ) ;
533
+ }
534
+
535
/// <summary>
/// Divides <paramref name="source"/> by <paramref name="alpha"/> in place, then sets its W component to the W of <paramref name="alpha"/>.
/// Returns without modifying <paramref name="source"/> when <paramref name="alpha"/> equals <see cref="Vector4.Zero"/>.
/// </summary>
/// <param name="source">The <see cref="Vector4"/> to un-premultiply; it is modified in place.</param>
/// <param name="alpha">The divisor vector; <see cref="UnPremultiply(ref Vector4)"/> passes the source's W component in every element.</param>
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
536
+ public static void UnPremultiply ( ref Vector4 source , Vector4 alpha )
537
+ {
538
+ if ( alpha == Vector4 . Zero )
539
+ {
540
+ return ;
541
+ }
542
+
543
+ // alpha is not all-zero here (that case returned above), so divide every component of source by alpha
544
+ // Then take W from alpha so the W component is not left as the result of the division
545
+ source = WithW ( source / alpha , alpha ) ;
546
+ }
547
+
535
548
/// <summary>
536
549
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
537
550
/// </summary>
538
551
/// <param name="vectors">The span of vectors</param>
539
552
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
540
553
public static void UnPremultiply ( Span < Vector4 > vectors )
541
554
{
542
- if ( Avx2 . IsSupported && vectors . Length >= 2 )
555
+ if ( Avx . IsSupported && vectors . Length >= 2 )
543
556
{
544
557
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
545
558
ref Vector256 < float > vectorsBase = ref Unsafe . As < Vector4 , Vector256 < float > > ( ref MemoryMarshal . GetReference ( vectors ) ) ;
546
559
ref Vector256 < float > vectorsLast = ref Unsafe . Add ( ref vectorsBase , ( IntPtr ) ( ( uint ) vectors . Length / 2u ) ) ;
560
+ Vector256 < float > epsilon = Vector256 . Create ( Constants . Epsilon ) ;
547
561
548
562
while ( Unsafe . IsAddressLessThan ( ref vectorsBase , ref vectorsLast ) )
549
563
{
550
564
Vector256 < float > source = vectorsBase ;
551
- Vector256 < float > multiply = Avx . Shuffle ( source , source , ShuffleAlphaControl ) ;
552
- vectorsBase = Avx . Blend ( Avx . Divide ( source , multiply ) , source , BlendAlphaControl ) ;
565
+ Vector256 < float > alpha = Avx . Permute ( source , ShuffleAlphaControl ) ;
566
+ vectorsBase = UnPremultiply ( source , alpha ) ;
553
567
vectorsBase = ref Unsafe . Add ( ref vectorsBase , 1 ) ;
554
568
}
555
569
@@ -573,6 +587,61 @@ public static void UnPremultiply(Span<Vector4> vectors)
573
587
}
574
588
}
575
589
590
/// <summary>
/// Divides each element of <paramref name="source"/> by the matching element of <paramref name="alpha"/>, keeping the source value wherever alpha is zero.
/// Elements selected by BlendAlphaControl are then taken from <paramref name="alpha"/>.
/// </summary>
/// <param name="source">The packed values to un-premultiply.</param>
/// <param name="alpha">The per-element divisors.</param>
/// <returns>The un-premultiplied <see cref="Vector256{T}"/>.</returns>
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
591
+ public static Vector256 < float > UnPremultiply ( Vector256 < float > source , Vector256 < float > alpha )
592
+ {
593
+ // Build a per-element mask that is set wherever alpha compares equal to zero, so division by zero results can be discarded
594
+ Vector256 < float > zeroMask = Avx . CompareEqual ( alpha , Vector256 < float > . Zero ) ;
595
+
596
+ // The division is evaluated for every element; elements flagged by zeroMask are then replaced with the original source value
597
+ Vector256 < float > result = Avx . BlendVariable ( Avx . Divide ( source , alpha ) , source , zeroMask ) ;
598
+
599
+ // Take the elements selected by BlendAlphaControl from alpha (presumably the W lanes; the constant is defined outside this view) so the alpha component is unchanged
600
+ return Avx . Blend ( result , alpha , BlendAlphaControl ) ;
601
+ }
602
+
603
+ /// <summary>
604
+ /// Permutes the given vector, returning a new instance with all the values set to <see cref="Vector4.W"/>. Uses an SSE shuffle when available, otherwise constructs the vector from the scalar W.
605
+ /// </summary>
606
+ /// <param name="value">The vector whose W component is broadcast.</param>
607
+ /// <returns>The <see cref="Vector4"/> with W in every component.</returns>
608
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
609
+ public static Vector4 PermuteW ( Vector4 value )
610
+ {
611
+ if ( Sse . IsSupported )
612
+ {
613
+ return Sse . Shuffle ( value . AsVector128 ( ) , value . AsVector128 ( ) , ShuffleAlphaControl ) . AsVector4 ( ) ;
614
+ }
615
+
616
+ return new ( value . W ) ;
617
+ }
618
+
619
+ /// <summary>
620
+ /// Returns a copy of <paramref name="value"/> whose W component is replaced by the W component of <paramref name="w"/>; X, Y and Z are unchanged.
621
+ /// </summary>
622
+ /// <param name="value">The vector supplying the X, Y and Z components.</param>
623
+ /// <param name="w">The vector containing the W value.</param>
624
+ /// <returns>The combined <see cref="Vector4"/>.</returns>
625
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
626
+ public static Vector4 WithW ( Vector4 value , Vector4 w )
627
+ {
628
+ if ( Sse41 . IsSupported )
629
+ {
630
// insertps control byte: bits 7:6 pick element 3 of w, bits 5:4 target element 3 of value, bits 3:0 (zero mask) clear nothing.
+ return Sse41 . Insert ( value . AsVector128 ( ) , w . AsVector128 ( ) , 0b11_11_0000 ) . AsVector4 ( ) ;
631
+ }
632
+
633
+ if ( Sse . IsSupported )
634
+ {
635
+ // Create tmp as <w[3], w[0], value[2], value[0]> (low two elements come from the first operand, high two from the second)
636
+ // Then return <value[0], value[1], tmp[2], tmp[0]> (which is <value[0], value[1], value[2], w[3]>)
637
+ Vector128 < float > tmp = Sse . Shuffle ( w . AsVector128 ( ) , value . AsVector128 ( ) , 0b00_10_00_11 ) ;
638
+ return Sse . Shuffle ( value . AsVector128 ( ) , tmp , 0b00_10_01_00 ) . AsVector4 ( ) ;
639
+ }
640
+
641
+ value . W = w . W ;
642
+ return value ;
643
+ }
644
+
576
645
/// <summary>
577
646
/// Calculates the cube pow of all the XYZ channels of the input vectors.
578
647
/// </summary>
@@ -586,7 +655,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
586
655
while ( Unsafe . IsAddressLessThan ( ref baseRef , ref endRef ) )
587
656
{
588
657
Vector4 v = baseRef ;
589
- float a = v . W ;
658
+ Vector4 a = PermuteW ( v ) ;
590
659
591
660
// Fast path for the default gamma exposure, which is 3. In this case we can skip
592
661
// calling Math.Pow 3 times (one per component), as the method is an internal call and
@@ -595,7 +664,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
595
664
// back to the target index in the temporary span. The whole iteration will get completely
596
665
// inlined and translated into vectorized instructions, with much better performance.
597
666
v = v * v * v ;
598
- v . W = a ;
667
+ v = WithW ( v , a ) ;
599
668
600
669
baseRef = v ;
601
670
baseRef = ref Unsafe . Add ( ref baseRef , 1 ) ;
0 commit comments