@@ -474,21 +474,8 @@ private static void ClampImpl<T>(Span<T> span, T min, T max)
474
474
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Premultiply(ref Vector4 source)
{
    // Broadcast the alpha (W) lane across all four components, scale the
    // whole vector by it, then restore the original alpha so W is unchanged.
    Vector4 alpha = PermuteW(source);
    Vector4 scaled = source * alpha;
    source = WithW(scaled, alpha);
}
493
480
494
481
/// <summary>
@@ -498,7 +485,7 @@ public static void UnPremultiply(ref Vector4 source)
498
485
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
499
486
public static void Premultiply ( Span < Vector4 > vectors )
500
487
{
501
- if ( Avx2 . IsSupported && vectors . Length >= 2 )
488
+ if ( Avx . IsSupported && vectors . Length >= 2 )
502
489
{
503
490
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
504
491
ref Vector256 < float > vectorsBase = ref Unsafe . As < Vector4 , Vector256 < float > > ( ref MemoryMarshal . GetReference ( vectors ) ) ;
@@ -507,7 +494,7 @@ public static void Premultiply(Span<Vector4> vectors)
507
494
while ( Unsafe . IsAddressLessThan ( ref vectorsBase , ref vectorsLast ) )
508
495
{
509
496
Vector256 < float > source = vectorsBase ;
510
- Vector256 < float > multiply = Avx . Shuffle ( source , source , ShuffleAlphaControl ) ;
497
+ Vector256 < float > multiply = Avx . Permute ( source , ShuffleAlphaControl ) ;
511
498
vectorsBase = Avx . Blend ( Avx . Multiply ( source , multiply ) , source , BlendAlphaControl ) ;
512
499
vectorsBase = ref Unsafe . Add ( ref vectorsBase , 1 ) ;
513
500
}
@@ -532,24 +519,47 @@ public static void Premultiply(Span<Vector4> vectors)
532
519
}
533
520
}
534
521
522
/// <summary>
/// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
/// </summary>
/// <param name="source">The <see cref="Vector4"/> to unpremultiply.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void UnPremultiply(ref Vector4 source)
{
    // Broadcast W into all lanes, then defer to the alpha-aware overload,
    // which also guards against dividing by a zero alpha.
    Vector4 alpha = PermuteW(source);
    UnPremultiply(ref source, alpha);
}
532
+
533
/// <summary>
/// Reverses the result of premultiplying a vector using a previously broadcast alpha value.
/// </summary>
/// <param name="source">The <see cref="Vector4"/> to unpremultiply.</param>
/// <param name="alpha">A vector with the alpha value broadcast to all four components.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void UnPremultiply(ref Vector4 source, Vector4 alpha)
{
    if (alpha == Vector4.Zero)
    {
        // Nothing to reverse: dividing would be division by zero, and a
        // premultiplied value with zero alpha is already all-zero anyway.
        return;
    }

    source = WithW(source / alpha, alpha);
}
543
+
535
544
/// <summary>
536
545
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
537
546
/// </summary>
538
547
/// <param name="vectors">The span of vectors</param>
539
548
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
540
549
public static void UnPremultiply ( Span < Vector4 > vectors )
541
550
{
542
- if ( Avx2 . IsSupported && vectors . Length >= 2 )
551
+ if ( Avx . IsSupported && vectors . Length >= 2 )
543
552
{
544
553
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
545
554
ref Vector256 < float > vectorsBase = ref Unsafe . As < Vector4 , Vector256 < float > > ( ref MemoryMarshal . GetReference ( vectors ) ) ;
546
555
ref Vector256 < float > vectorsLast = ref Unsafe . Add ( ref vectorsBase , ( IntPtr ) ( ( uint ) vectors . Length / 2u ) ) ;
556
+ Vector256 < float > epsilon = Vector256 . Create ( Constants . Epsilon ) ;
547
557
548
558
while ( Unsafe . IsAddressLessThan ( ref vectorsBase , ref vectorsLast ) )
549
559
{
550
560
Vector256 < float > source = vectorsBase ;
551
- Vector256 < float > multiply = Avx . Shuffle ( source , source , ShuffleAlphaControl ) ;
552
- vectorsBase = Avx . Blend ( Avx . Divide ( source , multiply ) , source , BlendAlphaControl ) ;
561
+ Vector256 < float > alpha = Avx . Permute ( source , ShuffleAlphaControl ) ;
562
+ vectorsBase = UnPremultiply ( source , alpha ) ;
553
563
vectorsBase = ref Unsafe . Add ( ref vectorsBase , 1 ) ;
554
564
}
555
565
@@ -573,6 +583,61 @@ public static void UnPremultiply(Span<Vector4> vectors)
573
583
}
574
584
}
575
585
586
/// <summary>
/// Reverses the result of premultiplying two packed <see cref="Vector4"/> values,
/// using alpha values previously broadcast across each group of four lanes.
/// </summary>
/// <param name="source">The packed source vectors to unpremultiply.</param>
/// <param name="alpha">The alpha values, broadcast to every lane of each <see cref="Vector4"/> half.</param>
/// <returns>The unpremultiplied <see cref="Vector256{T}"/> of <see cref="float"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> UnPremultiply(Vector256<float> source, Vector256<float> alpha)
{
    // Check if alpha is zero to avoid division by zero.
    Vector256<float> zeroMask = Avx.CompareEqual(alpha, Vector256<float>.Zero);

    // Divide source by alpha where alpha is nonzero, otherwise keep the source lane unchanged.
    Vector256<float> result = Avx.BlendVariable(Avx.Divide(source, alpha), source, zeroMask);

    // Blend the result with the alpha vector to ensure that the alpha component is unchanged.
    return Avx.Blend(result, alpha, BlendAlphaControl);
}
598
+
599
/// <summary>
/// Permutes the given vector, returning a new instance with all four components set to <see cref="Vector4.W"/>.
/// </summary>
/// <param name="value">The vector.</param>
/// <returns>The <see cref="Vector4"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4 PermuteW(Vector4 value)
{
    if (Sse.IsSupported)
    {
        // Control 0b11111111 selects lane 3 (W) for every output lane.
        return Sse.Shuffle(value.AsVector128(), value.AsVector128(), 0b11111111).AsVector4();
    }

    return new(value.W);
}
614
+
615
/// <summary>
/// Sets the W component of the given vector <paramref name="value"/> to the given value from <paramref name="w"/>.
/// </summary>
/// <param name="value">The vector to set.</param>
/// <param name="w">The vector containing the W value.</param>
/// <returns>The <see cref="Vector4"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4 WithW(Vector4 value, Vector4 w)
{
    if (Sse41.IsSupported)
    {
        // insertps: control 0b11_11_0000 copies lane 3 of w into lane 3 of
        // value with no zeroing, yielding <value[0], value[1], value[2], w[3]>.
        return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4();
    }

    if (Sse.IsSupported)
    {
        // Create tmp as <w[3], w[0], value[2], value[0]>
        // Then return <value[0], value[1], tmp[2], tmp[0]> (which is <value[0], value[1], value[2], w[3]>)
        Vector128<float> tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11);
        return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4();
    }

    // Scalar fallback when no SSE support is available.
    value.W = w.W;
    return value;
}
640
+
576
641
/// <summary>
577
642
/// Calculates the cube pow of all the XYZ channels of the input vectors.
578
643
/// </summary>
@@ -586,7 +651,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
586
651
while ( Unsafe . IsAddressLessThan ( ref baseRef , ref endRef ) )
587
652
{
588
653
Vector4 v = baseRef ;
589
- float a = v . W ;
654
+ Vector4 a = PermuteW ( v ) ;
590
655
591
656
// Fast path for the default gamma exposure, which is 3. In this case we can skip
592
657
// calling Math.Pow 3 times (one per component), as the method is an internal call and
@@ -595,7 +660,7 @@ public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
595
660
// back to the target index in the temporary span. The whole iteration will get completely
596
661
// inlined and traslated into vectorized instructions, with much better performance.
597
662
v = v * v * v ;
598
- v . W = a ;
663
+ v = WithW ( v , a ) ;
599
664
600
665
baseRef = v ;
601
666
baseRef = ref Unsafe . Add ( ref baseRef , 1 ) ;
0 commit comments