@@ -71,7 +71,7 @@ public Vector4 Convolve(Span<Vector4> rowSpan)
7171 public Vector4 ConvolveCore ( ref Vector4 rowStartRef )
7272 {
7373#if SUPPORTS_RUNTIME_INTRINSICS
74- if ( Fma . IsSupported )
74+ if ( Avx2 . IsSupported )
7575 {
7676 float * bufferStart = this . bufferPtr ;
7777 float * bufferEnd = bufferStart + ( this . Length & ~ 1 ) ;
@@ -82,8 +82,9 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
8282 {
8383 Vector256 < float > rowItem256 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
8484 Vector256 < float > bufferItem256 = Avx2 . PermuteVar8x32 ( Vector256 . Create ( * ( double * ) bufferStart ) . AsSingle ( ) , mask ) ;
85+ Vector256 < float > multiply256 = Avx . Multiply ( rowItem256 , bufferItem256 ) ;
8586
86- result256 = Fma . MultiplyAdd ( rowItem256 , bufferItem256 , result256 ) ;
87+ result256 = Avx . Add ( multiply256 , result256 ) ;
8788
8889 bufferStart += 2 ;
8990 rowStartRef = ref Unsafe . Add ( ref rowStartRef , 2 ) ;
@@ -95,8 +96,9 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
9596 {
9697 Vector128 < float > rowItem128 = Unsafe . As < Vector4 , Vector128 < float > > ( ref rowStartRef ) ;
9798 var bufferItem128 = Vector128 . Create ( * bufferStart ) ;
99+ Vector128 < float > multiply128 = Sse . Multiply ( rowItem128 , bufferItem128 ) ;
98100
99- result128 = Fma . MultiplyAdd ( rowItem128 , bufferItem128 , result128 ) ;
101+ result128 = Sse . Add ( multiply128 , result128 ) ;
100102 }
101103
102104 return * ( Vector4 * ) & result128 ;
@@ -114,8 +116,8 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
114116 // Vector4 v = offsetedRowSpan[i];
115117 result += rowStartRef * * bufferStart ;
116118
117- rowStartRef = ref Unsafe . Add ( ref rowStartRef , 1 ) ;
118119 bufferStart ++ ;
120+ rowStartRef = ref Unsafe . Add ( ref rowStartRef , 1 ) ;
119121 }
120122
121123 return result ;
0 commit comments