@@ -779,8 +779,14 @@ pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
779779 static_assert_uimm_bits ! ( IMM8 , 8 ) ;
780780 {
781781 transmute ( simd_shuffle (
782- a. as_f32x4 ( ) , _mm_undefined_ps ( ) . as_f32x4 ( ) , [ ( IMM8 as u32 >> 0 ) & 0b11 , ( IMM8 as u32 >> 2 ) & 0b11 ,
783- ( IMM8 as u32 >> 4 ) & 0b11 , ( IMM8 as u32 >> 6 ) & 0b11 , ] ,
782+ a. as_f32x4 ( ) ,
783+ _mm_undefined_ps ( ) . as_f32x4 ( ) ,
784+ [
785+ ( IMM8 as u32 >> 0 ) & 0b11 ,
786+ ( IMM8 as u32 >> 2 ) & 0b11 ,
787+ ( IMM8 as u32 >> 4 ) & 0b11 ,
788+ ( IMM8 as u32 >> 6 ) & 0b11 ,
789+ ] ,
784790 ) )
785791 }
786792}
@@ -887,7 +893,13 @@ pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
887893///
888894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
889895pub fn _mm256_broadcast_ps ( a : & __m128 ) -> __m256 {
890- { transmute ( simd_shuffle ( ( * a) . as_f32x4 ( ) , _mm_setzero_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 0 , 1 , 2 , 3 ] ) ) }
896+ {
897+ transmute ( simd_shuffle (
898+ ( * a) . as_f32x4 ( ) ,
899+ _mm_setzero_ps ( ) . as_f32x4 ( ) ,
900+ [ 0 , 1 , 2 , 3 , 0 , 1 , 2 , 3 ] ,
901+ ) )
902+ }
891903}
892904/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
893905/// (64-bit) floating-point elements) to all elements of the returned vector.
@@ -909,8 +921,9 @@ pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
909921 static_assert_uimm_bits ! ( IMM1 , 1 ) ;
910922 {
911923 transmute ( simd_shuffle (
912- a. as_f32x8 ( ) , _mm256_castps128_ps256 ( b) . as_f32x8 ( ) , [ [ 8 , 9 , 10 , 11 , 4 , 5 , 6 , 7 ] , [ 0 , 1 , 2 , 3 , 8 , 9 ,
913- 10 , 11 ] ] [ IMM1 as usize ] ,
924+ a. as_f32x8 ( ) ,
925+ _mm256_castps128_ps256 ( b) . as_f32x8 ( ) ,
926+ [ [ 8 , 9 , 10 , 11 , 4 , 5 , 6 , 7 ] , [ 0 , 1 , 2 , 3 , 8 , 9 , 10 , 11 ] ] [ IMM1 as usize ] ,
914927 ) )
915928 }
916929}
@@ -923,8 +936,9 @@ pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d
923936 static_assert_uimm_bits ! ( IMM1 , 1 ) ;
924937 {
925938 transmute ( simd_shuffle (
926- a. as_f64x4 ( ) , _mm256_castpd128_pd256 ( b) . as_f64x4 ( ) ,
927- [ [ 4 , 5 , 2 , 3 ] , [ 0 , 1 , 4 , 5 ] ] [ IMM1 as usize ] ,
939+ a. as_f64x4 ( ) ,
940+ _mm256_castpd128_pd256 ( b) . as_f64x4 ( ) ,
941+ [ [ 4 , 5 , 2 , 3 ] , [ 0 , 1 , 4 , 5 ] ] [ IMM1 as usize ] ,
928942 ) )
929943 }
930944}
@@ -1599,7 +1613,13 @@ pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
15991613///
16001614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
16011615pub fn _mm256_castps128_ps256 ( a : __m128 ) -> __m256 {
1602- { transmute ( simd_shuffle ( a. as_f32x4 ( ) , _mm_undefined_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 4 , 4 , 4 , 4 ] ) ) }
1616+ {
1617+ transmute ( simd_shuffle (
1618+ a. as_f32x4 ( ) ,
1619+ _mm_undefined_ps ( ) . as_f32x4 ( ) ,
1620+ [ 0 , 1 , 2 , 3 , 4 , 4 , 4 , 4 ] ,
1621+ ) )
1622+ }
16031623}
16041624/// Casts vector of type __m128d to type __m256d;
16051625/// the upper 128 bits of the result are undefined.
@@ -1630,7 +1650,13 @@ pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
16301650///
16311651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
16321652pub fn _mm256_zextps128_ps256 ( a : __m128 ) -> __m256 {
1633- { transmute ( simd_shuffle ( a. as_f32x4 ( ) , _mm_setzero_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ) ) }
1653+ {
1654+ transmute ( simd_shuffle (
1655+ a. as_f32x4 ( ) ,
1656+ _mm_setzero_ps ( ) . as_f32x4 ( ) ,
1657+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ,
1658+ ) )
1659+ }
16341660}
16351661/// Constructs a 256-bit integer vector from a 128-bit integer vector.
16361662/// The lower 128 bits contain the value of the source vector. The upper
@@ -1652,7 +1678,13 @@ pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
16521678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
16531679// NOTE: Not modeled yet
16541680pub fn _mm256_zextpd128_pd256 ( a : __m128d ) -> __m256d {
1655- { transmute ( simd_shuffle ( a. as_f64x2 ( ) , _mm_setzero_pd ( ) . as_f64x2 ( ) , [ 0 , 1 , 2 , 3 ] ) ) }
1681+ {
1682+ transmute ( simd_shuffle (
1683+ a. as_f64x2 ( ) ,
1684+ _mm_setzero_pd ( ) . as_f64x2 ( ) ,
1685+ [ 0 , 1 , 2 , 3 ] ,
1686+ ) )
1687+ }
16561688}
16571689/// Returns vector of type `__m256` with indeterminate elements.
16581690/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
0 commit comments