@@ -779,8 +779,14 @@ pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
779
779
static_assert_uimm_bits ! ( IMM8 , 8 ) ;
780
780
{
781
781
transmute ( simd_shuffle (
782
- a. as_f32x4 ( ) , _mm_undefined_ps ( ) . as_f32x4 ( ) , [ ( IMM8 as u32 >> 0 ) & 0b11 , ( IMM8 as u32 >> 2 ) & 0b11 ,
783
- ( IMM8 as u32 >> 4 ) & 0b11 , ( IMM8 as u32 >> 6 ) & 0b11 , ] ,
782
+ a. as_f32x4 ( ) ,
783
+ _mm_undefined_ps ( ) . as_f32x4 ( ) ,
784
+ [
785
+ ( IMM8 as u32 >> 0 ) & 0b11 ,
786
+ ( IMM8 as u32 >> 2 ) & 0b11 ,
787
+ ( IMM8 as u32 >> 4 ) & 0b11 ,
788
+ ( IMM8 as u32 >> 6 ) & 0b11 ,
789
+ ] ,
784
790
) )
785
791
}
786
792
}
@@ -887,7 +893,13 @@ pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
887
893
///
888
894
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
889
895
pub fn _mm256_broadcast_ps ( a : & __m128 ) -> __m256 {
890
- { transmute ( simd_shuffle ( ( * a) . as_f32x4 ( ) , _mm_setzero_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 0 , 1 , 2 , 3 ] ) ) }
896
+ {
897
+ transmute ( simd_shuffle (
898
+ ( * a) . as_f32x4 ( ) ,
899
+ _mm_setzero_ps ( ) . as_f32x4 ( ) ,
900
+ [ 0 , 1 , 2 , 3 , 0 , 1 , 2 , 3 ] ,
901
+ ) )
902
+ }
891
903
}
892
904
/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
893
905
/// (64-bit) floating-point elements) to all elements of the returned vector.
@@ -909,8 +921,9 @@ pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
909
921
static_assert_uimm_bits ! ( IMM1 , 1 ) ;
910
922
{
911
923
transmute ( simd_shuffle (
912
- a. as_f32x8 ( ) , _mm256_castps128_ps256 ( b) . as_f32x8 ( ) , [ [ 8 , 9 , 10 , 11 , 4 , 5 , 6 , 7 ] , [ 0 , 1 , 2 , 3 , 8 , 9 ,
913
- 10 , 11 ] ] [ IMM1 as usize ] ,
924
+ a. as_f32x8 ( ) ,
925
+ _mm256_castps128_ps256 ( b) . as_f32x8 ( ) ,
926
+ [ [ 8 , 9 , 10 , 11 , 4 , 5 , 6 , 7 ] , [ 0 , 1 , 2 , 3 , 8 , 9 , 10 , 11 ] ] [ IMM1 as usize ] ,
914
927
) )
915
928
}
916
929
}
@@ -923,8 +936,9 @@ pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d
923
936
static_assert_uimm_bits ! ( IMM1 , 1 ) ;
924
937
{
925
938
transmute ( simd_shuffle (
926
- a. as_f64x4 ( ) , _mm256_castpd128_pd256 ( b) . as_f64x4 ( ) ,
927
- [ [ 4 , 5 , 2 , 3 ] , [ 0 , 1 , 4 , 5 ] ] [ IMM1 as usize ] ,
939
+ a. as_f64x4 ( ) ,
940
+ _mm256_castpd128_pd256 ( b) . as_f64x4 ( ) ,
941
+ [ [ 4 , 5 , 2 , 3 ] , [ 0 , 1 , 4 , 5 ] ] [ IMM1 as usize ] ,
928
942
) )
929
943
}
930
944
}
@@ -1599,7 +1613,13 @@ pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
1599
1613
///
1600
1614
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
1601
1615
pub fn _mm256_castps128_ps256 ( a : __m128 ) -> __m256 {
1602
- { transmute ( simd_shuffle ( a. as_f32x4 ( ) , _mm_undefined_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 4 , 4 , 4 , 4 ] ) ) }
1616
+ {
1617
+ transmute ( simd_shuffle (
1618
+ a. as_f32x4 ( ) ,
1619
+ _mm_undefined_ps ( ) . as_f32x4 ( ) ,
1620
+ [ 0 , 1 , 2 , 3 , 4 , 4 , 4 , 4 ] ,
1621
+ ) )
1622
+ }
1603
1623
}
1604
1624
/// Casts vector of type __m128d to type __m256d;
1605
1625
/// the upper 128 bits of the result are undefined.
@@ -1630,7 +1650,13 @@ pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
1630
1650
///
1631
1651
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
1632
1652
pub fn _mm256_zextps128_ps256 ( a : __m128 ) -> __m256 {
1633
- { transmute ( simd_shuffle ( a. as_f32x4 ( ) , _mm_setzero_ps ( ) . as_f32x4 ( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ) ) }
1653
+ {
1654
+ transmute ( simd_shuffle (
1655
+ a. as_f32x4 ( ) ,
1656
+ _mm_setzero_ps ( ) . as_f32x4 ( ) ,
1657
+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ,
1658
+ ) )
1659
+ }
1634
1660
}
1635
1661
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
1636
1662
/// The lower 128 bits contain the value of the source vector. The upper
@@ -1652,7 +1678,13 @@ pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
1652
1678
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
1653
1679
// NOTE: Not modeled yet
1654
1680
pub fn _mm256_zextpd128_pd256 ( a : __m128d ) -> __m256d {
1655
- { transmute ( simd_shuffle ( a. as_f64x2 ( ) , _mm_setzero_pd ( ) . as_f64x2 ( ) , [ 0 , 1 , 2 , 3 ] ) ) }
1681
+ {
1682
+ transmute ( simd_shuffle (
1683
+ a. as_f64x2 ( ) ,
1684
+ _mm_setzero_pd ( ) . as_f64x2 ( ) ,
1685
+ [ 0 , 1 , 2 , 3 ] ,
1686
+ ) )
1687
+ }
1656
1688
}
1657
1689
/// Returns vector of type `__m256` with indeterminate elements.
1658
1690
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
0 commit comments