Skip to content

Commit 2bba069

Browse files
format
1 parent 5b12d03 commit 2bba069

File tree

4 files changed

+77
-20
lines changed

4 files changed

+77
-20
lines changed

testable-simd-models/src/core_arch/x86/models/avx.rs

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -779,8 +779,14 @@ pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
779779
static_assert_uimm_bits!(IMM8, 8);
780780
{
781781
transmute(simd_shuffle(
782-
a.as_f32x4(), _mm_undefined_ps().as_f32x4(), [(IMM8 as u32 >> 0) & 0b11, (IMM8 as u32 >> 2) & 0b11,
783-
(IMM8 as u32 >> 4) & 0b11, (IMM8 as u32 >> 6) & 0b11,],
782+
a.as_f32x4(),
783+
_mm_undefined_ps().as_f32x4(),
784+
[
785+
(IMM8 as u32 >> 0) & 0b11,
786+
(IMM8 as u32 >> 2) & 0b11,
787+
(IMM8 as u32 >> 4) & 0b11,
788+
(IMM8 as u32 >> 6) & 0b11,
789+
],
784790
))
785791
}
786792
}
@@ -887,7 +893,13 @@ pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
887893
///
888894
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
889895
pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
890-
{ transmute(simd_shuffle((*a).as_f32x4(), _mm_setzero_ps().as_f32x4(), [0, 1, 2, 3, 0, 1, 2, 3])) }
896+
{
897+
transmute(simd_shuffle(
898+
(*a).as_f32x4(),
899+
_mm_setzero_ps().as_f32x4(),
900+
[0, 1, 2, 3, 0, 1, 2, 3],
901+
))
902+
}
891903
}
892904
/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
893905
/// (64-bit) floating-point elements) to all elements of the returned vector.
@@ -909,8 +921,9 @@ pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
909921
static_assert_uimm_bits!(IMM1, 1);
910922
{
911923
transmute(simd_shuffle(
912-
a.as_f32x8(), _mm256_castps128_ps256(b).as_f32x8(), [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9,
913-
10, 11]] [IMM1 as usize],
924+
a.as_f32x8(),
925+
_mm256_castps128_ps256(b).as_f32x8(),
926+
[[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
914927
))
915928
}
916929
}
@@ -923,8 +936,9 @@ pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d
923936
static_assert_uimm_bits!(IMM1, 1);
924937
{
925938
transmute(simd_shuffle(
926-
a.as_f64x4(), _mm256_castpd128_pd256(b).as_f64x4(),
927-
[[4, 5, 2, 3], [0, 1, 4, 5]] [IMM1 as usize],
939+
a.as_f64x4(),
940+
_mm256_castpd128_pd256(b).as_f64x4(),
941+
[[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
928942
))
929943
}
930944
}
@@ -1599,7 +1613,13 @@ pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
15991613
///
16001614
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
16011615
pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
1602-
{ transmute(simd_shuffle(a.as_f32x4(), _mm_undefined_ps().as_f32x4(), [0, 1, 2, 3, 4, 4, 4, 4])) }
1616+
{
1617+
transmute(simd_shuffle(
1618+
a.as_f32x4(),
1619+
_mm_undefined_ps().as_f32x4(),
1620+
[0, 1, 2, 3, 4, 4, 4, 4],
1621+
))
1622+
}
16031623
}
16041624
/// Casts vector of type __m128d to type __m256d;
16051625
/// the upper 128 bits of the result are undefined.
@@ -1630,7 +1650,13 @@ pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
16301650
///
16311651
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
16321652
pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
1633-
{ transmute(simd_shuffle(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), [0, 1, 2, 3, 4, 5, 6, 7])) }
1653+
{
1654+
transmute(simd_shuffle(
1655+
a.as_f32x4(),
1656+
_mm_setzero_ps().as_f32x4(),
1657+
[0, 1, 2, 3, 4, 5, 6, 7],
1658+
))
1659+
}
16341660
}
16351661
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
16361662
/// The lower 128 bits contain the value of the source vector. The upper
@@ -1652,7 +1678,13 @@ pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
16521678
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
16531679
// NOTE: Not modeled yet
16541680
pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
1655-
{ transmute(simd_shuffle(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), [0, 1, 2, 3])) }
1681+
{
1682+
transmute(simd_shuffle(
1683+
a.as_f64x2(),
1684+
_mm_setzero_pd().as_f64x2(),
1685+
[0, 1, 2, 3],
1686+
))
1687+
}
16561688
}
16571689
/// Returns vector of type `__m256` with indeterminate elements.
16581690
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically

testable-simd-models/src/core_arch/x86/models/avx2.rs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
use crate::abstractions::simd::*;
2323
use crate::abstractions::utilities::*;
2424

25-
use super::sse::*;
26-
use super::sse2::*;
2725
use super::avx::*;
2826
use super::avx2_handwritten::*;
27+
use super::sse::*;
28+
use super::sse2::*;
2929
use super::types::*;
3030

3131
/// Computes the absolute values of packed 32-bit integers in `a`.
@@ -389,14 +389,26 @@ pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
389389
///
390390
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
391391
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
392-
{ transmute(simd_shuffle(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), [0_u32; 2])) }
392+
{
393+
transmute(simd_shuffle(
394+
a.as_f64x2(),
395+
_mm_setzero_pd().as_f64x2(),
396+
[0_u32; 2],
397+
))
398+
}
393399
}
394400
/// Broadcasts the low double-precision (64-bit) floating-point element
395401
/// from `a` to all elements of the 256-bit returned value.
396402
///
397403
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
398404
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
399-
{ transmute(simd_shuffle(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), [0_u32; 4])) }
405+
{
406+
transmute(simd_shuffle(
407+
a.as_f64x2(),
408+
_mm_setzero_pd().as_f64x2(),
409+
[0_u32; 4],
410+
))
411+
}
400412
}
401413
/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in
402414
/// the 256-bit returned value.
@@ -423,14 +435,26 @@ pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
423435
///
424436
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
425437
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
426-
{ transmute(simd_shuffle(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), [0_u32; 4])) }
438+
{
439+
transmute(simd_shuffle(
440+
a.as_f32x4(),
441+
_mm_setzero_ps().as_f32x4(),
442+
[0_u32; 4],
443+
))
444+
}
427445
}
428446
/// Broadcasts the low single-precision (32-bit) floating-point element
429447
/// from `a` to all elements of the 256-bit returned value.
430448
///
431449
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
432450
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
433-
{ transmute(simd_shuffle(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), [0_u32; 8])) }
451+
{
452+
transmute(simd_shuffle(
453+
a.as_f32x4(),
454+
_mm_setzero_ps().as_f32x4(),
455+
[0_u32; 8],
456+
))
457+
}
434458
}
435459
/// Broadcasts the low packed 16-bit integer from a to all elements of
436460
/// the 128-bit returned value

testable-simd-models/src/core_arch/x86/models/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
//! In general, it is best to gain an idea of how an implementation should be written by looking
2121
//! at how other functions are implemented. Also see `core::arch::x86` for [reference](https://github.com/rust-lang/stdarch/tree/master/crates/core_arch).
2222
23-
pub mod sse;
2423
pub mod avx;
2524
pub mod avx2;
2625
pub mod avx2_handwritten;
2726
pub mod avx_handwritten;
27+
pub mod sse;
2828
pub mod sse2;
2929
pub mod sse2_handwritten;
3030
pub mod ssse3;
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Streaming SIMD Extensions (SSE)
2+
use super::types::*;
23
use crate::abstractions::simd::*;
34
use crate::abstractions::utilities::*;
4-
use super::types::*;
55

66
/// Returns vector of type __m128 with indeterminate elements.
77
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
@@ -10,11 +10,12 @@ use super::types::*;
1010
///
1111
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_ps)
1212
pub fn _mm_undefined_ps() -> __m128 {
13-
transmute(f32x4::ZERO())
13+
transmute(f32x4::ZERO())
1414
}
1515

1616
/// Construct a `__m128` with all elements initialized to zero.
1717
///
1818
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_ps)
1919
pub fn _mm_setzero_ps() -> __m128 {
20-
transmute(f32x4::ZERO()) }
20+
transmute(f32x4::ZERO())
21+
}

0 commit comments

Comments
 (0)