Skip to content

Commit 428fd3c

Browse files
edits
1 parent 69189c2 commit 428fd3c

File tree

16 files changed

+96
-847
lines changed

16 files changed

+96
-847
lines changed

testable-simd-models/README.md

Lines changed: 26 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
11
# testable-simd-models
22

3-
This crates contains models for the intrinsics provided by `core::arch`. Its structure is based off of
4-
[rust-lang/stdarch/crates/core_arch](https://github.com/rust-lang/stdarch/tree/master/crates/core_arch). Within the `core_arch` folder in this crate, there is a different
5-
folder for each architecture whose intrinsics are being implemented (corresponding to folders in the previous link). Each such
6-
folder has 3 sub-folders, `models`, `tests`, and `specs`.
3+
This crate contains executable, independently testable specifications
4+
for the SIMD intrinsics provided by the `core::arch` library in Rust.
5+
The structure of this crate is based on [rust-lang/stdarch/crates/core_arch](https://github.com/rust-lang/stdarch/tree/master/crates/core_arch).
76

8-
The `models` folder contains the models of the intrinsics, with a file corresponding to different target features,
9-
and are written using the various abstractions implementedin `crate::abstractions`, especially those
10-
in `crate::abstractions::simd`. These models are meant to closely resemble their implementations within
11-
the Rust core itself.
7+
## Code Structure
8+
Within the `core_arch` folder in this crate, there is a different
9+
folder for each architecture for which we have written models.
10+
In particular, it contains folders for `x86` and `arm_shared`.
11+
Each such folder has 3 sub-folders, `models`, `tests`, and `specs`.
1212

13-
The `tests` folder contains the tests of these models, and is structured the same way as `models`. Each file
14-
additionally contains the definition of a macro that makes writing these tests easier. The tests
15-
work by testing the models against the intrinsics in the Rust core, trying out random inputs
16-
(generally 1000), and comparing their outputs.
13+
The `models` folder contains the models of the intrinsics, with a file
14+
corresponding to different target features, and are written using the
15+
various abstractions implemented in `crate::abstractions`, especially
16+
those in `crate::abstractions::simd`. These models are meant to
17+
closely resemble their implementations within the Rust core itself.
1718

18-
The `specs` folder contains specifications. These are implementatioons written without
19-
using the function abstractions in `crate::abstractions::simd`, and are written to be
20-
match their vendor specification as closely as possible.
19+
The `tests` folder contains the tests of these models, and is
20+
structured the same way as `models`. Each file additionally contains
21+
the definition of a macro that makes writing these tests easier. The
22+
tests work by testing the models against the intrinsics in the Rust
23+
core, trying out random inputs (generally 1000), and comparing their
24+
outputs.
2125

22-
The process of adding a specific intrinsic's model goes as follows. For this example,
23-
let us say the intrinsic we are adding is `_mm256_bsrli_epi128` from the avx2 feature set.
26+
The process of adding a specific intrinsic's model goes as follows.
27+
For this example, let us say the intrinsic we are adding is
28+
`_mm256_bsrli_epi128` from the avx2 feature set.
2429

2530
1. We go to [rust-lang/stdarch/crates/core_arch/src/x86/](https://github.com/rust-lang/stdarch/tree/master/crates/core_arch/src/x86/), and find the implementation of the intrinsic in `avx2.rs`.
31+
2632
2. We see that the implementation looks like this:
2733
``` rust
2834
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
@@ -53,33 +59,7 @@ pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
5359
mask(IMM8, 1),
5460
mask(IMM8, 2),
5561
mask(IMM8, 3),
56-
mask(IMM8, 4),
57-
mask(IMM8, 5),
58-
mask(IMM8, 6),
59-
mask(IMM8, 7),
60-
mask(IMM8, 8),
61-
mask(IMM8, 9),
62-
mask(IMM8, 10),
63-
mask(IMM8, 11),
64-
mask(IMM8, 12),
65-
mask(IMM8, 13),
66-
mask(IMM8, 14),
67-
mask(IMM8, 15),
68-
mask(IMM8, 16),
69-
mask(IMM8, 17),
70-
mask(IMM8, 18),
71-
mask(IMM8, 19),
72-
mask(IMM8, 20),
73-
mask(IMM8, 21),
74-
mask(IMM8, 22),
75-
mask(IMM8, 23),
76-
mask(IMM8, 24),
77-
mask(IMM8, 25),
78-
mask(IMM8, 26),
79-
mask(IMM8, 27),
80-
mask(IMM8, 28),
81-
mask(IMM8, 29),
82-
mask(IMM8, 30),
62+
...
8363
mask(IMM8, 31),
8464
],
8565
);
@@ -112,33 +92,7 @@ pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
11292
mask(IMM8, 1),
11393
mask(IMM8, 2),
11494
mask(IMM8, 3),
115-
mask(IMM8, 4),
116-
mask(IMM8, 5),
117-
mask(IMM8, 6),
118-
mask(IMM8, 7),
119-
mask(IMM8, 8),
120-
mask(IMM8, 9),
121-
mask(IMM8, 10),
122-
mask(IMM8, 11),
123-
mask(IMM8, 12),
124-
mask(IMM8, 13),
125-
mask(IMM8, 14),
126-
mask(IMM8, 15),
127-
mask(IMM8, 16),
128-
mask(IMM8, 17),
129-
mask(IMM8, 18),
130-
mask(IMM8, 19),
131-
mask(IMM8, 20),
132-
mask(IMM8, 21),
133-
mask(IMM8, 22),
134-
mask(IMM8, 23),
135-
mask(IMM8, 24),
136-
mask(IMM8, 25),
137-
mask(IMM8, 26),
138-
mask(IMM8, 27),
139-
mask(IMM8, 28),
140-
mask(IMM8, 29),
141-
mask(IMM8, 30),
95+
...
14296
mask(IMM8, 31),
14397
],
14498
);
@@ -149,25 +103,6 @@ pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
149103
3. Next, we add a test for this intrinsic. For this, we navigate to `core_arch/x86/tests/avx2.rs`. Since the value of
150104
`IMM8` can be up to 8 bits, we want to test constant arguments up to 255. Thus, we write the following macro invocation.
151105
```rust
152-
mk!([100]_mm256_bsrli_epi128{<0>,<1>,<2>,<3>,<4>,<5>,<6>,<7>,<8>,<9>,<10>,<11>,<12>,<13>,<14>,<15>,<16>,<17>,<18>,<19>,<20>,<21>,<22>,<23>,<24>,<25>,<26>,<27>,<28>,<29>,<30>,<31>,<32>,<33>,<34>,<35>,<36>,<37>,<38>,<39>,<40>,<41>,<42>,<43>,<44>,<45>,<46>,<47>,<48>,<49>,<50>,<51>,<52>,<53>,<54>,<55>,<56>,<57>,<58>,<59>,<60>,<61>,<62>,<63>,<64>,<65>,<66>,<67>,<68>,<69>,<70>,<71>,<72>,<73>,<74>,<75>,<76>,<77>,<78>,<79>,<80>,<81>,<82>,<83>,<84>,<85>,<86>,<87>,<88>,<89>,<90>,<91>,<92>,<93>,<94>,<95>,<96>,<97>,<98>,<99>,<100>,<101>,<102>,<103>,<104>,<105>,<106>,<107>,<108>,<109>,<110>,<111>,<112>,<113>,<114>,<115>,<116>,<117>,<118>,<119>,<120>,<121>,<122>,<123>,<124>,<125>,<126>,<127>,<128>,<129>,<130>,<131>,<132>,<133>,<134>,<135>,<136>,<137>,<138>,<139>,<140>,<141>,<142>,<143>,<144>,<145>,<146>,<147>,<148>,<149>,<150>,<151>,<152>,<153>,<154>,<155>,<156>,<157>,<158>,<159>,<160>,<161>,<162>,<163>,<164>,<165>,<166>,<167>,<168>,<169>,<170>,<171>,<172>,<173>,<174>,<175>,<176>,<177>,<178>,<179>,<180>,<181>,<182>,<183>,<184>,<185>,<186>,<187>,<188>,<189>,<190>,<191>,<192>,<193>,<194>,<195>,<196>,<197>,<198>,<199>,<200>,<201>,<202>,<203>,<204>,<205>,<206>,<207>,<208>,<209>,<210>,<211>,<212>,<213>,<214>,<215>,<216>,<217>,<218>,<219>,<220>,<221>,<222>,<223>,<224>,<225>,<226>,<227>,<228>,<229>,<230>,<231>,<232>,<233>,<234>,<235>,<236>,<237>,<238>,<239>,<240>,<241>,<242>,<243>,<244>,<245>,<246>,<247>,<248>,<249>,<250>,<251>,<252>,<253>,<254>,<255>}(a: BitVec));
106+
mk!([100]_mm256_bsrli_epi128{<0>,<1>,<2>,<3>,...,<255>}(a: BitVec));
153107
```
154108
Here, the `[100]` means we test 100 random inputs for each constant value. This concludes the necessary steps for implementing an intrinsic.
155-
4. Optionally, we may want to add a specification, since the code for the Rust implemetation is non straightforward. For this, we look up the [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128).
156-
Based on the documentation, we may write the following specification.
157-
```rust
158-
pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
159-
let a = BitVec::to_i128x2(a);
160-
let a = i128x2::from_fn(|i| {
161-
let tmp = IMM8 % 256;
162-
if tmp > 15 {0} else {
163-
((a[i] as u128) >> (tmp * 8)) as i128
164-
}
165-
});
166-
BitVec::from_i128x2(a)
167-
}
168-
```
169-
There is no test for the specification, and thus it has to be manually reviewed to ensure that it perfectly captures the
170-
behaviour described by the documentation.
171-
172-
173-

testable-simd-models/src/abstractions/simd.rs

Lines changed: 60 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,74 +2,69 @@
22
//!
33
//! Operations are defined on `FunArray`s.
44
5-
use crate::abstractions::{bit::MachineInteger, funarr::FunArray};
6-
7-
pub mod int_vec_interp {
8-
use crate::abstractions::bit::MachineInteger;
9-
use crate::abstractions::bitvec::*;
10-
use crate::abstractions::funarr::*;
11-
12-
#[allow(dead_code)]
13-
/// Derives interpretations functions, and type synonyms.
14-
macro_rules! interpretations {
15-
($n:literal; $($name:ident [$ty:ty; $m:literal]),*) => {
16-
$(
17-
#[doc = concat!(stringify!($ty), " vectors of size ", stringify!($m))]
18-
#[allow(non_camel_case_types)]
19-
pub type $name = FunArray<$m, $ty>;
20-
pastey::paste! {
21-
const _: () = {
22-
impl BitVec<$n> {
23-
#[doc = concat!("Conversion from ", stringify!($ty), " vectors of size ", stringify!($m), "to bit vectors of size ", stringify!($n))]
24-
pub fn [< from_ $name >](iv: $name) -> BitVec<$n> {
25-
let vec: Vec<$ty> = iv.as_vec();
26-
Self::from_slice(&vec[..], <$ty>::bits() as u64)
27-
}
28-
#[doc = concat!("Conversion from bit vectors of size ", stringify!($n), " to ", stringify!($ty), " vectors of size ", stringify!($m))]
29-
pub fn [< to_ $name >](bv: BitVec<$n>) -> $name {
30-
let vec: Vec<$ty> = bv.to_vec();
31-
$name::from_fn(|i| vec[i as usize])
32-
}
33-
34-
35-
}
36-
37-
38-
impl From<BitVec<$n>> for $name {
39-
fn from(bv: BitVec<$n>) -> Self {
40-
BitVec::[< to_ $name >](bv)
41-
}
42-
}
43-
44-
impl From<$name> for BitVec<$n> {
45-
fn from(iv: $name) -> Self {
46-
BitVec::[< from_ $name >](iv)
47-
}
48-
}
49-
50-
impl $name {
51-
52-
pub fn splat(value: $ty) -> Self {
53-
FunArray::from_fn(|_| value)
54-
}
55-
}
56-
};
57-
}
58-
)*
59-
};
60-
}
5+
use crate::abstractions::{bit::MachineInteger, bitvec::*, funarr::*};
6+
use std::convert::*;
7+
use std::ops::*;
8+
9+
#[allow(dead_code)]
10+
/// Derives interpretation functions and type synonyms.
11+
macro_rules! interpretations {
12+
($n:literal; $($name:ident [$ty:ty; $m:literal]),*) => {
13+
$(
14+
#[doc = concat!(stringify!($ty), " vectors of size ", stringify!($m))]
15+
#[allow(non_camel_case_types)]
16+
pub type $name = FunArray<$m, $ty>;
17+
pastey::paste! {
18+
const _: () = {
19+
impl BitVec<$n> {
20+
#[doc = concat!("Conversion from ", stringify!($ty), " vectors of size ", stringify!($m), "to bit vectors of size ", stringify!($n))]
21+
pub fn [< from_ $name >](iv: $name) -> BitVec<$n> {
22+
let vec: Vec<$ty> = iv.as_vec();
23+
Self::from_slice(&vec[..], <$ty>::bits() as u64)
24+
}
25+
#[doc = concat!("Conversion from bit vectors of size ", stringify!($n), " to ", stringify!($ty), " vectors of size ", stringify!($m))]
26+
pub fn [< to_ $name >](bv: BitVec<$n>) -> $name {
27+
let vec: Vec<$ty> = bv.to_vec();
28+
$name::from_fn(|i| vec[i as usize])
29+
}
30+
31+
32+
}
33+
34+
35+
impl From<BitVec<$n>> for $name {
36+
fn from(bv: BitVec<$n>) -> Self {
37+
BitVec::[< to_ $name >](bv)
38+
}
39+
}
40+
41+
impl From<$name> for BitVec<$n> {
42+
fn from(iv: $name) -> Self {
43+
BitVec::[< from_ $name >](iv)
44+
}
45+
}
6146

62-
interpretations!(256; i32x8 [i32; 8], i64x4 [i64; 4], i16x16 [i16; 16], i128x2 [i128; 2], i8x32 [i8; 32],
63-
u32x8 [u32; 8], u64x4 [u64; 4], u16x16 [u16; 16], u8x32 [u8; 32]);
64-
interpretations!(128; i32x4 [i32; 4], i64x2 [i64; 2], i16x8 [i16; 8], i128x1 [i128; 1], i8x16 [i8; 16],
65-
u32x4 [u32; 4], u64x2 [u64; 2], u16x8 [u16; 8], u8x16 [u8; 16]);
47+
impl $name {
6648

67-
interpretations!(512; u32x16 [u32; 16], u16x32 [u16; 32], i32x16 [i32; 16], i16x32 [i16; 32]);
68-
interpretations!(64; i64x1 [i64; 1], i32x2 [i32; 2], i16x4 [i16; 4], i8x8 [i8; 8], u64x1 [u64; 1], u32x2 [u32; 2],u16x4 [u16; 4], u8x8 [u8; 8]);
69-
interpretations!(32; i8x4 [i8; 4], u8x4 [u8; 4]);
49+
pub fn splat(value: $ty) -> Self {
50+
FunArray::from_fn(|_| value)
51+
}
52+
}
53+
};
54+
}
55+
)*
56+
};
7057
}
71-
use std::convert::*;
72-
use std::ops::*;
58+
59+
interpretations!(256; i32x8 [i32; 8], i64x4 [i64; 4], i16x16 [i16; 16], i128x2 [i128; 2], i8x32 [i8; 32],
60+
u32x8 [u32; 8], u64x4 [u64; 4], u16x16 [u16; 16], u8x32 [u8; 32]);
61+
interpretations!(128; i32x4 [i32; 4], i64x2 [i64; 2], i16x8 [i16; 8], i128x1 [i128; 1], i8x16 [i8; 16],
62+
u32x4 [u32; 4], u64x2 [u64; 2], u16x8 [u16; 8], u8x16 [u8; 16]);
63+
64+
interpretations!(512; u32x16 [u32; 16], u16x32 [u16; 32], i32x16 [i32; 16], i16x32 [i16; 32]);
65+
interpretations!(64; i64x1 [i64; 1], i32x2 [i32; 2], i16x4 [i16; 4], i8x8 [i8; 8], u64x1 [u64; 1], u32x2 [u32; 2],u16x4 [u16; 4], u8x8 [u8; 8]);
66+
interpretations!(32; i8x4 [i8; 4], u8x4 [u8; 4]);
67+
7368

7469
/// Inserts an element into a vector, returning the updated vector.
7570
///
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
pub mod models;
2-
pub mod specs;
32
#[cfg(test)]
43
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
54
pub mod tests;

testable-simd-models/src/core_arch/arm_shared/models/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#![allow(unused)]
2323
#[allow(non_camel_case_types)]
2424
mod types {
25-
use crate::abstractions::simd::int_vec_interp::*;
25+
use crate::abstractions::simd::*;
2626
pub type int32x4_t = i32x4;
2727
pub type int64x1_t = i64x1;
2828
pub type int64x2_t = i64x2;

testable-simd-models/src/core_arch/arm_shared/specs/mod.rs

Lines changed: 0 additions & 39 deletions
This file was deleted.

testable-simd-models/src/core_arch/arm_shared/tests/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub mod neon;
3434

3535
#[allow(non_camel_case_types)]
3636
mod types {
37-
use crate::abstractions::simd::int_vec_interp::*;
37+
use crate::abstractions::simd::*;
3838
pub type int32x4_t = i32x4;
3939
pub type int64x1_t = i64x1;
4040
pub type int64x2_t = i64x2;
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
pub mod models;
2-
pub mod specs;
32
#[cfg(test)]
43
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
54
mod tests;

testable-simd-models/src/core_arch/x86/models/avx.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
1515
1616
use super::types::*;
17-
use crate::abstractions::{bit::Bit, bitvec::BitVec, simd::int_vec_interp::*, simd::*};
17+
use crate::abstractions::{bit::Bit, bitvec::BitVec, simd::*};
1818

1919
mod c_extern {
20-
use crate::abstractions::simd::int_vec_interp::*;
20+
use crate::abstractions::simd::*;
2121

2222
pub fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8 {
2323
let temp = i128x2::from_fn(|i| match (imm8 as u8) >> (i * 4) {

testable-simd-models/src/core_arch/x86/models/avx2.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
2020
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
2121
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
22-
use crate::abstractions::{bitvec::BitVec, simd::int_vec_interp::*};
22+
use crate::abstractions::{bitvec::BitVec, simd::*};
2323

2424
mod c_extern {
25-
use crate::abstractions::{bit::MachineInteger, simd::int_vec_interp::*, simd::*};
25+
use crate::abstractions::{bit::MachineInteger, simd::*};
2626
pub fn phaddw(a: i16x16, b: i16x16) -> i16x16 {
2727
i16x16::from_fn(|i| {
2828
if i < 4 {

testable-simd-models/src/core_arch/x86/models/sse2.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
//! Streaming SIMD Extensions 2 (SSE2)
22
use super::types::*;
3-
use crate::abstractions::{bit::Bit, bitvec::BitVec, simd::int_vec_interp::*, simd::*};
3+
use crate::abstractions::{bit::Bit, bitvec::BitVec, simd::*};
44
mod c_extern {
5-
use crate::abstractions::{bit::MachineInteger, simd::int_vec_interp::*};
5+
use crate::abstractions::{bit::MachineInteger, simd::*};
66
pub fn packsswb(a: i16x8, b: i16x8) -> i8x16 {
77
i8x16::from_fn(|i| {
88
if i < 8 {

0 commit comments

Comments
 (0)