Skip to content

Commit bfda1c7

Browse files
committed
Save some generator changes, including adding x86_v{1,2,3,4}
1 parent 93b962f commit bfda1c7

File tree

5 files changed

+313
-8
lines changed

5 files changed

+313
-8
lines changed

fearless_simd_core/gen/src/data.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
mod x86;
2-
pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE};
2+
pub(crate) use x86::{
3+
X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4,
4+
};

fearless_simd_core/gen/src/data/x86.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,5 +422,29 @@ pub(crate) const X86_FEATURES: &[Feature] = &[
422422
),
423423
];
424424

425-
#[test]
426-
fn all_features_included() {}
425+
// All taken from <https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels>
426+
427+
pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_level.rs");
428+
429+
/// The target features required in the x86-64-v1 level.
430+
// Rust doesn't have target features for "cmov", "cmpxchg8b", "fpu", "sce", and "mmx".
431+
// The first four are all assumed, and the final is not implemented because
432+
// it's practically impossible to use correctly (and there's no reason to).
433+
pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"];
434+
/// The target features required in the x86-64-v2 level, in addition to those already in [`X86_V1`].
435+
pub(crate) const X86_V2: &[&str] = &[
436+
"sse3",
437+
"ssse3",
438+
"sse4.1",
439+
"sse4.2",
440+
"popcnt",
441+
"cmpxchg16b",
442+
// The lahfsahf target feature is currently in Rust beta.
443+
// "lahfsahf",
444+
];
445+
/// The target features required in the x86-64-v3 level, excluding those already in [`X86_V2`].
446+
pub(crate) const X86_V3: &[&str] = &[
447+
"avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave",
448+
];
449+
/// The target features required in the x86-64-v4 level, excluding those already in [`X86_V3`].
450+
pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"];

fearless_simd_core/gen/src/main.rs

Lines changed: 189 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
mod data;
22

3+
use std::collections::HashSet;
34
use std::fmt::Write;
45
use std::fs;
6+
use std::hash::RandomState;
57
use std::{
68
cell::RefCell,
79
collections::HashMap,
@@ -10,21 +12,34 @@ use std::{
1012
path::{Path, PathBuf},
1113
};
1214

15+
use crate::data::X86_LEVEL_TEMPLATE;
16+
1317
fn main() {
1418
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1519
let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src");
16-
generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, data::X86_FEATURES).unwrap();
20+
{
21+
let x86_features = normalize_features(data::X86_FEATURES);
22+
generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, &x86_features).unwrap();
23+
let mut features: Vec<&'static str> = Vec::new();
24+
features.extend(data::X86_V1);
25+
generate_x86_level(&src_dir, "v1", &x86_features, &features).unwrap();
26+
features.extend(data::X86_V2);
27+
generate_x86_level(&src_dir, "v2", &x86_features, &features).unwrap();
28+
features.extend(data::X86_V3);
29+
generate_x86_level(&src_dir, "v3", &x86_features, &features).unwrap();
30+
features.extend(data::X86_V4);
31+
generate_x86_level(&src_dir, "v4", &x86_features, &features).unwrap();
32+
}
1733
}
1834

1935
fn generate_for_arch(
2036
root_dir: &Path,
2137
arch_module_name: &str,
2238
template: &str,
23-
features: &'static [Feature],
39+
features: &[NormalizedFeature],
2440
) -> io::Result<()> {
2541
let arch_dir = root_dir.join(arch_module_name);
26-
let features = normalize_features(features);
27-
for feature in &features {
42+
for feature in features {
2843
let mut new_docs = String::new();
2944
for line in feature.feature.extra_docs.lines() {
3045
writeln!(&mut new_docs, "///{line}").unwrap();
@@ -79,6 +94,7 @@ impl From<FEATURE_STRUCT_NAME> for {type_path} {{
7994
r#""{ENABLED_FEATURES_STR_LIST}""#,
8095
&enabled_feature_str_list,
8196
);
97+
8298
let module_dir = arch_dir.join(feature.feature.module);
8399
create_dir_all(&module_dir)?;
84100
let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_"));
@@ -88,6 +104,175 @@ impl From<FEATURE_STRUCT_NAME> for {type_path} {{
88104
Ok(())
89105
}
90106

107+
/// Generate the code for an X86 microarchitecture level.
108+
fn generate_x86_level(
109+
root_dir: &Path,
110+
level: &'static str,
111+
all_features: &[NormalizedFeature],
112+
required_features: &[&'static str],
113+
) -> io::Result<()> {
114+
// Precalculate the sets of features we need to support.
115+
// Intermediate value for the sorted `superset` list below; a set is used so that features are deduplicated.
116+
let mut superset = HashSet::new();
117+
for feature in required_features {
118+
superset.insert(*feature);
119+
let normalized = all_features
120+
.iter()
121+
.find(|it| it.feature.feature_name == *feature)
122+
.unwrap();
123+
superset.extend(&normalized.children);
124+
}
125+
126+
// Every single target feature supported on this level, including those implied.
127+
// (In all likelihood, this is the same as `required_features`, but I'd rather validate that manually)
128+
let mut superset = superset.into_iter().collect::<Vec<_>>();
129+
superset.sort();
130+
let mut lcd = HashSet::<_, RandomState>::from_iter(superset.iter().copied());
131+
// We make the assumption that features are a tree, that is, there's no case where `A->B` and `B->A`.
132+
// However, even if that didn't hold, we at least use a consistent ordering here.
133+
// We test from the superset to be safe; this should be equivalent to using `required_features`, though.
134+
for feature in &superset {
135+
let normalized = all_features
136+
.iter()
137+
.find(|it| it.feature.feature_name == *feature)
138+
.unwrap();
139+
for feature in &normalized.children {
140+
// If the feature is a child of another required feature, we know we don't need it for this version.
141+
// We don't care whether or not it was actually removed.
142+
lcd.remove(*feature);
143+
}
144+
}
145+
// The set of features which are strictly required.
146+
// This is used to create the target feature string, so that it can be as short as possible.
147+
let mut lcd = lcd.into_iter().collect::<Vec<_>>();
148+
lcd.sort();
149+
// Now that we have lcd and superset, we can preprocess what we need for the actual file.
150+
151+
let level_struct_name = level.to_uppercase();
152+
// The target_feature(enable = "...") string.
153+
let lcd_contents = lcd.join(", ");
154+
// The fields of the new struct.
155+
let lcd_field_definitions = lcd
156+
.iter()
157+
.map(|feature| {
158+
let normalized = all_features
159+
.iter()
160+
.find(|it| it.feature.feature_name == *feature)
161+
.unwrap();
162+
let type_path = format!(
163+
"crate::x86::{}::{}",
164+
normalized.feature.module, normalized.feature.struct_name
165+
);
166+
format!("{feature}: {type_path},\n")
167+
})
168+
.collect::<String>();
169+
// The enabled FEATURES.
170+
let superset_list = superset
171+
.iter()
172+
.map(|it| format!(r#""{it}""#))
173+
.collect::<Vec<_>>()
174+
.join(", ");
175+
// First argument to `trampoline!`
176+
let lcd_trampoline = lcd
177+
.iter()
178+
.map(|feature| {
179+
let normalized = all_features
180+
.iter()
181+
.find(|it| it.feature.feature_name == *feature)
182+
.unwrap();
183+
let type_path = format!(
184+
"crate::x86::{}::{}",
185+
normalized.feature.module, normalized.feature.struct_name
186+
);
187+
format!("{type_path} = self.{feature}")
188+
})
189+
.collect::<Vec<_>>()
190+
.join(",");
191+
// The version of the struct initializer in `try_new`.
192+
let struct_initializer_try_new = lcd
193+
.iter()
194+
.map(|feature| {
195+
let normalized = all_features
196+
.iter()
197+
.find(|it| it.feature.feature_name == *feature)
198+
.unwrap();
199+
let type_path = format!(
200+
"crate::x86::{}::{}",
201+
normalized.feature.module, normalized.feature.struct_name
202+
);
203+
// We rely on rustfmt to get the tab spacing right.
204+
format!("\t{feature}: {type_path}::try_new()?,\n")
205+
})
206+
.collect::<String>();
207+
// The version of the struct initializer in `new`.
208+
let struct_initializer_new = lcd
209+
.iter()
210+
.map(|feature| {
211+
let normalized = all_features
212+
.iter()
213+
.find(|it| it.feature.feature_name == *feature)
214+
.unwrap();
215+
let type_path = format!(
216+
"crate::x86::{}::{}",
217+
normalized.feature.module, normalized.feature.struct_name
218+
);
219+
format!("\t{feature}: {type_path}::new(),\n")
220+
})
221+
.collect::<String>();
222+
223+
let mut from_impls = String::new();
224+
for child in &superset {
225+
let from_feature = all_features
226+
.iter()
227+
.find(|it| it.feature.feature_name == *child)
228+
.unwrap();
229+
let type_path = format!(
230+
"crate::x86::{}::{}",
231+
from_feature.feature.module, from_feature.feature.struct_name
232+
);
233+
write!(
234+
from_impls,
235+
"\n\
236+
impl From<LEVEL_STRUCT_NAME> for {type_path} {{
237+
fn from(value: LEVEL_STRUCT_NAME) -> Self {{
238+
// This serves as a correctness check of the implicitly enabled features.
239+
trampoline!([LEVEL_STRUCT_NAME = value] => \"{{LEVEL_FEATURE_LCD_CONTENTS}}\", fn() -> {type_path} {{ {type_path}::new() }})
240+
}}
241+
}}\n"
242+
).unwrap();
243+
}
244+
245+
let mut result = format!(
246+
"// This file is automatically generated by `fearless_simd_core_gen`.\n\
247+
// Its template can be found in `fearless_simd_core/gen/templates`.\n\n\
248+
{X86_LEVEL_TEMPLATE}"
249+
);
250+
// We replace the from impls first, as they use template variables from the rest of this.
251+
result = result.replace("/*{FROM_IMPLS}*/", &from_impls);
252+
result = result.replace(
253+
"/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/",
254+
&lcd_field_definitions,
255+
);
256+
result = result.replace(r#""{LEVEL_FEATURE_SUPERSET_LIST}""#, &superset_list);
257+
result = result.replace("{LEVEL_FEATURE_LCD_TRAMPOLINE}", &lcd_trampoline);
258+
259+
result = result.replace(
260+
"/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/",
261+
&struct_initializer_try_new,
262+
);
263+
result = result.replace(
264+
"/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/",
265+
&struct_initializer_new,
266+
);
267+
268+
let arch_dir = root_dir.join("x86");
269+
let module_dir = arch_dir.join(level);
270+
create_dir_all(&module_dir)?;
271+
let output_path = module_dir.join("level.rs");
272+
fs::write(output_path, result)?;
273+
Ok(())
274+
}
275+
91276
#[derive(Debug)]
92277
struct Feature {
93278
/// The name of the struct to be generated.

fearless_simd_core/gen/templates/x86.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,15 @@ impl Debug for FEATURE_STRUCT_NAME {
3232
}
3333
}
3434

35+
// Safety: This token can only be constructed if you have proof that all the requisite
36+
// target features are enabled.
3537
unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME {
3638
const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"];
3739

3840
#[inline(always)]
3941
fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
40-
// Because we want this constant to be eagerly evaluated.
42+
// Because we need the safety check to be eagerly evaluated, it uses a constant item.
43+
// This means we can't use `Self = self` here, unfortunately.
4144
trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
4245
}
4346
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//! The x86-64-{LEVEL_ID} microarchitecture level.
2+
3+
use crate::{TargetFeatureToken, trampoline};
4+
5+
use core::fmt::Debug;
6+
7+
// TODO: Level specific docs?
8+
/// A token indicating that the current CPU has the x86-64-{LEVEL_ID} microarchitecture level.
9+
///
10+
/// For more details on the microarchitecture levels, see
11+
/// <https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels>.
12+
///
13+
/// # Example
14+
///
15+
/// This can be used to [`trampoline!`] into functions like:
16+
///
17+
/// ```rust
18+
/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
19+
/// #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")]
20+
/// fn uses_x86_64_{LEVEL_ID}() {
21+
/// // ...
22+
/// }
23+
/// ```
24+
///
25+
/// This struct internally contains only the minimal features required to enable this level.
26+
/// This is done to ensure that the fewest target features are checked.
27+
/// However, it can be turned into any target feature it implies using the from impls.
28+
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
29+
pub struct LEVEL_STRUCT_NAME {
30+
/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/
31+
// This struct explicitly is not non_exhaustive, because it is
32+
// completely safe to construct from the fields.
33+
}
34+
35+
impl Debug for LEVEL_STRUCT_NAME {
36+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
37+
write!(f, r#"x86-64-{LEVEL_ID} enabled."#)
38+
}
39+
}
40+
41+
// Safety: This token can only be constructed if you have proofs that all the requisite
42+
// target features are enabled.
43+
unsafe impl TargetFeatureToken for LEVEL_STRUCT_NAME {
44+
const FEATURES: &[&str] = &["{LEVEL_FEATURE_SUPERSET_LIST}"];
45+
46+
#[inline(always)]
47+
fn vectorize<R>(self, f: impl FnOnce() -> R) -> R {
48+
// We use the explicitly written out form here as validation that the set of
49+
// features we've created correctly maps to the target feature string.
50+
trampoline!([{LEVEL_FEATURE_LCD_TRAMPOLINE}] => "{LEVEL_FEATURE_LCD_CONTENTS}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
51+
}
52+
}
53+
54+
impl LEVEL_STRUCT_NAME {
55+
#[cfg(feature = "std")]
56+
/// Create a new token if the x86-64-{LEVEL_ID} target feature is detected as enabled.
57+
///
58+
/// This does not do any caching internally, although note that the standard
59+
/// library does internally cache the features it detects.
60+
// TODO: Consider a manual override feature/env var?
61+
pub fn try_new() -> Option<Self> {
62+
Some(Self {
63+
/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/
64+
})
65+
}
66+
67+
#[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")]
68+
/// Create a new token for the x86-64-{LEVEL_ID} microarchitecture level.
69+
///
70+
/// This method is useful to get a new token if you have an external proof that
71+
/// x86-64-{LEVEL_ID} is available. This could happen if you are in a target feature
72+
/// function called by an external library user.
73+
///
74+
/// # Safety
75+
///
76+
/// No conditions other than those inherited from the target feature attribute,
77+
/// i.e. that the "{LEVEL_FEATURE_LCD_CONTENTS}" target feature is available.
78+
pub fn new() -> Self {
79+
Self {
80+
/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/
81+
}
82+
}
83+
}
84+
/*{FROM_IMPLS}*/
85+
86+
const _: () = {
87+
assert!(
88+
core::mem::size_of::<LEVEL_STRUCT_NAME>() == 0,
89+
"Target feature tokens should be zero sized."
90+
);
91+
};

0 commit comments

Comments
 (0)