Skip to content

Commit 9570914

Browse files
authored
Rollup merge of rust-lang#138689 - jedbrown:jed/nvptx-target-feature, r=ZuseZ4
add nvptx_target_feature Tracking issue: rust-lang#141468 (nvptx), which is part of rust-lang#44839 (catch-all arches) The feature gate is `#![feature(nvptx_target_feature)]` This exposes the target features `sm_20` through `sm_120a` [as defined](https://github.com/llvm/llvm-project/blob/llvmorg-20.1.1/llvm/lib/Target/NVPTX/NVPTX.td#L59-L85) by LLVM. Cc: ``@gonzalobg`` ``@rustbot`` label +O-NVPTX +A-target-feature
2 parents 8b03efd + 35a485d commit 9570914

File tree

11 files changed

+222
-10
lines changed

11 files changed

+222
-10
lines changed

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
262262
// Filter out features that are not supported by the current LLVM version
263263
("aarch64", "fpmr") => None, // only existed in 18
264264
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
265+
// NVPTX SM targets and PTX versions added in LLVM 20
266+
("nvptx64", "sm_100") if get_version().0 < 20 => None,
267+
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
268+
("nvptx64", "sm_101") if get_version().0 < 20 => None,
269+
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
270+
("nvptx64", "sm_120") if get_version().0 < 20 => None,
271+
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
272+
("nvptx64", "ptx86") if get_version().0 < 20 => None,
273+
("nvptx64", "ptx87") if get_version().0 < 20 => None,
265274
// Filter out features that are not supported by the current LLVM version
266275
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
267276
if get_version().0 < 20 =>
@@ -324,15 +333,12 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
324333
///
325334
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
326335
pub(crate) fn target_config(sess: &Session) -> TargetConfig {
327-
// Add base features for the target.
328-
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
329-
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
330-
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
331-
// the target CPU, that is still expanded to target features (with all their implied features)
332-
// by LLVM.
333336
let target_machine = create_informational_target_machine(sess, true);
334337

335338
let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| {
339+
// This closure determines whether the target CPU has the feature according to LLVM. We do
340+
// *not* consider the `-Ctarget-feature`s here, as that will be handled later in
341+
// `cfg_target_feature`.
336342
if let Some(feat) = to_llvm_features(sess, feature) {
337343
// All the LLVM features this expands to must be enabled.
338344
for llvm_feature in feat {

compiler/rustc_codegen_ssa/src/target_features.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,10 @@ fn parse_rust_feature_flag<'a>(
197197
/// 2nd component of the return value, respectively).
198198
///
199199
/// `target_base_has_feature` should check whether the given feature (a Rust feature name!) is
200-
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`.
200+
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. Note that LLVM
201+
/// may consider features to be implied that we do not and vice-versa. We want `cfg` to be entirely
202+
/// consistent with Rust feature implications, and thus only consult LLVM to expand the target CPU
203+
/// to target features.
201204
///
202205
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled elsewhere.
203206
pub fn cfg_target_feature(
@@ -211,7 +214,15 @@ pub fn cfg_target_feature(
211214
.rust_target_features()
212215
.iter()
213216
.filter(|(feature, _, _)| target_base_has_feature(feature))
214-
.map(|(feature, _, _)| Symbol::intern(feature))
217+
.flat_map(|(base_feature, _, _)| {
218+
// Expand the direct base feature into all transitively-implied features. Note that we
219+
// cannot simply use the `implied` field of the tuple since that only contains
220+
// directly-implied features.
221+
//
222+
// Iteration order is irrelevant because we're collecting into an `UnordSet`.
223+
#[allow(rustc::potential_query_instability)]
224+
sess.target.implied_target_features(base_feature).into_iter().map(|f| Symbol::intern(f))
225+
})
215226
.collect();
216227

217228
// Add enabled and remove disabled features.

compiler/rustc_feature/src/unstable.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ declare_features! (
327327
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
328328
(unstable, mips_target_feature, "1.27.0", Some(44839)),
329329
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
330+
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
330331
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
331332
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
332333
(unstable, riscv_target_feature, "1.45.0", Some(44839)),

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,7 @@ symbols! {
15121512
not,
15131513
notable_trait,
15141514
note,
1515+
nvptx_target_feature,
15151516
object_safe_for_dispatch,
15161517
of,
15171518
off,

compiler/rustc_target/src/target_features.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
517517
// tidy-alphabetical-end
518518
];
519519

520+
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
521+
// tidy-alphabetical-start
522+
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
523+
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
524+
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
525+
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
526+
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
527+
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
528+
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
529+
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
530+
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
531+
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
532+
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
533+
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
534+
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
535+
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
536+
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
537+
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
538+
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
539+
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
540+
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
541+
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
542+
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
543+
// tidy-alphabetical-end
544+
// tidy-alphabetical-start
545+
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
546+
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
547+
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
548+
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
549+
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
550+
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
551+
// tidy-alphabetical-end
552+
// tidy-alphabetical-start
553+
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
554+
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
555+
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
556+
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
557+
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
558+
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
559+
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
560+
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
561+
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
562+
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
563+
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
564+
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
565+
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
566+
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
567+
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
568+
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
569+
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
570+
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
571+
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
572+
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
573+
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
574+
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
575+
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
576+
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
577+
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
578+
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
579+
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
580+
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
581+
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
582+
// tidy-alphabetical-end
583+
];
584+
520585
static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
521586
// tidy-alphabetical-start
522587
("a", Stable, &["zaamo", "zalrsc"]),
@@ -782,6 +847,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
782847
.chain(HEXAGON_FEATURES.iter())
783848
.chain(POWERPC_FEATURES.iter())
784849
.chain(MIPS_FEATURES.iter())
850+
.chain(NVPTX_FEATURES.iter())
785851
.chain(RISCV_FEATURES.iter())
786852
.chain(WASM_FEATURES.iter())
787853
.chain(BPF_FEATURES.iter())
@@ -847,6 +913,7 @@ impl Target {
847913
"x86" | "x86_64" => X86_FEATURES,
848914
"hexagon" => HEXAGON_FEATURES,
849915
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
916+
"nvptx64" => NVPTX_FEATURES,
850917
"powerpc" | "powerpc64" => POWERPC_FEATURES,
851918
"riscv32" | "riscv64" => RISCV_FEATURES,
852919
"wasm32" | "wasm64" => WASM_FEATURES,
@@ -873,7 +940,7 @@ impl Target {
873940
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
874941
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
875942
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
876-
"bpf" | "m68k" => &[], // no vector ABI
943+
"nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
877944
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
878945
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
879946
// when passing args in vector registers.

library/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@
195195
#![feature(hexagon_target_feature)]
196196
#![feature(loongarch_target_feature)]
197197
#![feature(mips_target_feature)]
198+
#![feature(nvptx_target_feature)]
198199
#![feature(powerpc_target_feature)]
199200
#![feature(riscv_target_feature)]
200201
#![feature(rtm_target_feature)]

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,46 @@ platform.
1010
[@RDambrosio016](https://github.com/RDambrosio016)
1111
[@kjetilkjeka](https://github.com/kjetilkjeka)
1212

13+
## Requirements
14+
15+
This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
16+
The necessary components for this workflow are:
17+
18+
- `rustup toolchain add nightly`
19+
- `rustup component add llvm-tools --toolchain nightly`
20+
- `rustup component add llvm-bitcode-linker --toolchain nightly`
21+
22+
There are two options for using the core library:
23+
24+
- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
25+
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`
26+
27+
### Target and features
28+
29+
It is generally necessary to specify the target CPU, such as `-C target-cpu=sm_89`, because the default is very old. Selecting `sm_89` implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
30+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
31+
Later PTX versions may allow more efficient code generation.
32+
33+
Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
34+
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.
35+
36+
## Building Rust kernels
37+
38+
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
39+
40+
```console
41+
$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
42+
```
43+
44+
Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, thus it's necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:
45+
46+
```console
47+
$ rustup component add rust-src --toolchain nightly
48+
$ rustup component add llvm-tools --toolchain nightly
49+
$ rustup component add llvm-bitcode-linker --toolchain nightly
50+
```
51+
52+
1353
<!-- FIXME: fill this out
1454
1555
## Requirements

tests/ui/check-cfg/target_feature.stderr

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
198198
`power9-altivec`
199199
`power9-vector`
200200
`prfchw`
201+
`ptx32`
202+
`ptx40`
203+
`ptx41`
204+
`ptx42`
205+
`ptx43`
206+
`ptx50`
207+
`ptx60`
208+
`ptx61`
209+
`ptx62`
210+
`ptx63`
211+
`ptx64`
212+
`ptx65`
213+
`ptx70`
214+
`ptx71`
215+
`ptx72`
216+
`ptx73`
217+
`ptx74`
218+
`ptx75`
219+
`ptx76`
220+
`ptx77`
221+
`ptx78`
222+
`ptx80`
223+
`ptx81`
224+
`ptx82`
225+
`ptx83`
226+
`ptx84`
227+
`ptx85`
228+
`ptx86`
229+
`ptx87`
201230
`quadword-atomics`
202231
`rand`
203232
`ras`
@@ -222,6 +251,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
222251
`simd128`
223252
`sm3`
224253
`sm4`
254+
`sm_100`
255+
`sm_100a`
256+
`sm_101`
257+
`sm_101a`
258+
`sm_120`
259+
`sm_120a`
260+
`sm_20`
261+
`sm_21`
262+
`sm_30`
263+
`sm_32`
264+
`sm_35`
265+
`sm_37`
266+
`sm_50`
267+
`sm_52`
268+
`sm_53`
269+
`sm_60`
270+
`sm_61`
271+
`sm_62`
272+
`sm_70`
273+
`sm_72`
274+
`sm_75`
275+
`sm_80`
276+
`sm_86`
277+
`sm_87`
278+
`sm_89`
279+
`sm_90`
280+
`sm_90a`
225281
`sme`
226282
`sme-b16b16`
227283
`sme-f16f16`

tests/ui/target-feature/gate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// gate-test-arm_target_feature
77
// gate-test-hexagon_target_feature
88
// gate-test-mips_target_feature
9+
// gate-test-nvptx_target_feature
910
// gate-test-wasm_target_feature
1011
// gate-test-adx_target_feature
1112
// gate-test-cmpxchg16b_target_feature

tests/ui/target-feature/gate.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error[E0658]: the target feature `x87` is currently unstable
2-
--> $DIR/gate.rs:29:18
2+
--> $DIR/gate.rs:30:18
33
|
44
LL | #[target_feature(enable = "x87")]
55
| ^^^^^^^^^^^^^^

0 commit comments

Comments
 (0)