Skip to content

Commit c8ffbdf

Browse files
committed
Increase the minimum CUDA version to 12.0 and the default arch to compute_75.
CUDA 12.0 was released in December 2022, and CUDA 13.0 was released in August 2025. It feels like a good time to drop CUDA 11.x support. This means later Kepler devices with compute capabilities of 3.5 and 3.7 will no longer be supported. The minimum version of NVVM IR increases from 1.6 to 2.0, because that's what CUDA 12.0 uses. Along with this, change the default compiler target to `compute_75`. This is a good choice because it's the minimum supported by CUDA 13.0, and gets Rust CUDA a step closer to working with CUDA 13.0. The existing defaults were all over the place. - `NvvmArch::default()` was `compute_52`. - `CudaBuilder`'s default was `compute_61`. - compiletest's default was `compute_70`. This commit makes the latter two determined by `NvvmArch::default()`, which is changed to `compute_75`. Currently CI runs compiletests on `compute_61`, `compute_70`, and `compute_90`; this commit changes the `compute_70` to `compute_75`. It seems sensible to have the default value as one of the things tested by CI. This commit also adds a comment on NvvmArch with a table of CUDA/`compute_*` values, which I found very useful. Resources: - https://en.wikipedia.org/wiki/CUDA#GPUs_supported for compute capabilities supported by different CUDA versions. - https://docs.nvidia.com/cuda/archive/12.0.0/cuda-toolkit-release-notes/index.html for NVVM IR version information.
1 parent 210b35a commit c8ffbdf

File tree

10 files changed

+78
-29
lines changed

10 files changed

+78
-29
lines changed

.github/workflows/ci_linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,4 @@ jobs:
192192
shell: bash
193193
run: shopt -s globstar && rustfmt --check tests/compiletests/ui/**/*.rs
194194
- name: Compiletest
195-
run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90
195+
run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90

.github/workflows/ci_windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,6 @@ jobs:
105105
RUSTDOCFLAGS: -Dwarnings
106106
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*" --exclude "cust_raw"
107107

108-
# Disabled due to dll issues, someone with Windows knowledge needed
108+
# Disabled due to dll issues, someone with Windows knowledge needed
109109
# - name: Compiletest
110-
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90
110+
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/cuda_builder/src/lib.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,7 @@ pub struct CudaBuilder {
9191
/// will not work on older capabilities. It means that if it uses certain features
9292
/// it may not work.
9393
///
94-
/// This currently defaults to `6.1`. Which corresponds to Pascal, GPUs such as the
95-
/// GTX 1030, GTX 1050, GTX 1080, Tesla P40, etc. We default to this because Maxwell
96-
/// (5.x) will be deprecated in CUDA 12 and we anticipate for that. Moreover, `6.x`
97-
/// contains support for things like f64 atomic add and half precision float ops.
94+
/// This defaults to the default value of `NvvmArch`.
9895
///
9996
/// Starting with CUDA 12.9, architectures can have suffixes:
10097
///
@@ -207,7 +204,7 @@ impl CudaBuilder {
207204
ptx_file_copy_path: None,
208205
generate_line_info: true,
209206
nvvm_opts: true,
210-
arch: NvvmArch::Compute61,
207+
arch: NvvmArch::default(),
211208
ftz: false,
212209
fast_sqrt: false,
213210
fast_div: false,

crates/nvvm/src/lib.rs

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,20 +253,66 @@ impl FromStr for NvvmOption {
253253
}
254254
}
255255

256-
/// Nvvm architecture, default is `Compute52`
256+
/// Nvvm architecture.
257+
///
258+
/// The following table indicates which `compute_*` values are supported by which CUDA versions.
259+
///
260+
/// ```text
261+
/// -----------------------------------------------------------------------------
262+
/// | Supported `compute_*` values (written vertically)
263+
/// -----------------------------------------------------------------------------
264+
/// CUDA | 1 1 1 1 1 1
265+
/// Toolkit | 5 5 5 6 6 6 7 7 7 7 8 8 8 8 8 9 0 0 0 1 2 2
266+
/// version | 0 2 3 0 1 2 0 2 3 5 0 6 7 8 9 0 0 1 3 0 0 1
267+
/// -----------------------------------------------------------------------------
268+
/// 12.[01].0 | b b b b b b b b b b b b - - b b - - - - - -
269+
/// 12.2.0 | b b b b b b b b b b b b - - b a - - - - - -
270+
/// 12.[3456].0 | b b b b b b b b b b b b b - b a - - - - - -
271+
/// 12.8.0 | b b b b b b b b b b b b b - b a a a - - a -
272+
/// 12.9.0 | b b b b b b b b - b b b b - b a f f f - f f
273+
/// 13.0.0 | - - - - - - - - - b b b b b b a f - f f f f
274+
/// -----------------------------------------------------------------------------
275+
/// Legend:
276+
/// - 'b': baseline features only
277+
/// - 'a': baseline + architecture-specific features
278+
/// - 'f': baseline + architecture-specific + family-specific features
279+
///
280+
/// Note: there was no 12.7 release.
281+
/// ```
282+
///
283+
/// For example, CUDA 12.9.0 supports `compute_89`, `compute_90{,a}`, `compute_100{,a,f}`.
284+
///
285+
/// This information is from "PTX Compiler APIs" documents under
286+
/// <https://developer.nvidia.com/cuda-toolkit-archive>, e.g.
287+
/// <https://docs.nvidia.com/cuda/archive/13.0.0/ptx-compiler-api/index.html>. (Adjust the version
288+
/// in that URL as necessary.) Specifically, the `compute-*` values allowed with the `--gpu-name`
289+
/// option.
290+
///
291+
/// # Example
292+
///
293+
/// ```
294+
/// // The default value is `NvvmArch::Compute75`.
295+
/// # use nvvm::NvvmArch;
296+
/// assert_eq!(NvvmArch::default(), NvvmArch::Compute75);
297+
/// ```
257298
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, strum::EnumIter)]
258299
pub enum NvvmArch {
259300
Compute35,
260301
Compute37,
261302
Compute50,
262-
#[default]
263303
Compute52,
264304
Compute53,
265305
Compute60,
266306
Compute61,
267307
Compute62,
268308
Compute70,
269309
Compute72,
310+
/// This default value of 7.5 corresponds to Turing and later devices. We default to this
311+
/// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
312+
/// supported by CUDA 12.x.
313+
// WARNING: If you change the default, consider updating the `--target-arch` values used for
314+
// compiletests in `ci_linux.yml` and `.github/workflows/ci_{linux,windows}.yml`.
315+
#[default]
270316
Compute75,
271317
Compute80,
272318
Compute86,

crates/rustc_codegen_nvvm/src/nvvm.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ impl Display for CodegenErr {
4444
}
4545

4646
/// Take a list of bitcode module bytes and their names and codegen it
47-
/// into ptx bytes. The final PTX *should* be utf8, but just to be on the safe side
47+
/// into PTX bytes. The final PTX *should* be utf8, but just to be on the safe side
4848
/// it returns a vector of bytes.
4949
///
5050
/// Note that this will implicitly try to find libdevice and add it, so don't do that
@@ -57,15 +57,15 @@ pub fn codegen_bitcode_modules(
5757
) -> Result<Vec<u8>, CodegenErr> {
5858
debug!("Codegenning bitcode to PTX");
5959

60-
// make sure the nvvm version is high enough so users don't get confusing compilation errors.
60+
// Make sure the nvvm version is high enough so users don't get confusing compilation errors.
6161
let (major, minor) = nvvm::ir_version();
6262

63-
if major <= 1 && minor < 6 {
63+
// Reject any NVVM IR version below 2.0. Tuples compare lexicographically, so this is
// `major < 2 || (major == 2 && minor < 0)`; the previous `major <= 2 && minor < 0`
// could never be true and silently disabled the check.
if (major, minor) < (2, 0) {
6464
sess.dcx()
65-
.fatal("rustc_codegen_nvvm requires at least libnvvm 1.6 (CUDA 11.2)");
65+
.fatal("rustc_codegen_nvvm requires at least libnvvm 2.0 (CUDA 12.0)");
6666
}
6767

68-
// first, create the nvvm program we will add modules to.
68+
// First, create the nvvm program we will add modules to.
6969
let prog = NvvmProgram::new()?;
7070

7171
let module = merge_llvm_modules(modules, llcx);

guide/src/guide/getting_started.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ This section covers how to get started writing GPU crates with `cuda_std` and `c
66

77
Before you can use the project to write GPU crates, you will need a couple of prerequisites:
88

9-
- [The CUDA SDK](https://developer.nvidia.com/cuda-downloads), version 11.2 or later (and the appropriate driver - [see CUDA release notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)).
9+
- [The CUDA SDK](https://developer.nvidia.com/cuda-downloads), version 12.0 or later (and the
10+
appropriate driver - [see CUDA release
11+
notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)).
1012

1113
This is only for building GPU crates; to execute built PTX you only need CUDA `9+`.
1214

tests/compiletests/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ path = "src/main.rs"
1010
[dependencies]
1111
compiletest_rs = "0.11"
1212
clap = { version = "4.5", features = ["derive"] }
13+
nvvm = { path = "../../crates/nvvm" }
1314
tracing = "0.1"
1415
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
1516
cuda_builder = { workspace = true }

tests/compiletests/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ cargo run --release
2222
### Options
2323

2424
- `--bless` - Update expected output files
25-
- `--target-arch=compute_61,compute_70,compute_90` - Test multiple CUDA compute capabilities (comma-separated)
25+
- `--target-arch=compute_61,compute_75,compute_90` - Test multiple CUDA compute capabilities (comma-separated)
2626
- Filter by test name: `cargo compiletest simple`
2727
- `RUST_LOG=info` - Enable progress logging
2828
- `RUST_LOG=debug` - Enable detailed debug logging

tests/compiletests/src/main.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use clap::Parser;
2+
use nvvm::NvvmArch;
23
use std::env;
34
use std::io;
45
use std::path::{Path, PathBuf};
@@ -13,17 +14,18 @@ struct Opt {
1314

1415
/// The CUDA compute capability to target (e.g., compute_70, compute_80, compute_90).
1516
/// Can specify multiple architectures comma-separated.
16-
#[arg(long, default_value = "compute_70", value_delimiter = ',')]
17-
target_arch: Vec<String>,
17+
// WARNING: This should be kept in sync with the default on `CudaBuilder::arch`.
18+
#[arg(long, default_values_t = [NvvmArch::default()], value_delimiter = ',')]
19+
target_arch: Vec<NvvmArch>,
1820

1921
/// Only run tests that match these filters.
2022
#[arg(name = "FILTER")]
2123
filters: Vec<String>,
2224
}
2325

2426
impl Opt {
25-
pub fn architectures(&self) -> impl Iterator<Item = &str> {
26-
self.target_arch.iter().map(|s| s.as_str())
27+
pub fn architectures(&self) -> impl Iterator<Item = NvvmArch> + use<'_> {
28+
self.target_arch.iter().copied()
2729
}
2830
}
2931

@@ -136,18 +138,18 @@ impl Runner {
136138
extra_flags: "",
137139
}];
138140

139-
for (arch, variation) in self
140-
.opt
141-
.architectures()
142-
.flat_map(|arch| VARIATIONS.iter().map(move |variation| (arch, variation)))
143-
{
141+
for (arch, variation) in self.opt.architectures().flat_map(|arch| {
142+
VARIATIONS
143+
.iter()
144+
.map(move |variation| (arch.target_feature(), variation))
145+
}) {
144146
// HACK(eddyb) in order to allow *some* tests to have separate output
145147
// in different testing variations (i.e. experimental features), while
146148
// keeping *most* of the tests unchanged, we make use of "stage IDs",
147149
// which offer `// only-S` and `// ignore-S` for any stage ID `S`.
148150
let stage_id = if variation.name == "default" {
149151
// Use the architecture name as the stage ID.
150-
arch.to_string()
152+
arch.clone()
151153
} else {
152154
// Include the variation name in the stage ID.
153155
format!("{}-{}", arch, variation.name)
@@ -159,7 +161,7 @@ impl Runner {
159161
&self.deps_target_dir,
160162
&self.codegen_backend_path,
161163
CUDA_TARGET,
162-
arch,
164+
&arch,
163165
);
164166
let mut flags = test_rustc_flags(
165167
&self.codegen_backend_path,
@@ -172,7 +174,7 @@ impl Runner {
172174
.deps_target_dir
173175
.join(DepKind::ProcMacro.target_dir_suffix(CUDA_TARGET)),
174176
],
175-
arch,
177+
&arch,
176178
);
177179
flags += variation.extra_flags;
178180

0 commit comments

Comments
 (0)