Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ jobs:
fail-fast: false
matrix:
variance:
# - name: Ubuntu-22.04/CUDA-11.8.0
# image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda11:latest"
# runner: ubuntu-latest
- name: Ubuntu-22.04 / CUDA-12.8.1 / x86_64
image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda12:latest"
runner: ubuntu-latest
Expand Down Expand Up @@ -235,9 +232,6 @@ jobs:
matrix:
variance:
# Must match the build job's matrix definition
# - name: Ubuntu-22.04 / CUDA-11.8.0
# image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda11:latest"
# runner: ubuntu-latest
- name: Ubuntu-22.04 / CUDA-12.8.1 / x86_64
image: "ghcr.io/rust-gpu/rust-cuda-ubuntu22-cuda12:latest"
runner: ubuntu-latest
Expand Down Expand Up @@ -289,4 +283,4 @@ jobs:
shell: bash
run: shopt -s globstar && rustfmt --check tests/compiletests/ui/**/*.rs
- name: Compiletest
run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90
run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90
4 changes: 2 additions & 2 deletions .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,6 @@ jobs:
RUSTDOCFLAGS: -Dwarnings
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*" --exclude "cust_raw"

# Disabled due to dll issues, someone with Windows knowledge needed
# Disabled due to dll issues, someone with Windows knowledge needed
# - name: Compiletest
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90
5 changes: 0 additions & 5 deletions .github/workflows/container_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ jobs:
- runner: ubuntu-24.04-arm
arch: arm64
variance:
- name: Ubuntu-22.04/CUDA-11.8.0
image: "rust-gpu/rust-cuda-ubuntu22-cuda11"
dockerfile: ./container/ubuntu22-cuda11/Dockerfile
- name: Ubuntu-22.04/CUDA-12.8.1
image: "rust-gpu/rust-cuda-ubuntu22-cuda12"
dockerfile: ./container/ubuntu22-cuda12/Dockerfile
Expand Down Expand Up @@ -157,8 +154,6 @@ jobs:
fail-fast: false
matrix:
variance:
- name: Ubuntu-22.04/CUDA-11.8.0
image: "rust-gpu/rust-cuda-ubuntu22-cuda11"
- name: Ubuntu-22.04/CUDA-12.8.1
image: "rust-gpu/rust-cuda-ubuntu22-cuda12"
- name: Ubuntu-24.04/CUDA-12.8.1
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 0 additions & 89 deletions container/ubuntu22-cuda11/Dockerfile

This file was deleted.

24 changes: 2 additions & 22 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,7 @@ pub struct CudaBuilder {
/// will not work on older capabilities. It means that if it uses certain features
/// it may not work.
///
/// This currently defaults to `6.1`. Which corresponds to Pascal, GPUs such as the
/// GTX 1030, GTX 1050, GTX 1080, Tesla P40, etc. We default to this because Maxwell
/// (5.x) will be deprecated in CUDA 12 and we anticipate for that. Moreover, `6.x`
/// contains support for things like f64 atomic add and half precision float ops.
/// This defaults to the default value of `NvvmArch`.
///
/// Starting with CUDA 12.9, architectures can have suffixes:
///
Expand Down Expand Up @@ -207,7 +204,7 @@ impl CudaBuilder {
ptx_file_copy_path: None,
generate_line_info: true,
nvvm_opts: true,
arch: NvvmArch::Compute61,
arch: NvvmArch::default(),
ftz: false,
fast_sqrt: false,
fast_div: false,
Expand Down Expand Up @@ -257,23 +254,6 @@ impl CudaBuilder {
self
}

/// The virtual compute architecture to target for PTX generation. This
/// dictates how certain things are codegenned and may affect performance
/// and/or which gpus the code can run on.
///
/// You should generally try to pick an arch that will work with most
/// GPUs you want your program to work with.
///
/// If you are unsure, either leave this option to default, or pick something around 5.2 to 7.x.
///
/// You can find a list of features supported on each arch and a list of GPUs for every
/// arch [`here`](https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications).
///
/// NOTE that this does not necessarily mean that code using a certain capability
/// will not work on older capabilities. It means that if it uses certain
/// features it may not work.
///
/// The chosen architecture enables target features for conditional compilation.
/// See the documentation on the `arch` field for more details.
pub fn arch(mut self, arch: NvvmArch) -> Self {
self.arch = arch;
Expand Down
139 changes: 93 additions & 46 deletions crates/nvvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,54 +243,59 @@ impl FromStr for NvvmOption {
}
_ if s.starts_with("-arch=") => {
let slice = &s[6..];
if !slice.starts_with("compute_") {
return Err(format!("unknown -arch value: {slice}"));
match NvvmArch::from_str(slice) {
Ok(arch) => Self::Arch(arch),
Err(_) => return Err(format!("unknown -arch value: {slice}")),
}
let arch_num = &slice[8..];
let arch = match arch_num {
"35" => NvvmArch::Compute35,
"37" => NvvmArch::Compute37,
"50" => NvvmArch::Compute50,
"52" => NvvmArch::Compute52,
"53" => NvvmArch::Compute53,
"60" => NvvmArch::Compute60,
"61" => NvvmArch::Compute61,
"62" => NvvmArch::Compute62,
"70" => NvvmArch::Compute70,
"72" => NvvmArch::Compute72,
"75" => NvvmArch::Compute75,
"80" => NvvmArch::Compute80,
"86" => NvvmArch::Compute86,
"87" => NvvmArch::Compute87,
"89" => NvvmArch::Compute89,
"90" => NvvmArch::Compute90,
"90a" => NvvmArch::Compute90a,
"100" => NvvmArch::Compute100,
"100f" => NvvmArch::Compute100f,
"100a" => NvvmArch::Compute100a,
"101" => NvvmArch::Compute101,
"101f" => NvvmArch::Compute101f,
"101a" => NvvmArch::Compute101a,
"103" => NvvmArch::Compute103,
"103f" => NvvmArch::Compute103f,
"103a" => NvvmArch::Compute103a,
"120" => NvvmArch::Compute120,
"120f" => NvvmArch::Compute120f,
"120a" => NvvmArch::Compute120a,
"121" => NvvmArch::Compute121,
"121f" => NvvmArch::Compute121f,
"121a" => NvvmArch::Compute121a,
_ => return Err(format!("unknown -arch=compute_NN value: {arch_num}")),
};
Self::Arch(arch)
}
_ => return Err(format!("unknown option: {s}")),
})
}
}

/// Nvvm architecture, default is `Compute52`
#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::EnumIter)]
/// Nvvm architecture.
///
/// The following table indicates which `compute_*` values are supported by which CUDA versions.
///
/// ```text
/// -----------------------------------------------------------------------------
/// | Supported `compute_*` values (written vertically)
/// -----------------------------------------------------------------------------
/// CUDA | 1 1 1 1 1 1
/// Toolkit | 5 5 5 6 6 6 7 7 7 7 8 8 8 8 8 9 0 0 0 1 2 2
/// version | 0 2 3 0 1 2 0 2 3 5 0 6 7 8 9 0 0 1 3 0 0 1
/// -----------------------------------------------------------------------------
/// 12.[01].0 | b b b b b b b b b b b b - - b b - - - - - -
/// 12.2.0 | b b b b b b b b b b b b - - b a - - - - - -
/// 12.[3456].0 | b b b b b b b b b b b b b - b a - - - - - -
/// 12.8.0 | b b b b b b b b b b b b b - b a a a - - a -
/// 12.9.0 | b b b b b b b b - b b b b - b a f f f - f f
/// 13.0.0 | - - - - - - - - - b b b b b b a f - f f f f
/// -----------------------------------------------------------------------------
/// Legend:
/// - 'b': baseline features only
/// - 'a': baseline + architecture-specific features
/// - 'f': baseline + architecture-specific + family-specific features
///
/// Note: there was no 12.7 release.
/// ```
///
/// For example, CUDA 12.9.0 supports `compute_89`, `compute_90{,a}`, `compute_100{,a,f}`.
///
/// This information is from "PTX Compiler APIs" documents under
/// <https://developer.nvidia.com/cuda-toolkit-archive>, e.g.
/// <https://docs.nvidia.com/cuda/archive/13.0.0/ptx-compiler-api/index.html>. (Adjust the version
/// in that URL as necessary.) Specifically, the `compute-*` values allowed with the `--gpu-name`
/// option.
///
/// # Example
///
/// ```
/// // The default value is `NvvmArch::Compute75`.
/// # use nvvm::NvvmArch;
/// assert_eq!(NvvmArch::default(), NvvmArch::Compute75);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, strum::EnumIter)]
pub enum NvvmArch {
Compute35,
Compute37,
Expand All @@ -302,6 +307,12 @@ pub enum NvvmArch {
Compute62,
Compute70,
Compute72,
/// This default value of 7.5 corresponds to Turing and later devices. We default to this
/// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
/// supported by CUDA 12.x.
    // WARNING: If you change the default, consider updating the `--target-arch` values used for
    // compiletests in `.github/workflows/ci_{linux,windows}.yml`.
#[default]
Compute75,
Compute80,
Compute86,
Expand Down Expand Up @@ -340,9 +351,45 @@ impl Display for NvvmArch {
}
}

impl Default for NvvmArch {
fn default() -> Self {
Self::Compute52
impl FromStr for NvvmArch {
type Err = &'static str;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"compute_35" => NvvmArch::Compute35,
"compute_37" => NvvmArch::Compute37,
"compute_50" => NvvmArch::Compute50,
"compute_52" => NvvmArch::Compute52,
"compute_53" => NvvmArch::Compute53,
"compute_60" => NvvmArch::Compute60,
"compute_61" => NvvmArch::Compute61,
"compute_62" => NvvmArch::Compute62,
"compute_70" => NvvmArch::Compute70,
"compute_72" => NvvmArch::Compute72,
"compute_75" => NvvmArch::Compute75,
"compute_80" => NvvmArch::Compute80,
"compute_86" => NvvmArch::Compute86,
"compute_87" => NvvmArch::Compute87,
"compute_89" => NvvmArch::Compute89,
"compute_90" => NvvmArch::Compute90,
"compute_90a" => NvvmArch::Compute90a,
"compute_100" => NvvmArch::Compute100,
"compute_100f" => NvvmArch::Compute100f,
"compute_100a" => NvvmArch::Compute100a,
"compute_101" => NvvmArch::Compute101,
"compute_101f" => NvvmArch::Compute101f,
"compute_101a" => NvvmArch::Compute101a,
"compute_103" => NvvmArch::Compute103,
"compute_103f" => NvvmArch::Compute103f,
"compute_103a" => NvvmArch::Compute103a,
"compute_120" => NvvmArch::Compute120,
"compute_120f" => NvvmArch::Compute120f,
"compute_120a" => NvvmArch::Compute120a,
"compute_121" => NvvmArch::Compute121,
"compute_121f" => NvvmArch::Compute121f,
"compute_121a" => NvvmArch::Compute121a,
_ => return Err("unknown compile target"),
})
}
}

Expand Down Expand Up @@ -1116,8 +1163,8 @@ mod tests {
err("blah", "unknown option: blah");
err("-aardvark", "unknown option: -aardvark");
err("-arch=compute75", "unknown -arch value: compute75");
err("-arch=compute_10", "unknown -arch=compute_NN value: 10");
err("-arch=compute_100x", "unknown -arch=compute_NN value: 100x");
err("-arch=compute_10", "unknown -arch value: compute_10");
err("-arch=compute_100x", "unknown -arch value: compute_100x");
err("-opt=3", "-opt=3 is the default");
err("-opt=99", "unknown -opt value: 99");
}
Expand Down
10 changes: 5 additions & 5 deletions crates/rustc_codegen_nvvm/src/nvvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl Display for CodegenErr {
}

/// Take a list of bitcode module bytes and their names and codegen it
/// into ptx bytes. The final PTX *should* be utf8, but just to be on the safe side
/// into PTX bytes. The final PTX *should* be utf8, but just to be on the safe side
/// it returns a vector of bytes.
///
/// Note that this will implicitly try to find libdevice and add it, so don't do that
Expand All @@ -57,15 +57,15 @@ pub fn codegen_bitcode_modules(
) -> Result<Vec<u8>, CodegenErr> {
debug!("Codegenning bitcode to PTX");

// make sure the nvvm version is high enough so users don't get confusing compilation errors.
// Make sure the nvvm version is high enough so users don't get confusing compilation errors.
let (major, minor) = nvvm::ir_version();

if major <= 1 && minor < 6 {
if major <= 2 && minor < 0 {
sess.dcx()
.fatal("rustc_codegen_nvvm requires at least libnvvm 1.6 (CUDA 11.2)");
.fatal("rustc_codegen_nvvm requires at least libnvvm 2.0 (CUDA 12.0)");
}

// first, create the nvvm program we will add modules to.
// First, create the nvvm program we will add modules to.
let prog = NvvmProgram::new()?;

let module = merge_llvm_modules(modules, llcx);
Expand Down
Loading
Loading