Skip to content

Commit ad4181e

Browse files
committed
compute_120?
1 parent 1f79d0c commit ad4181e

File tree

9 files changed

+45
-7
lines changed

9 files changed

+45
-7
lines changed

crates/cuda_builder/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ impl CudaBuilder {
163163
generate_line_info: true,
164164
nvvm_opts: true,
165165
arch: if cfg!(feature = "nvvm-v19") {
166-
NvvmArch::Compute100
166+
NvvmArch::Compute120
167167
} else if cfg!(feature = "nvvm-v7") {
168168
NvvmArch::default()
169169
} else {

crates/cuda_std/src/cfg.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ pub enum ComputeCapability {
2020
Compute87,
2121
Compute89,
2222
Compute90,
23-
Compute100
23+
Compute100,
24+
Compute120,
2425
}
2526

2627
impl ComputeCapability {
@@ -51,7 +52,8 @@ impl ComputeCapability {
5152
"870" => ComputeCapability::Compute87, // Ampere (Jetson AGX Orin)
5253
"890" => ComputeCapability::Compute89, // Ada Lovelace (RTX 40 series)
5354
"900" => ComputeCapability::Compute90, // Hopper (H100)
54-
"1000" => ComputeCapability::Compute100, // Blackwell (RTX 50 series, H200, B100)
55+
"1000" => ComputeCapability::Compute100, // Blackwell data center (B100, B200; CUDA 12.8 and later)
56+
"1200" => ComputeCapability::Compute120, // Blackwell consumer/workstation (RTX 50 series; CUDA 12.8 and later)
5557
_ => panic!("CUDA_ARCH had an invalid value"),
5658
}
5759
}

crates/cust/src/module.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ pub enum JitTarget {
6060
Compute89 = 89,
6161
Compute90 = 90,
6262
Compute100 = 100,
63+
Compute120 = 120,
6364
}
6465

6566
/// How to handle cases where a loaded module's data does not contain an exact match for the

crates/nvvm/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ impl FromStr for NvvmOption {
259259
"89" => NvvmArch::Compute89,
260260
"90" => NvvmArch::Compute90,
261261
"100" => NvvmArch::Compute100,
262+
"120" => NvvmArch::Compute120,
262263
_ => return Err("unknown arch"),
263264
};
264265
Self::Arch(arch)
@@ -288,6 +289,7 @@ pub enum NvvmArch {
288289
Compute89,
289290
Compute90,
290291
Compute100,
292+
Compute120,
291293
}
292294

293295
impl Display for NvvmArch {
@@ -460,6 +462,7 @@ mod tests {
460462
"-arch=compute_89",
461463
"-arch=compute_90",
462464
"-arch=compute_100",
465+
"-arch=compute_120",
463466
"-ftz=1",
464467
"-prec-sqrt=0",
465468
"-prec-div=0",
@@ -486,6 +489,7 @@ mod tests {
486489
Arch(Compute89),
487490
Arch(Compute90),
488491
Arch(Compute100),
492+
Arch(Compute120),
489493
Ftz,
490494
FastSqrt,
491495
FastDiv,

crates/rustc_codegen_nvvm_v19/build.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ fn main() {
1919
rustc_llvm_build();
2020

2121
// this is set by cuda_builder, but in case somebody is using the codegen
22-
// manually, default to 1000 (which is what nvvm defaults to).
22+
// manually, default to 1200.
2323
if option_env!("CUDA_ARCH").is_none() {
24-
println!("cargo:rustc-env=CUDA_ARCH=1000")
24+
println!("cargo:rustc-env=CUDA_ARCH=1200")
2525
}
2626
}
2727

crates/rustc_codegen_nvvm_v19/src/back.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ pub fn target_machine_factory(
101101
let triple = sess.target.llvm_target.clone().to_string();
102102
let cpu_string = sess.opts.cg.target_cpu
103103
.as_deref()
104-
.unwrap_or("sm_100") // Use a more compatible target
104+
.unwrap_or("sm_120")
105105
.to_string();
106106
let features_string = "".to_string();
107107
let trap_unreachable = sess

crates/rustc_codegen_nvvm_v19/src/target.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ pub fn target() -> Target {
2222
options.linker_flavor = LinkerFlavor::Ptx;
2323
// nvvm does all the linking for us, but technically it's not a linker
2424
options.linker = None;
25-
options.cpu = "sm_100".into();
25+
options.cpu = "sm_120".into();
2626
options.max_atomic_width = Some(64);
2727
// Unwinding on CUDA is neither feasible nor useful.
2828
options.panic_strategy = PanicStrategy::Abort;

examples/cuda/vecadd/kernels/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ edition = "2024"
55

66
[dependencies]
77
cuda_std = { path = "../../../../crates/cuda_std" }
8+
rand_core = { version = "0.9.3" }
9+
rand_xoshiro = { version = "0.7.0", default-features = false }
810

911
[lib]
1012
crate-type = ["cdylib", "rlib"]

examples/cuda/vecadd/kernels/src/lib.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,34 @@
11
use cuda_std::prelude::*;
22

3+
use rand_core::{SeedableRng, RngCore};
4+
use rand_xoshiro::Xoroshiro128StarStar;
5+
6+
const BASE64_CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
7+
8+
fn splitmix64(mut x: u64) -> u64 {
9+
x = x.wrapping_add(0x9e3779b97f4a7c15u64);
10+
x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9u64);
11+
x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111ebu64);
12+
x ^ (x >> 31)
13+
}
14+
15+
pub fn generate_random_private_key(thread_idx: usize, rng_seed: u64) -> [u8; 32] {
16+
let mixed_seed = splitmix64(rng_seed.wrapping_add(thread_idx as u64));
17+
let mut private_key = [0u8; 32];
18+
let mut rng = Xoroshiro128StarStar::seed_from_u64(mixed_seed);
19+
rng.fill_bytes(&mut private_key);
20+
private_key
21+
}
22+
23+
pub fn generate_base64_nonce(thread_idx: usize, rng_seed: u64, nonce: &mut [u8]) {
24+
let mixed_seed = splitmix64(rng_seed.wrapping_add(thread_idx as u64));
25+
let mut rng = Xoroshiro128StarStar::seed_from_u64(mixed_seed);
26+
for byte in nonce.iter_mut() {
27+
let idx = (rng.next_u32() % 64) as usize;
28+
*byte = BASE64_CHARS[idx];
29+
}
30+
}
31+
332
#[kernel]
433
#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]
534
pub unsafe fn vecadd(a: &[f32], b: &[f32], c: *mut f32) {

0 commit comments

Comments
 (0)