Skip to content

Commit 3920058

Browse files
committed
Feat: experimental dynamic shared mem support, cuda_std 0.2.1
1 parent 924d605 commit 3920058

File tree

3 files changed

+52
-6
lines changed

3 files changed

+52
-6
lines changed

crates/cuda_std/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ Notable changes to this project will be documented in this file.
44

55
## Unreleased
66

7+
## 0.2.1 - 12/8/21
8+
9+
- Fixed `shared_array!` not using a fully qualified `MaybeUninit`.
10+
- Fixed `shared_array!` working on the CPU.
11+
- Added experimental dynamic shared memory support through `shared::dynamic_shared_mem`.
12+
713
## 0.2.0 - 12/5/21
814

915
- Added `#[externally_visible]` in conjunction with cg_nvvm dead code elimination changes to mark that

crates/cuda_std/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cuda_std"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
edition = "2018"
55
license = "MIT OR Apache-2.0"
66
description = "Standard library for CUDA with rustc_codegen_nvvm"

crates/cuda_std/src/shared.rs

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
//! Shared memory handling. Currently only macros.
1+
//! Static and Dynamic shared memory handling.
2+
3+
use crate::gpu_only;
24

35
/// Statically allocates a buffer large enough for `len` elements of `array_type`, yielding
46
/// a `*mut array_type` that points to uninitialized shared memory. `len` must be a constant expression.
@@ -42,9 +44,47 @@
4244
#[macro_export]
4345
macro_rules! shared_array {
4446
($array_type:ty; $len:expr) => {{
45-
// the initializer is discarded when declaring shared globals, so it is unimportant.
46-
#[$crate::address_space(shared)]
47-
static mut SHARED: MaybeUninit<[$array_type; $len]> = MaybeUninit::uninit();
48-
SHARED.as_mut_ptr() as *mut $array_type
47+
#[$crate::gpu_only]
48+
#[inline(always)]
49+
fn shared_array() -> *mut $array_type {
50+
use ::core::{cell::UnsafeCell, mem::MaybeUninit};
51+
struct SyncWrapper(UnsafeCell<MaybeUninit<[$array_type; $len]>>);
52+
// SAFETY: it is up to the user to verify sound shared memory usage, we cannot
53+
// fundamentally check it for soundness.
54+
unsafe impl Send for SyncWrapper {}
55+
// SAFETY: see above
56+
unsafe impl Sync for SyncWrapper {}
57+
58+
// the initializer is discarded when declaring shared globals, so it is unimportant.
59+
#[$crate::address_space(shared)]
60+
static SHARED: SyncWrapper = SyncWrapper(UnsafeCell::new(MaybeUninit::uninit()));
61+
62+
SHARED.0.get() as *mut $array_type
63+
}
64+
shared_array()
4965
}};
5066
}
67+
68+
/// Gets a pointer to the dynamic shared memory that was allocated by the caller of the kernel. The
69+
/// data is left uninitialized.
70+
///
71+
/// **Calling this function multiple times will yield the same pointer**.
72+
#[gpu_only]
73+
pub fn dynamic_shared_mem<T>() -> *mut T {
74+
// it is unclear whether an alignment of 16 is actually required for correctness, however,
75+
// it seems like nvcc always generates the global with .align 16 no matter the type, so we just copy
76+
// nvcc's behavior for now.
77+
extern "C" {
78+
// need to use nvvm_internal and not address_space because address_space only parses
79+
// static definitions, not extern static definitions.
80+
#[nvvm_internal(addrspace(3))]
81+
#[allow(improper_ctypes)]
82+
// mangle the name a bit so it is unlikely to collide with a symbol defined by someone else
83+
#[link_name = "_Zcuda_std_dyn_shared"]
84+
static DYN_SHARED: ::core::cell::UnsafeCell<u128>;
85+
}
86+
87+
// SAFETY: extern statics are how dynamic shared mem is done in CUDA. This will turn into
88+
// an extern variable decl in ptx, which is the same thing nvcc does if you dump the ptx from a cuda file.
89+
unsafe { DYN_SHARED.get() as *mut T }
90+
}

0 commit comments

Comments
 (0)