//! CUDA-specific pointer handling logic.

use crate::gpu_only;
use core::arch::asm;

/// Special areas of GPU memory where a pointer could reside.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AddressSpace {
    /// Memory available for reading and writing to the entire device.
    Global,
    /// Block-local read/write memory available to all threads in a block.
    Shared,
    /// Read-only memory available to the whole device.
    Constant,
    /// Thread-local read/write memory only available to an individual thread.
    Local,
}

/// Determines whether a pointer is in a specific address space.
///
/// # Safety
///
/// The pointer must be valid for an instance of `T`, otherwise Undefined Behavior is exhibited.
// TODO(RDambrosio016): Investigate subpar codegen for this function. It seems nvcc implements this not using
// inline asm, but instead with some sort of compiler intrinsic, because it's able to optimize away the function
// a lot of the time.
#[gpu_only]
pub unsafe fn is_in_address_space<T>(ptr: *const T, address_space: AddressSpace) -> bool {
    let ret: u32;
    // create a predicate register to store the result of the isspacep check in.
    asm!(".reg .pred p;");

    // perform the actual isspacep operation, and store the result in the predicate register we made.
    match address_space {
        AddressSpace::Global => asm!("isspacep.global p, {};", in(reg64) ptr),
        AddressSpace::Shared => asm!("isspacep.shared p, {};", in(reg64) ptr),
        AddressSpace::Constant => asm!("isspacep.const p, {};", in(reg64) ptr),
        AddressSpace::Local => asm!("isspacep.local p, {};", in(reg64) ptr),
    }

    // finally, use the predicate register to select 1 or 0 into the output register.
    asm!("selp.u32 {}, 1, 0, p;", out(reg32) ret);

    ret != 0
}
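
// Illustrative sketch, not part of the original file: a hypothetical debug helper showing how
// `is_in_address_space` might be used to sanity-check a pointer before a shared-memory operation.
// The name `debug_assert_in_shared` is ours; the only assumption is the safety contract above
// (the pointer must be valid for an instance of `T`).
#[gpu_only]
unsafe fn debug_assert_in_shared<T>(ptr: *const T) {
    // Compiles to nothing in release builds; in debug builds, verify the pointer really
    // resides in block-shared memory.
    debug_assert!(is_in_address_space(ptr, AddressSpace::Shared));
}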

/// Converts a pointer from a generic address space to a specific address space.
/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
///
/// # Safety
///
/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
/// otherwise Undefined Behavior is exhibited.
#[gpu_only]
pub unsafe fn convert_generic_to_specific_address_space<T>(
    ptr: *const T,
    address_space: AddressSpace,
) -> *const T {
    let ret: *const T;

    // use the cvta.to.{space} form to narrow the generic pointer into the target window.
    match address_space {
        AddressSpace::Global => asm!(
            "cvta.to.global.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Shared => asm!(
            "cvta.to.shared.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Constant => asm!(
            "cvta.to.const.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Local => asm!(
            "cvta.to.local.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
    }

    ret
}
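
// Illustrative sketch, not part of the original file: a hypothetical wrapper showing how the
// generic-to-specific conversion might be used once a pointer is already known (for example via
// `is_in_address_space`) to live in shared memory. The name `to_shared_window` is ours.
#[gpu_only]
unsafe fn to_shared_window<T>(ptr: *const T) -> *const T {
    // Safety: the caller must guarantee `ptr` is valid and truly resides in shared memory,
    // otherwise the cvta.to.shared conversion is Undefined Behavior.
    convert_generic_to_specific_address_space(ptr, AddressSpace::Shared)
}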

/// Converts a pointer in a specific address space to a generic address space.
/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
///
/// # Safety
///
/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
/// otherwise Undefined Behavior is exhibited.
#[gpu_only]
pub unsafe fn convert_specific_address_space_to_generic<T>(
    ptr: *const T,
    address_space: AddressSpace,
) -> *const T {
    let ret: *const T;

    // use the cvta.{space} form to widen the specific pointer back into a generic pointer.
    match address_space {
        AddressSpace::Global => asm!(
            "cvta.global.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Shared => asm!(
            "cvta.shared.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Constant => asm!(
            "cvta.const.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
        AddressSpace::Local => asm!(
            "cvta.local.u64 {}, {};",
            out(reg64) ret,
            in(reg64) ptr
        ),
    }

    ret
}
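
// Illustrative sketch, not part of the original file: a hypothetical round trip that narrows a
// generic pointer to the global state space and widens it back to a generic pointer, showing how
// the two `cvta` wrappers mirror each other. Assumes `ptr` is valid and resides in global memory.
#[gpu_only]
unsafe fn roundtrip_through_global<T>(ptr: *const T) -> *const T {
    let global_window = convert_generic_to_specific_address_space(ptr, AddressSpace::Global);
    // Converting back yields a generic pointer addressing the same memory as `ptr`.
    convert_specific_address_space_to_generic(global_window, AddressSpace::Global)
}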