Skip to content

Commit 1daa33d

Browse files
zdevitometa-codesync[bot]
authored andcommitted
Remove direct dependency on libcuda (#1926)
Summary: Pull Request resolved: #1926 Dynamically load any cuda driver functions so that monarch library can be loaded a machine that doesn't have a gpu even if the library is built with one. ghstack-source-id: 324500431 Reviewed By: dstaay-fb Differential Revision: D87380631 fbshipit-source-id: fbda5498f3edd6e063a01b77ea968f3afa4eb21e
1 parent 7bb3277 commit 1daa33d

File tree

12 files changed

+478
-137
lines changed

12 files changed

+478
-137
lines changed

cuda-sys/build.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ fn main() {
3131
.clang_arg("c++")
3232
.clang_arg("-std=gnu++20")
3333
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
34-
// Allow the specified functions and types
35-
.allowlist_function("cu.*")
36-
.allowlist_function("CU.*")
37-
.allowlist_type("cu.*")
38-
.allowlist_type("CU.*")
34+
// Allow the specified functions and types (CUDA Runtime API only)
35+
.allowlist_function("cuda.*")
36+
.allowlist_function("CUDA.*")
37+
.allowlist_type("cuda.*")
38+
.allowlist_type("CUDA.*")
3939
// Use newtype enum style
4040
.default_enum_style(bindgen::EnumVariation::NewType {
4141
is_bitfield: false,
@@ -78,7 +78,6 @@ fn main() {
7878
}
7979
};
8080
println!("cargo:rustc-link-search=native={}", cuda_lib_dir);
81-
println!("cargo:rustc-link-lib=cuda");
8281
println!("cargo:rustc-link-lib=cudart");
8382

8483
// Generate bindings - fail fast if this doesn't work

cuda-sys/src/lib.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,3 @@ mod inner {
3434
}
3535

3636
pub use inner::*;
37-
38-
#[cfg(test)]
39-
mod tests {
40-
use std::mem::MaybeUninit;
41-
42-
use super::*;
43-
44-
#[test]
45-
fn sanity() {
46-
// SAFETY: testing bindings
47-
unsafe {
48-
let mut version = MaybeUninit::<i32>::uninit();
49-
let result = cuDriverGetVersion(version.as_mut_ptr());
50-
assert_eq!(result, cudaError_enum(0));
51-
}
52-
}
53-
}

cuda-sys/src/wrapper.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,4 @@
88

99
#pragma once
1010

11-
#include <cuda.h>
1211
#include <cuda_runtime.h>

monarch_rdma/examples/cuda_ping_pong/src/cuda_ping_pong.rs

Lines changed: 51 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -269,45 +269,51 @@ impl Actor for CudaRdmaActor {
269269
// For this example, we'll use a regular Rust allocation as a placeholder
270270
// The actual CUDA allocation would be handled by the monarch_rdma library
271271
unsafe {
272-
cu_check!(cuda_sys::cuInit(0));
273-
let mut dptr: cuda_sys::CUdeviceptr = std::mem::zeroed();
274-
let mut handle: cuda_sys::CUmemGenericAllocationHandle = std::mem::zeroed();
275-
276-
let mut device: cuda_sys::CUdevice = std::mem::zeroed();
277-
cu_check!(cuda_sys::cuDeviceGet(&mut device, device_id as i32));
272+
cu_check!(rdmaxcel_sys::rdmaxcel_cuInit(0));
273+
let mut dptr: rdmaxcel_sys::CUdeviceptr = std::mem::zeroed();
274+
let mut handle: rdmaxcel_sys::CUmemGenericAllocationHandle = std::mem::zeroed();
275+
276+
let mut device: rdmaxcel_sys::CUdevice = std::mem::zeroed();
277+
cu_check!(rdmaxcel_sys::rdmaxcel_cuDeviceGet(
278+
&mut device,
279+
device_id as i32
280+
));
278281

279-
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
280-
cu_check!(cuda_sys::cuCtxCreate_v2(&mut context, 0, device_id as i32));
281-
cu_check!(cuda_sys::cuCtxSetCurrent(context));
282+
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
283+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
284+
&mut context,
285+
0,
286+
device_id as i32
287+
));
288+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
282289

283290
let mut granularity: usize = 0;
284-
let mut prop: cuda_sys::CUmemAllocationProp = std::mem::zeroed();
285-
prop.type_ = cuda_sys::CUmemAllocationType::CU_MEM_ALLOCATION_TYPE_PINNED;
286-
prop.location.type_ = cuda_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE;
291+
let mut prop: rdmaxcel_sys::CUmemAllocationProp = std::mem::zeroed();
292+
prop.type_ = rdmaxcel_sys::CU_MEM_ALLOCATION_TYPE_PINNED;
293+
prop.location.type_ = rdmaxcel_sys::CU_MEM_LOCATION_TYPE_DEVICE;
287294
prop.location.id = device;
288295
prop.allocFlags.gpuDirectRDMACapable = 1;
289-
prop.requestedHandleTypes =
290-
cuda_sys::CUmemAllocationHandleType::CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
296+
prop.requestedHandleTypes = rdmaxcel_sys::CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
291297

292-
cu_check!(cuda_sys::cuMemGetAllocationGranularity(
298+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemGetAllocationGranularity(
293299
&mut granularity as *mut usize,
294300
&prop,
295-
cuda_sys::CUmemAllocationGranularity_flags::CU_MEM_ALLOC_GRANULARITY_MINIMUM,
301+
rdmaxcel_sys::CU_MEM_ALLOC_GRANULARITY_MINIMUM,
296302
));
297303

298304
// ensure our size is aligned
299305
let padded_size: usize = ((buffer_size - 1) / granularity + 1) * granularity;
300306
assert!(padded_size == buffer_size);
301307

302-
cu_check!(cuda_sys::cuMemCreate(
303-
&mut handle as *mut cuda_sys::CUmemGenericAllocationHandle,
308+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemCreate(
309+
&mut handle as *mut rdmaxcel_sys::CUmemGenericAllocationHandle,
304310
padded_size,
305311
&prop,
306312
0
307313
));
308314
// reserve and map the memory
309-
cu_check!(cuda_sys::cuMemAddressReserve(
310-
&mut dptr as *mut cuda_sys::CUdeviceptr,
315+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemAddressReserve(
316+
&mut dptr as *mut rdmaxcel_sys::CUdeviceptr,
311317
padded_size,
312318
0,
313319
0,
@@ -317,23 +323,28 @@ impl Actor for CudaRdmaActor {
317323
assert!(padded_size % granularity == 0);
318324

319325
// fails if a add cu_check macro; but passes if we don't
320-
let err = cuda_sys::cuMemMap(
321-
dptr as cuda_sys::CUdeviceptr,
326+
let err = rdmaxcel_sys::rdmaxcel_cuMemMap(
327+
dptr as rdmaxcel_sys::CUdeviceptr,
322328
padded_size,
323329
0,
324-
handle as cuda_sys::CUmemGenericAllocationHandle,
330+
handle as rdmaxcel_sys::CUmemGenericAllocationHandle,
325331
0,
326332
);
327-
if err != cuda_sys::CUresult::CUDA_SUCCESS {
333+
if err != rdmaxcel_sys::CUDA_SUCCESS {
328334
panic!("failed reserving and mapping memory {:?}", err);
329335
}
330336

331337
// set access
332-
let mut access_desc: cuda_sys::CUmemAccessDesc = std::mem::zeroed();
333-
access_desc.location.type_ = cuda_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE;
338+
let mut access_desc: rdmaxcel_sys::CUmemAccessDesc = std::mem::zeroed();
339+
access_desc.location.type_ = rdmaxcel_sys::CU_MEM_LOCATION_TYPE_DEVICE;
334340
access_desc.location.id = device;
335-
access_desc.flags = cuda_sys::CUmemAccess_flags::CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
336-
cu_check!(cuda_sys::cuMemSetAccess(dptr, padded_size, &access_desc, 1));
341+
access_desc.flags = rdmaxcel_sys::CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
342+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemSetAccess(
343+
dptr,
344+
padded_size,
345+
&access_desc,
346+
1
347+
));
337348
Ok(Self {
338349
device_id,
339350
cpu_buffer,
@@ -385,15 +396,15 @@ impl Handler<InitializeBuffer> for CudaRdmaActor {
385396
self.cpu_buffer.fill(value);
386397

387398
unsafe {
388-
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
389-
cu_check!(cuda_sys::cuCtxCreate_v2(
399+
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
400+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
390401
&mut context,
391402
0,
392403
self.device_id as i32
393404
));
394-
cu_check!(cuda_sys::cuCtxSetCurrent(context));
405+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
395406
cuda_sys::cudaDeviceSynchronize();
396-
cu_check!(cuda_sys::cuMemcpyHtoD_v2(
407+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemcpyHtoD_v2(
397408
self.cu_ptr as u64,
398409
self.cpu_buffer.as_ptr() as *const std::ffi::c_void,
399410
self.cpu_buffer.len()
@@ -459,13 +470,13 @@ impl Handler<PerformPingPong> for CudaRdmaActor {
459470

460471
validate_execution_context().await?;
461472
unsafe {
462-
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
463-
cu_check!(cuda_sys::cuCtxCreate_v2(
473+
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
474+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
464475
&mut context,
465476
0,
466477
self.device_id as i32
467478
));
468-
cu_check!(cuda_sys::cuCtxSetCurrent(context));
479+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
469480
}
470481
let qp = self
471482
.rdma_manager
@@ -532,17 +543,17 @@ impl Handler<VerifyBuffer> for CudaRdmaActor {
532543
VerifyBuffer(expected_values, reply): VerifyBuffer,
533544
) -> Result<(), anyhow::Error> {
534545
unsafe {
535-
let mut context: cuda_sys::CUcontext = std::mem::zeroed();
536-
cu_check!(cuda_sys::cuCtxCreate_v2(
546+
let mut context: rdmaxcel_sys::CUcontext = std::mem::zeroed();
547+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxCreate_v2(
537548
&mut context,
538549
0,
539550
self.device_id as i32
540551
));
541-
cu_check!(cuda_sys::cuCtxSetCurrent(context));
552+
cu_check!(rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(context));
542553
cuda_sys::cudaDeviceSynchronize();
543-
cu_check!(cuda_sys::cuMemcpyDtoH_v2(
554+
cu_check!(rdmaxcel_sys::rdmaxcel_cuMemcpyDtoH_v2(
544555
self.cpu_buffer.as_mut_ptr() as *mut std::ffi::c_void,
545-
self.cu_ptr as cuda_sys::CUdeviceptr,
556+
self.cu_ptr as rdmaxcel_sys::CUdeviceptr,
546557
self.cpu_buffer.len(),
547558
));
548559
}

monarch_rdma/src/macros.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
#[macro_export]
1010
macro_rules! cu_check {
1111
($result:expr) => {
12-
if $result != cuda_sys::CUresult::CUDA_SUCCESS {
12+
if $result != rdmaxcel_sys::CUDA_SUCCESS {
1313
let mut error_string: *const std::os::raw::c_char = std::ptr::null();
14-
cuda_sys::cuGetErrorString($result, &mut error_string);
14+
rdmaxcel_sys::rdmaxcel_cuGetErrorString($result, &mut error_string);
1515
panic!(
1616
"cuda failure {}:{} {:?} '{}'",
1717
file!(),

monarch_rdma/src/rdma_manager_actor.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -366,13 +366,13 @@ impl RdmaManagerActor {
366366
) -> Result<(RdmaMemoryRegionView, String), anyhow::Error> {
367367
unsafe {
368368
let mut mem_type: i32 = 0;
369-
let ptr = addr as cuda_sys::CUdeviceptr;
370-
let err = cuda_sys::cuPointerGetAttribute(
369+
let ptr = addr as rdmaxcel_sys::CUdeviceptr;
370+
let err = rdmaxcel_sys::rdmaxcel_cuPointerGetAttribute(
371371
&mut mem_type as *mut _ as *mut std::ffi::c_void,
372-
cuda_sys::CUpointer_attribute_enum::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
372+
rdmaxcel_sys::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
373373
ptr,
374374
);
375-
let is_cuda = err == cuda_sys::CUresult::CUDA_SUCCESS;
375+
let is_cuda = err == rdmaxcel_sys::CUDA_SUCCESS;
376376

377377
let mut selected_rdma_device = None;
378378

@@ -457,11 +457,11 @@ impl RdmaManagerActor {
457457
mrv = maybe_mrv.unwrap();
458458
} else if is_cuda {
459459
let mut fd: i32 = -1;
460-
cuda_sys::cuMemGetHandleForAddressRange(
461-
&mut fd as *mut i32 as *mut std::ffi::c_void,
462-
addr as cuda_sys::CUdeviceptr,
460+
rdmaxcel_sys::rdmaxcel_cuMemGetHandleForAddressRange(
461+
&mut fd,
462+
addr as rdmaxcel_sys::CUdeviceptr,
463463
size,
464-
cuda_sys::CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
464+
rdmaxcel_sys::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
465465
0,
466466
);
467467
mr = rdmaxcel_sys::ibv_reg_dmabuf_mr(domain_pd, 0, size, 0, fd, access.0 as i32);

0 commit comments

Comments
 (0)