Skip to content

Commit c37f1c6

Browse files
committed
Add external resource interoperability
Signed-off-by: Dario Mylonopoulos <ramenguy99@gmail.com>
1 parent f3814e7 commit c37f1c6

File tree

5 files changed

+254
-0
lines changed

5 files changed

+254
-0
lines changed

warp/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
from warp.context import Kernel, Function, Launch
8383
from warp.context import Stream, get_stream, set_stream, wait_stream, synchronize_stream
8484
from warp.context import Event, record_event, wait_event, synchronize_event, get_event_elapsed_time
85+
from warp.context import ExternalMemoryBuffer, ExternalSemaphore, signal_external_semaphore, wait_external_semaphore
8586
from warp.context import RegisteredGLBuffer
8687
from warp.context import is_mempool_supported, is_mempool_enabled, set_mempool_enabled
8788
from warp.context import (

warp/context.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2669,6 +2669,16 @@ def wait_event(self, event: Event):
26692669
"""
26702670
runtime.core.cuda_stream_wait_event(self.cuda_stream, event.cuda_event)
26712671

2672+
def signal_external_semaphore(self, semaphore: "ExternalSemaphore", value: int = 0):
    """Enqueue a signal of an external semaphore on this stream.

    Forwards to the native ``cuda_signal_external_semaphore_async`` entry
    point using the semaphore's device context and this stream's CUDA handle.

    Args:
        semaphore: The imported external semaphore to signal.
        value: Passed through to the native call, which stores it as the
            fence value of the signal parameters.
    """

    # NOTE: the annotation is a string because ``ExternalSemaphore`` is defined
    # later in this module than ``Stream``; an eagerly evaluated annotation
    # would raise NameError while the class body is being executed.
    runtime.core.cuda_signal_external_semaphore_async(
        semaphore.device.context, semaphore.external_semaphore, value, self.cuda_stream
    )
2676+
2677+
def wait_external_semaphore(self, semaphore: "ExternalSemaphore", value: int = 0):
    """Enqueue a wait on an external semaphore on this stream.

    Forwards to the native ``cuda_wait_external_semaphore_async`` entry point
    using the semaphore's device context and this stream's CUDA handle.

    Args:
        semaphore: The imported external semaphore to wait on.
        value: Passed through to the native call, which stores it as the
            fence value of the wait parameters.
    """

    # NOTE: the annotation is a string because ``ExternalSemaphore`` is defined
    # later in this module than ``Stream``; an eagerly evaluated annotation
    # would raise NameError while the class body is being executed.
    runtime.core.cuda_wait_external_semaphore_async(
        semaphore.device.context, semaphore.external_semaphore, value, self.cuda_stream
    )
2681+
26722682
def wait_stream(self, other_stream: "Stream", event: Optional[Event] = None):
26732683
"""Records an event on `other_stream` and makes this stream wait on it.
26742684
@@ -3809,6 +3819,50 @@ def __init__(self):
38093819
self.core.cuda_graphics_unregister_resource.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
38103820
self.core.cuda_graphics_unregister_resource.restype = None
38113821

3822+
self.core.cuda_import_external_memory.argtypes = [
3823+
ctypes.c_void_p,
3824+
ctypes.c_uint,
3825+
ctypes.c_void_p,
3826+
ctypes.c_uint64,
3827+
ctypes.c_uint,
3828+
]
3829+
self.core.cuda_import_external_memory.restype = ctypes.c_void_p
3830+
self.core.cuda_external_memory_get_mapped_buffer.argtypes = [
3831+
ctypes.c_void_p,
3832+
ctypes.c_void_p,
3833+
ctypes.c_uint64,
3834+
ctypes.c_uint64,
3835+
ctypes.c_uint,
3836+
ctypes.POINTER(ctypes.c_uint64),
3837+
]
3838+
self.core.cuda_external_memory_get_mapped_buffer.restype = None
3839+
self.core.cuda_destroy_external_memory.argtypes = [ ctypes.c_void_p, ctypes.c_void_p ]
3840+
self.core.cuda_destroy_external_memory.restype = None
3841+
3842+
self.core.cuda_import_external_semaphore.argtypes = [
3843+
ctypes.c_void_p,
3844+
ctypes.c_uint,
3845+
ctypes.c_void_p,
3846+
ctypes.c_uint,
3847+
]
3848+
self.core.cuda_import_external_semaphore.restype = ctypes.c_void_p
3849+
self.core.cuda_destroy_external_semaphore.argtypes = [ ctypes.c_void_p, ctypes.c_void_p ]
3850+
self.core.cuda_destroy_external_semaphore.restype = None
3851+
self.core.cuda_signal_external_semaphore_async.argtypes = [
3852+
ctypes.c_void_p,
3853+
ctypes.c_void_p,
3854+
ctypes.c_uint64,
3855+
ctypes.c_void_p,
3856+
]
3857+
self.core.cuda_signal_external_semaphore_async.restype = None
3858+
self.core.cuda_wait_external_semaphore_async.argtypes = [
3859+
ctypes.c_void_p,
3860+
ctypes.c_void_p,
3861+
ctypes.c_uint64,
3862+
ctypes.c_void_p,
3863+
]
3864+
self.core.cuda_wait_external_semaphore_async.restype = None
3865+
38123866
self.core.cuda_timing_begin.argtypes = [ctypes.c_int]
38133867
self.core.cuda_timing_begin.restype = None
38143868
self.core.cuda_timing_get_result_count.argtypes = []
@@ -4732,6 +4786,50 @@ def wait_event(event: Event):
47324786
get_stream().wait_event(event)
47334787

47344788

4789+
class ExternalSemaphore:
    """A semaphore created by another API and imported into CUDA.

    The semaphore is imported on construction through the native
    ``cuda_import_external_semaphore`` entry point and destroyed through
    ``cuda_destroy_external_semaphore`` when the object is garbage collected.

    The ``HANDLE_TYPE_*`` constants carry the numeric values of CUDA's
    ``cudaExternalSemaphoreHandleType`` enumeration.
    """

    HANDLE_TYPE_OPAQUEFD = 1
    HANDLE_TYPE_OPAQUEWIN32 = 2
    HANDLE_TYPE_OPAQUEWIN32KMT = 3
    HANDLE_TYPE_D3D12FENCE = 4
    HANDLE_TYPE_D3D11FENCE = 5
    HANDLE_TYPE_NVSCISYNC = 6
    HANDLE_TYPE_KEYEDMUTEX = 7
    HANDLE_TYPE_KEYEDMUTEXKMT = 8
    HANDLE_TYPE_TIMELINESEMAPHOREFD = 9
    HANDLE_TYPE_TIMELINESEMAPHOREWIN32 = 10

    # Legacy aliases kept for backward compatibility only. These names belong
    # to the external *memory* handle enumeration; for semaphores the same
    # numeric values mean the fence/sync/mutex types listed above.
    HANDLE_TYPE_D3D12HEAP = 4
    HANDLE_TYPE_D3D12RESOURCE = 5
    HANDLE_TYPE_D3D11RESOURCE = 6
    HANDLE_TYPE_D3D11RESOURCEKMT = 7
    HANDLE_TYPE_NVSCIBUF = 8

    def __init__(self, handle: Union[ctypes.c_void_p, int], handle_type: int, flags: int = 0, device: Devicelike = None):
        """Import an external semaphore on the given device.

        Args:
            handle: Native handle of the semaphore (pointer-sized value).
            handle_type: One of the ``HANDLE_TYPE_*`` constants.
            flags: Flags forwarded unchanged to the native import call.
            device: Device to import the semaphore on, resolved with ``get_device()``.

        Raises:
            RuntimeError: If the native import call returns a null handle.
        """

        # Set first so that __del__ is safe even if anything below raises.
        self.external_semaphore = None

        self.device = get_device(device)
        self.context = self.device.context
        self.external_semaphore = runtime.core.cuda_import_external_semaphore(self.context, handle_type, handle, flags)
        if not self.external_semaphore:
            raise RuntimeError(f"Failed to import external semaphore {handle} with CUDA")

    def __del__(self):
        """Destroy the imported semaphore, if one was successfully imported."""

        # getattr() guards against a partially constructed instance.
        if not getattr(self, "external_semaphore", None):
            return

        # use CUDA context guard to avoid side effects during garbage collection
        with self.device.context_guard:
            runtime.core.cuda_destroy_external_semaphore(self.context, self.external_semaphore)
4819+
4820+
4821+
def signal_external_semaphore(semaphore: ExternalSemaphore, value: int = 0):
    """Signal an external semaphore on the stream returned by ``get_stream()``.

    Args:
        semaphore: The imported external semaphore to signal.
        value: Value forwarded to ``Stream.signal_external_semaphore``.

    Returns:
        Whatever ``Stream.signal_external_semaphore`` returns.
    """

    stream = get_stream()
    return stream.signal_external_semaphore(semaphore, value)
4825+
4826+
4827+
def wait_external_semaphore(semaphore: ExternalSemaphore, value: int = 0):
    """Wait on an external semaphore on the stream returned by ``get_stream()``.

    Args:
        semaphore: The imported external semaphore to wait on.
        value: Value forwarded to ``Stream.wait_external_semaphore``.
    """

    stream = get_stream()
    stream.wait_external_semaphore(semaphore, value)
4831+
4832+
47354833
def get_event_elapsed_time(start_event: Event, end_event: Event, synchronize: bool = True):
47364834
"""Get the elapsed time between two recorded events.
47374835
@@ -4772,6 +4870,48 @@ def wait_stream(other_stream: Stream, event: Optional[Event] = None):
47724870
get_stream().wait_stream(other_stream, event=event)
47734871

47744872

4873+
class ExternalMemoryBuffer:
    """A memory object created by another API and imported into CUDA.

    The memory is imported on construction through the native
    ``cuda_import_external_memory`` entry point, can be exposed as a Warp
    array with :meth:`map`, and is destroyed through
    ``cuda_destroy_external_memory`` when the object is garbage collected.

    The ``HANDLE_TYPE_*`` constants carry the numeric values of CUDA's
    ``cudaExternalMemoryHandleType`` enumeration.
    """

    HANDLE_TYPE_OPAQUEFD = 1
    HANDLE_TYPE_OPAQUEWIN32 = 2
    HANDLE_TYPE_OPAQUEWIN32KMT = 3
    HANDLE_TYPE_D3D12HEAP = 4
    HANDLE_TYPE_D3D12RESOURCE = 5
    HANDLE_TYPE_D3D11RESOURCE = 6
    HANDLE_TYPE_D3D11RESOURCEKMT = 7
    HANDLE_TYPE_NVSCIBUF = 8

    # Value accepted by the ``flags`` argument of the constructor.
    FLAG_DEDICATED = 1

    def __init__(self, handle: Union[ctypes.c_void_p, int], handle_type: int, size: int, flags: int = 0, device: Devicelike = None):
        """Import an external memory object on the given device.

        Args:
            handle: Native handle of the memory object (pointer-sized value).
            handle_type: One of the ``HANDLE_TYPE_*`` constants.
            size: Size of the memory object in bytes.
            flags: Flags forwarded unchanged to the native import call.
            device: Device to import the memory on, resolved with ``get_device()``.

        Raises:
            RuntimeError: If the native import call returns a null handle.
        """

        # Set first so that __del__ is safe even if anything below raises.
        self.external_memory = None

        self.device = get_device(device)
        self.context = self.device.context
        self.size = size
        self.external_memory = runtime.core.cuda_import_external_memory(self.context, handle_type, handle, size, flags)
        if not self.external_memory:
            raise RuntimeError(f"Failed to import external memory {handle} with CUDA")

    def map(self, dtype: type, shape: Sequence[int]) -> warp.array:
        """Map the whole imported memory object and wrap it in a Warp array.

        The mapping always starts at offset 0 and covers ``self.size`` bytes.

        Args:
            dtype: Element type of the returned array.
            shape: Shape of the returned array.

        Returns:
            A ``warp.array`` on this buffer's device that aliases the mapped memory.

        Raises:
            RuntimeError: If the native mapping call does not produce a device pointer.
        """

        ptr = ctypes.c_uint64(0)
        runtime.core.cuda_external_memory_get_mapped_buffer(
            self.context, self.external_memory, 0, self.size, 0, ctypes.byref(ptr)
        )
        if not ptr.value:
            # Never hand out an array that wraps a null device pointer.
            raise RuntimeError("Failed to map external memory buffer with CUDA")

        # NOTE(review): the CUDA docs require the mapped pointer to be freed
        # with cudaFree() before the external memory is destroyed; nothing here
        # does that, and the returned array does not keep `self` alive - confirm
        # the intended ownership model.
        return warp.array(ptr=ptr.value, dtype=dtype, shape=shape, device=self.device, capacity=self.size)

    def __del__(self):
        """Destroy the imported memory object, if one was successfully imported."""

        # getattr() guards against a partially constructed instance.
        if not getattr(self, "external_memory", None):
            return

        # use CUDA context guard to avoid side effects during garbage collection
        with self.device.context_guard:
            runtime.core.cuda_destroy_external_memory(self.context, self.external_memory)
4913+
4914+
47754915
class RegisteredGLBuffer:
47764916
"""
47774917
Helper class to register a GL buffer with CUDA so that it can be mapped to a Warp array.

warp/native/warp.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,15 @@ WP_API void cuda_graphics_device_ptr_and_size(void* context, void* resource, uin
10761076
WP_API void* cuda_graphics_register_gl_buffer(void* context, uint32_t gl_buffer, unsigned int flags) { return NULL; }
10771077
WP_API void cuda_graphics_unregister_resource(void* context, void* resource) {}
10781078

1079+
// Fallback definition that imports nothing and always reports failure (NULL).
// NOTE(review): presumably compiled only when CUDA is disabled - confirm against the enclosing preprocessor guard.
WP_API void* cuda_import_external_memory(void* context, unsigned int type, void* handle, uint64_t size, unsigned int flags) { return NULL; }
1080+
// Fallback definition: there is nothing to destroy, so this is a no-op.
WP_API void cuda_destroy_external_memory(void* context, void* external_memory)
{
}
1081+
// Fallback definition: performs no mapping and leaves *ptr untouched.
WP_API void cuda_external_memory_get_mapped_buffer(void* context, void* external_memory, uint64_t offset, uint64_t size, unsigned int flags, uint64_t* ptr)
{
}
1082+
1083+
// Fallback definition that imports nothing and always reports failure (NULL).
// NOTE(review): presumably compiled only when CUDA is disabled - confirm against the enclosing preprocessor guard.
WP_API void* cuda_import_external_semaphore(void* context, unsigned int type, void* handle, unsigned int flags) { return NULL; }
1084+
// Fallback definition: there is nothing to destroy, so this is a no-op.
WP_API void cuda_destroy_external_semaphore(void* context, void* external_semaphore)
{
}
1085+
// Fallback definition: no semaphore is signaled.
WP_API void cuda_signal_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream)
{
}
1086+
// Fallback definition: no wait is enqueued.
WP_API void cuda_wait_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream)
{
}
1087+
10791088
WP_API void cuda_timing_begin(int flags) {}
10801089
WP_API int cuda_timing_get_result_count() { return 0; }
10811090
WP_API void cuda_timing_end(timing_result_t* results, int size) {}

warp/native/warp.cu

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3584,6 +3584,100 @@ void cuda_graphics_unregister_resource(void* context, void* resource)
35843584
delete res;
35853585
}
35863586

3587+
// Import an external memory object into CUDA.
//
// `type` is interpreted as a cudaExternalMemoryHandleType and `handle` carries
// the native handle as a pointer-sized value. Returns a heap-allocated
// cudaExternalMemory_t that must be released with cuda_destroy_external_memory(),
// or NULL if the import fails.
void* cuda_import_external_memory(void* context, unsigned int type, void* handle, uint64_t size, unsigned int flags)
{
    ContextGuard guard(context);

    cudaExternalMemoryHandleDesc desc = {};
    desc.type = (cudaExternalMemoryHandleType)type;
    desc.size = size;
    desc.flags = flags;

    // `desc.handle` is a union: fill the member that matches the handle type
    // instead of relying on aliasing through the win32 member.
    switch (desc.type)
    {
    case cudaExternalMemoryHandleTypeOpaqueFd:
        // file descriptor transported in the pointer-sized argument
        desc.handle.fd = (int)(intptr_t)handle;
        break;
    case cudaExternalMemoryHandleTypeNvSciBuf:
        desc.handle.nvSciBufObject = handle;
        break;
    default:
        desc.handle.win32.handle = handle;
        break;
    }

    cudaExternalMemory_t* external_memory = new cudaExternalMemory_t;
    if (!check_cuda(cudaImportExternalMemory(external_memory, &desc)))
    {
        delete external_memory;
        return NULL;
    }

    return external_memory;
}
3607+
3608+
// Map a range of an imported external memory object into the device address space.
//
// On success the device address is written to *ptr; on any failure *ptr is
// set to 0 so that callers can detect the error.
void cuda_external_memory_get_mapped_buffer(void* context, void* external_memory, uint64_t offset, uint64_t size, unsigned int flags, uint64_t* ptr)
{
    if (!ptr)
        return;

    // Report failure by default; only overwritten after a successful mapping.
    *ptr = 0;

    if (!external_memory)
        return;

    ContextGuard guard(context);

    cudaExternalMemory_t* memory = (cudaExternalMemory_t*)external_memory;
    cudaExternalMemoryBufferDesc desc = {};
    desc.offset = offset;
    desc.size = size;
    desc.flags = flags;

    void* device_ptr = NULL;
    if (check_cuda(cudaExternalMemoryGetMappedBuffer(&device_ptr, *memory, &desc)))
        *ptr = (uint64_t)device_ptr;
}
3622+
3623+
// Destroy an external memory object returned by cuda_import_external_memory()
// and release the heap allocation that holds its handle. NULL is ignored.
void cuda_destroy_external_memory(void* context, void* external_memory)
{
    // Nothing to do for a failed or already-released import.
    if (!external_memory)
        return;

    ContextGuard guard(context);

    cudaExternalMemory_t* memory = (cudaExternalMemory_t*)external_memory;
    check_cuda(cudaDestroyExternalMemory(*memory));
    delete memory;
}
3631+
3632+
// Import an external semaphore into CUDA.
//
// `type` is interpreted as a cudaExternalSemaphoreHandleType and `handle`
// carries the native handle as a pointer-sized value. Returns a heap-allocated
// cudaExternalSemaphore_t that must be released with
// cuda_destroy_external_semaphore(), or NULL if the import fails.
void* cuda_import_external_semaphore(void* context, unsigned int type, void* handle, unsigned int flags)
{
    ContextGuard guard(context);

    cudaExternalSemaphoreHandleDesc desc = {};
    desc.type = (cudaExternalSemaphoreHandleType)type;
    desc.flags = flags;

    // `desc.handle` is a union: fill the member that matches the handle type
    // instead of relying on aliasing through the win32 member.
    switch (desc.type)
    {
    case cudaExternalSemaphoreHandleTypeOpaqueFd:
    case cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd:
        // file descriptor transported in the pointer-sized argument
        desc.handle.fd = (int)(intptr_t)handle;
        break;
    case cudaExternalSemaphoreHandleTypeNvSciSync:
        desc.handle.nvSciSyncObj = handle;
        break;
    default:
        desc.handle.win32.handle = handle;
        break;
    }

    cudaExternalSemaphore_t* external_semaphore = new cudaExternalSemaphore_t;
    if (!check_cuda(cudaImportExternalSemaphore(external_semaphore, &desc)))
    {
        delete external_semaphore;
        return NULL;
    }

    return external_semaphore;
}
3651+
3652+
// Destroy an external semaphore returned by cuda_import_external_semaphore()
// and release the heap allocation that holds its handle. NULL is ignored.
void cuda_destroy_external_semaphore(void* context, void* external_semaphore)
{
    // Nothing to do for a failed or already-released import.
    if (!external_semaphore)
        return;

    ContextGuard guard(context);

    cudaExternalSemaphore_t* semaphore = (cudaExternalSemaphore_t*)external_semaphore;
    check_cuda(cudaDestroyExternalSemaphore(*semaphore));
    delete semaphore;
}
3660+
3661+
// Enqueue a signal of one external semaphore on `stream`, using `value` as
// the fence value of the signal parameters.
void cuda_signal_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream)
{
    ContextGuard guard(context);

    cudaExternalSemaphoreSignalParams signal_params = {};
    signal_params.params.fence.value = value;

    // `external_semaphore` points at the handle allocated by cuda_import_external_semaphore()
    cudaExternalSemaphore_t* handle = static_cast<cudaExternalSemaphore_t*>(external_semaphore);
    CUstream cuda_stream = static_cast<CUstream>(stream);

    check_cuda(cudaSignalExternalSemaphoresAsync(handle, &signal_params, 1, cuda_stream));
}
3670+
3671+
// Enqueue a wait on one external semaphore on `stream`, using `value` as
// the fence value of the wait parameters.
void cuda_wait_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream)
{
    ContextGuard guard(context);

    cudaExternalSemaphoreWaitParams wait_params = {};
    wait_params.params.fence.value = value;

    // `external_semaphore` points at the handle allocated by cuda_import_external_semaphore()
    cudaExternalSemaphore_t* handle = static_cast<cudaExternalSemaphore_t*>(external_semaphore);
    CUstream cuda_stream = static_cast<CUstream>(stream);

    check_cuda(cudaWaitExternalSemaphoresAsync(handle, &wait_params, 1, cuda_stream));
}
3680+
35873681
void cuda_timing_begin(int flags)
35883682
{
35893683
g_cuda_timing_state = new CudaTimingState(flags, g_cuda_timing_state);

warp/native/warp.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,16 @@ extern "C"
370370
WP_API void* cuda_graphics_register_gl_buffer(void* context, uint32_t gl_buffer, unsigned int flags);
371371
WP_API void cuda_graphics_unregister_resource(void* context, void* resource);
372372

373+
// external resource interoperability
374+
WP_API void* cuda_import_external_memory(void* context, unsigned int type, void* handle, uint64_t size, unsigned int flags);
375+
WP_API void cuda_destroy_external_memory(void* context, void* external_memory);
376+
WP_API void cuda_external_memory_get_mapped_buffer(void* context, void* external_memory, uint64_t offset, uint64_t size, unsigned int flags, uint64_t* ptr);
377+
378+
WP_API void* cuda_import_external_semaphore(void* context, unsigned int type, void* handle, unsigned int flags);
379+
WP_API void cuda_destroy_external_semaphore(void* context, void* external_semaphore);
380+
WP_API void cuda_signal_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream);
381+
WP_API void cuda_wait_external_semaphore_async(void* context, void* external_semaphore, uint64_t value, void* stream);
382+
373383
// CUDA timing
374384
WP_API void cuda_timing_begin(int flags);
375385
WP_API int cuda_timing_get_result_count();

0 commit comments

Comments
 (0)