@@ -2669,6 +2669,16 @@ def wait_event(self, event: Event):
26692669 """
26702670 runtime .core .cuda_stream_wait_event (self .cuda_stream , event .cuda_event )
26712671
def signal_external_semaphore(self, semaphore: "ExternalSemaphore", value: int = 0):
    """Issue a signal operation for an external semaphore on this stream.

    Forwards to the native ``cuda_signal_external_semaphore_async`` entry
    point, passing the semaphore's device context, the imported semaphore
    handle, ``value`` and this stream's ``cuda_stream`` handle.

    Args:
        semaphore: The imported external semaphore to signal.
        value: Passed through unchanged to the native call (defaults to 0).
            NOTE(review): presumably the fence/timeline value to signal;
            confirm against the native implementation.
    """
    # The annotation is a string because ``ExternalSemaphore`` is defined
    # further down in this module than this method; an eagerly evaluated
    # annotation would fail with NameError while the class body executes.
    context = semaphore.device.context
    handle = semaphore.external_semaphore
    runtime.core.cuda_signal_external_semaphore_async(context, handle, value, self.cuda_stream)
2676+
def wait_external_semaphore(self, semaphore: "ExternalSemaphore", value: int = 0):
    """Issue a wait operation for an external semaphore on this stream.

    Forwards to the native ``cuda_wait_external_semaphore_async`` entry
    point, passing the semaphore's device context, the imported semaphore
    handle, ``value`` and this stream's ``cuda_stream`` handle.

    Args:
        semaphore: The imported external semaphore to wait on.
        value: Passed through unchanged to the native call (defaults to 0).
            NOTE(review): presumably the fence/timeline value to wait for;
            confirm against the native implementation.
    """
    # The annotation is a string because ``ExternalSemaphore`` is defined
    # further down in this module than this method; an eagerly evaluated
    # annotation would fail with NameError while the class body executes.
    context = semaphore.device.context
    handle = semaphore.external_semaphore
    runtime.core.cuda_wait_external_semaphore_async(context, handle, value, self.cuda_stream)
2681+
26722682 def wait_stream (self , other_stream : "Stream" , event : Optional [Event ] = None ):
26732683 """Records an event on `other_stream` and makes this stream wait on it.
26742684
@@ -3809,6 +3819,50 @@ def __init__(self):
38093819 self .core .cuda_graphics_unregister_resource .argtypes = [ctypes .c_void_p , ctypes .c_void_p ]
38103820 self .core .cuda_graphics_unregister_resource .restype = None
38113821
3822+ self .core .cuda_import_external_memory .argtypes = [
3823+ ctypes .c_void_p ,
3824+ ctypes .c_uint ,
3825+ ctypes .c_void_p ,
3826+ ctypes .c_uint64 ,
3827+ ctypes .c_uint ,
3828+ ]
3829+ self .core .cuda_import_external_memory .restype = ctypes .c_void_p
3830+ self .core .cuda_external_memory_get_mapped_buffer .argtypes = [
3831+ ctypes .c_void_p ,
3832+ ctypes .c_void_p ,
3833+ ctypes .c_uint64 ,
3834+ ctypes .c_uint64 ,
3835+ ctypes .c_uint ,
3836+ ctypes .POINTER (ctypes .c_uint64 ),
3837+ ]
3838+ self .core .cuda_external_memory_get_mapped_buffer .restype = None
3839+ self .core .cuda_destroy_external_memory .argtypes = [ ctypes .c_void_p , ctypes .c_void_p ]
3840+ self .core .cuda_destroy_external_memory .restype = None
3841+
3842+ self .core .cuda_import_external_semaphore .argtypes = [
3843+ ctypes .c_void_p ,
3844+ ctypes .c_uint ,
3845+ ctypes .c_void_p ,
3846+ ctypes .c_uint ,
3847+ ]
3848+ self .core .cuda_import_external_semaphore .restype = ctypes .c_void_p
3849+ self .core .cuda_destroy_external_semaphore .argtypes = [ ctypes .c_void_p , ctypes .c_void_p ]
3850+ self .core .cuda_destroy_external_semaphore .restype = None
3851+ self .core .cuda_signal_external_semaphore_async .argtypes = [
3852+ ctypes .c_void_p ,
3853+ ctypes .c_void_p ,
3854+ ctypes .c_uint64 ,
3855+ ctypes .c_void_p ,
3856+ ]
3857+ self .core .cuda_signal_external_semaphore_async .restype = None
3858+ self .core .cuda_wait_external_semaphore_async .argtypes = [
3859+ ctypes .c_void_p ,
3860+ ctypes .c_void_p ,
3861+ ctypes .c_uint64 ,
3862+ ctypes .c_void_p ,
3863+ ]
3864+ self .core .cuda_wait_external_semaphore_async .restype = None
3865+
38123866 self .core .cuda_timing_begin .argtypes = [ctypes .c_int ]
38133867 self .core .cuda_timing_begin .restype = None
38143868 self .core .cuda_timing_get_result_count .argtypes = []
@@ -4732,6 +4786,50 @@ def wait_event(event: Event):
47324786 get_stream ().wait_event (event )
47334787
47344788
class ExternalSemaphore:
    """Handle to an external semaphore imported into CUDA.

    The semaphore is imported in ``__init__`` through the native
    ``cuda_import_external_semaphore`` entry point and released in ``__del__``
    through ``cuda_destroy_external_semaphore``.
    """

    # Values accepted for the ``handle_type`` constructor argument.
    # NOTE(review): these names are identical to the handle types declared on
    # ``ExternalMemoryBuffer`` (HEAP / RESOURCE / NVSCIBUF). CUDA's external
    # *semaphore* handle-type enum uses different names for values 4-8
    # (fences, NvSciSync, keyed mutexes) - confirm which enum the native layer
    # expects before documenting these publicly.
    HANDLE_TYPE_OPAQUEFD = 1
    HANDLE_TYPE_OPAQUEWIN32 = 2
    HANDLE_TYPE_OPAQUEWIN32KMT = 3
    HANDLE_TYPE_D3D12HEAP = 4
    HANDLE_TYPE_D3D12RESOURCE = 5
    HANDLE_TYPE_D3D11RESOURCE = 6
    HANDLE_TYPE_D3D11RESOURCEKMT = 7
    HANDLE_TYPE_NVSCIBUF = 8

    def __init__(self, handle: Union[ctypes.c_void_p, int], handle_type: int, flags: int = 0, device: Devicelike = None):
        """Import an external semaphore on the given device.

        Args:
            handle: The external handle to import (pointer-sized value).
            handle_type: One of the ``HANDLE_TYPE_*`` constants.
            flags: Passed through unchanged to the native import call.
            device: Device on which to import; resolved with ``get_device()``.

        Raises:
            RuntimeError: If the native import call returns a NULL handle.
        """
        # Define the attribute up front so ``__del__`` always finds it, even
        # when one of the statements below raises.
        self.external_semaphore = None

        self.device = get_device(device)
        self.context = self.device.context
        self.external_semaphore = runtime.core.cuda_import_external_semaphore(
            self.context, handle_type, handle, flags
        )
        # ctypes converts a NULL ``c_void_p`` return value to ``None``
        if self.external_semaphore is None:
            raise RuntimeError(f"Failed to import external semaphore {handle} with CUDA")

    def __del__(self):
        """Destroy the imported semaphore, if one was successfully imported."""
        # ``__del__`` also runs for objects whose ``__init__`` raised early (or
        # was never called), in which case the attribute may not exist at all.
        external_semaphore = getattr(self, "external_semaphore", None)
        if not external_semaphore:
            return

        # use CUDA context guard to avoid side effects during garbage collection
        with self.device.context_guard:
            runtime.core.cuda_destroy_external_semaphore(self.context, external_semaphore)

        # forget the handle so a repeated call cannot destroy it twice
        self.external_semaphore = None
4819+
4820+
def signal_external_semaphore(semaphore: ExternalSemaphore, value: int = 0):
    """Signal an external semaphore on the stream returned by ``get_stream()``.

    Convenience wrapper around ``Stream.signal_external_semaphore()``.

    Args:
        semaphore: The imported external semaphore to signal.
        value: Passed through unchanged to the stream method (defaults to 0).
    """
    # The stream method produces no result, so nothing is returned here either;
    # this keeps the wrapper consistent with ``wait_external_semaphore()`` and
    # ``wait_event()``.
    get_stream().signal_external_semaphore(semaphore, value)
4825+
4826+
def wait_external_semaphore(semaphore: ExternalSemaphore, value: int = 0):
    """Wait on an external semaphore on the stream returned by ``get_stream()``.

    Convenience wrapper around ``Stream.wait_external_semaphore()``.

    Args:
        semaphore: The imported external semaphore to wait on.
        value: Passed through unchanged to the stream method (defaults to 0).
    """
    stream = get_stream()
    stream.wait_external_semaphore(semaphore, value)
4831+
4832+
47354833def get_event_elapsed_time (start_event : Event , end_event : Event , synchronize : bool = True ):
47364834 """Get the elapsed time between two recorded events.
47374835
@@ -4772,6 +4870,48 @@ def wait_stream(other_stream: Stream, event: Optional[Event] = None):
47724870 get_stream ().wait_stream (other_stream , event = event )
47734871
47744872
class ExternalMemoryBuffer:
    """Handle to an external memory object imported into CUDA.

    The memory is imported in ``__init__`` through the native
    ``cuda_import_external_memory`` entry point, can be exposed as a Warp array
    with :meth:`map`, and is released in ``__del__`` through
    ``cuda_destroy_external_memory``.
    """

    # Values accepted for the ``handle_type`` constructor argument.
    HANDLE_TYPE_OPAQUEFD = 1
    HANDLE_TYPE_OPAQUEWIN32 = 2
    HANDLE_TYPE_OPAQUEWIN32KMT = 3
    HANDLE_TYPE_D3D12HEAP = 4
    HANDLE_TYPE_D3D12RESOURCE = 5
    HANDLE_TYPE_D3D11RESOURCE = 6
    HANDLE_TYPE_D3D11RESOURCEKMT = 7
    HANDLE_TYPE_NVSCIBUF = 8

    # Value accepted for the ``flags`` constructor argument.
    FLAG_DEDICATED = 1

    def __init__(self, handle: Union[ctypes.c_void_p, int], handle_type: int, size: int, flags: int = 0, device: Devicelike = None):
        """Import an external memory object on the given device.

        Args:
            handle: The external handle to import (pointer-sized value).
            handle_type: One of the ``HANDLE_TYPE_*`` constants.
            size: Size passed to the native import call and later used as the
                mapped size and array capacity in :meth:`map`.
            flags: Passed through unchanged to the native import call.
            device: Device on which to import; resolved with ``get_device()``.

        Raises:
            RuntimeError: If the native import call returns a NULL handle.
        """
        # Define the attribute up front so ``__del__`` always finds it, even
        # when one of the statements below raises.
        self.external_memory = None

        self.device = get_device(device)
        self.context = self.device.context
        self.external_memory = runtime.core.cuda_import_external_memory(
            self.context, handle_type, handle, size, flags
        )
        self.size = size
        # ctypes converts a NULL ``c_void_p`` return value to ``None``
        if self.external_memory is None:
            raise RuntimeError(f"Failed to import external memory {handle} with CUDA")

    def map(self, dtype: type, shape: Sequence[int]) -> warp.array:
        """Map the imported memory and wrap it in a Warp array.

        Args:
            dtype: Element type of the returned array.
            shape: Shape of the returned array.

        Returns:
            A ``warp.array`` on this object's device that wraps the mapped
            pointer, with ``capacity`` set to the imported size.

        Raises:
            RuntimeError: If no device pointer was produced by the mapping call.

        NOTE(review): the returned array is built from a raw pointer only and
        does not appear to hold a reference to this object; presumably this
        object must outlive the array - confirm.
        """
        ptr = ctypes.c_uint64(0)
        # NOTE(review): the two literal zeros are presumably the byte offset and
        # the mapping flags, with ``self.size`` the mapped size - confirm
        # against the native signature.
        runtime.core.cuda_external_memory_get_mapped_buffer(
            self.context, self.external_memory, 0, self.size, 0, ctypes.byref(ptr)
        )
        # The native call has no return value, so the out-parameter is the only
        # result available; address 0 cannot back an array.
        if not ptr.value:
            raise RuntimeError("Failed to map external memory buffer with CUDA")
        return warp.array(ptr=ptr.value, dtype=dtype, shape=shape, device=self.device, capacity=self.size)

    def __del__(self):
        """Destroy the imported memory object, if one was successfully imported."""
        # ``__del__`` also runs for objects whose ``__init__`` raised early (or
        # was never called), in which case the attribute may not exist at all.
        external_memory = getattr(self, "external_memory", None)
        if not external_memory:
            return

        # use CUDA context guard to avoid side effects during garbage collection
        with self.device.context_guard:
            runtime.core.cuda_destroy_external_memory(self.context, external_memory)

        # forget the handle so a repeated call cannot destroy it twice
        self.external_memory = None
4913+
4914+
47754915class RegisteredGLBuffer :
47764916 """
47774917 Helper class to register a GL buffer with CUDA so that it can be mapped to a Warp array.
0 commit comments