@@ -128,8 +128,8 @@ def ready_argument_list(self, arguments):
128128 The order should match the argument list on the HIP function.
129129 Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.
130130 :type arguments: list(numpy objects)
131- :returns: A ctypes structure that can be passed to the HIP function.
132- :rtype: ctypes.Structure
131+ :returns: List of ctypes arguments to be passed to the HIP function.
132+ :rtype: list of ctypes
133133 """
134134 logging .debug ("HipFunction ready_argument_list called" )
135135
@@ -151,13 +151,7 @@ def ready_argument_list(self, arguments):
151151 data_ctypes = dtype_map [dtype_str ](arg )
152152 ctype_args .append (data_ctypes )
153153
154- # Determine the types of the fields in the structure
155- field_types = [type (x ) for x in ctype_args ]
156- # Define a new ctypes structure with the inferred layout
157- class ArgListStructure (ctypes .Structure ):
158- _fields_ = [(f'field{ i } ' , t ) for i , t in enumerate (field_types )]
159-
160- return ArgListStructure (* ctype_args )
154+ return ctype_args
161155
162156
163157 def compile (self , kernel_instance ):
@@ -228,7 +222,7 @@ def run_kernel(self, func, gpu_args, threads, grid, stream=None):
228222 :param gpu_args: A list of arguments to the kernel, order should match the
229223 order in the code. Allowed values are either variables in global memory
230224 or single values passed by value.
231- :type gpu_args: ctypes.Structure
225+ :type gpu_args: list of ctypes
232226
233227 :param threads: A tuple listing the number of threads in each dimension of
234228 the thread block
@@ -241,6 +235,15 @@ def run_kernel(self, func, gpu_args, threads, grid, stream=None):
241235 logging .debug ("HipFunction run_kernel called" )
242236 if stream is None :
243237 stream = self .stream
238+
239+ # Determine the types of the fields in the structure
240+ field_types = [type (x ) for x in gpu_args ]
241+ # Define a new ctypes structure with the inferred layout
242+ class ArgListStructure (ctypes .Structure ):
243+ _fields_ = [(f'field{ i } ' , t ) for i , t in enumerate (field_types )]
244+
245+ gpu_args = ArgListStructure (* gpu_args )
246+
244247 hip .hipModuleLaunchKernel (func ,
245248 grid [0 ], grid [1 ], grid [2 ],
246249 threads [0 ], threads [1 ], threads [2 ],
@@ -261,6 +264,8 @@ def memset(self, allocation, value, size):
261264 :type size: int
262265
263266 """
267+ logging .debug ("HipFunction memset called" )
268+ print ("HipFunction memset called" )
264269 ctypes_value = ctypes .c_int (value )
265270 ctypes_size = ctypes .c_size_t (size )
266271 status = _libhip .hipMemset (allocation , ctypes_value , ctypes_size )
@@ -276,8 +281,10 @@ def memcpy_dtoh(self, dest, src):
276281 :type src: ctypes ptr
277282 """
278283 logging .debug ("HipFunction memcpy_dtoh called" )
279- dtype_str = str (src .dtype )
280- hip .hipMemcpy_dtoh (ctypes .byref (dest .ctypes ), src , ctypes .sizeof (dtype_map [dtype_str ]) * src .size )
284+ print ("HipFunction memcpy_dtoh called" )
285+
286+ address = dest .ctypes .data
287+ hip .hipMemcpy_dtoh (ctypes .c_void_p (address ), src , dest .nbytes )
281288
def memcpy_htod(self, dest, src):
    """Perform a host to device memory copy.

    :param dest: Destination of the copy; it is handed unchanged to
        ``hip.hipMemcpy_htod``. Presumably a GPU memory allocation, as
        ``src`` is in ``memcpy_dtoh`` -- confirm against callers.
    :type dest: ctypes ptr

    :param src: A numpy array in host memory holding the data to copy
    :type src: numpy.ndarray
    """
    logging.debug("HipFunction memcpy_htod called")

    # ``src.ctypes`` is numpy's helper object, not a ctypes instance, so it
    # cannot be given to ``ctypes.byref`` (that raises TypeError). Pass the
    # raw address of the array buffer instead, the same way memcpy_dtoh
    # handles its host-side array.
    # NOTE(review): this reads ``src.nbytes`` bytes starting at the buffer
    # address, which presumably requires ``src`` to be contiguous -- confirm.
    host_address = src.ctypes.data
    # ``nbytes`` is itemsize * size, so no dtype_map lookup is needed.
    hip.hipMemcpy_htod(dest, ctypes.c_void_p(host_address), src.nbytes)
294302
@@ -303,6 +311,7 @@ def copy_constant_memory_args(self, cmem_args):
303311 :type cmem_args: dict( string: numpy.ndarray, ... )
304312 """
305313 logging .debug ("HipFunction copy_constant_memory_args called" )
314+ print ("HipFunction copy_constant_memory_args called" )
306315 logging .debug ("current module: " + str (self .current_module ))
307316
308317 for k , v in cmem_args .items ():
@@ -316,11 +325,13 @@ def copy_constant_memory_args(self, cmem_args):
def copy_shared_memory_args(self, smem_args):
    """Add shared memory arguments to the kernel.

    Only records the requested size on the instance as ``self.smem_size``;
    nothing else is done with it here.

    :param smem_args: Shared memory settings; must contain the key ``"size"``.
    :type smem_args: dict

    :raises KeyError: if ``smem_args`` has no ``"size"`` entry.
    """
    # Trace through the logging module only; a bare print() would write to
    # stdout on every call regardless of the configured log level.
    logging.debug("HipFunction copy_shared_memory_args called")
    self.smem_size = smem_args["size"]
320330
def copy_texture_memory_args(self, texmem_args):
    """Reject texture memory arguments.

    Every call raises, whatever ``texmem_args`` contains.

    :param texmem_args: Texture memory arguments requested for the kernel
        (unused).

    :raises NotImplementedError: always.
    """
    # Trace through the logging module only; a bare print() would write to
    # stdout on every call regardless of the configured log level.
    logging.debug("HipFunction copy_texture_memory_args called")
    raise NotImplementedError("HIP backend does not support texture memory")
325336
326337 units = {"time" : "ms" }
0 commit comments