33import numpy as np
44import ctypes
55import ctypes .util
6- from collections import namedtuple
7- import os
86import sys
97import logging
108
2927 raise RuntimeError (
3028 'cant find libamdhip64.so or libnvhip64.so. make sure LD_LIBRARY_PATH is set' )
3129
32- _libhiprtc_libname = 'libhiprtc.so'
33- _libhiprtc = None
34- try :
35- _libhiprtc = ctypes .cdll .LoadLibrary (_libhiprtc_libname )
36- except :
37- raise OSError ('hiprtc library not found' )
38-
3930else :
4031 # Currently we do not support windows
4132 raise RuntimeError ('Only linux is supported' )
7162_libhip .hipModuleGetGlobal .argtypes = [ctypes .POINTER (ctypes .c_void_p ), ctypes .POINTER (ctypes .c_size_t ), ctypes .c_void_p , ctypes .c_char_p ]
7263_libhip .hipMemset .restype = ctypes .c_int
7364_libhip .hipMemset .argtypes = [ctypes .c_void_p , ctypes .c_int , ctypes .c_size_t ]
74- _libhip .hipMemcpyToSymbol .restype = ctypes .c_int
75- _libhip .hipMemcpyToSymbol .argtypes = [ctypes .c_void_p , ctypes .c_void_p , ctypes .c_size_t , ctypes .c_size_t , ctypes .c_int ]
76-
7765
7866hipSuccess = 0
7967
@@ -100,21 +88,25 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
10088
10189 self .name = self .hipProps ._name .decode ('utf-8' )
10290 self .max_threads = self .hipProps .maxThreadsPerBlock
103- logging .debug ("self.max_threads: " + str (self .max_threads ))
104-
10591 self .device = device
10692 self .compiler_options = compiler_options
93+ self .iterations = iterations
10794
10895 env = dict ()
10996 env ["device_name" ] = self .name
97+ env ["iterations" ] = self .iterations
98+ env ["compiler_options" ] = compiler_options
99+ env ["device_properties" ] = self .hipProps
110100 self .env = env
111101
112102 # create a stream and events
113103 self .stream = hip .hipStreamCreate ()
114104 self .start = hip .hipEventCreate ()
115105 self .end = hip .hipEventCreate ()
116106
107+ # default dynamically allocated shared memory size, can be overwritten using smem_args
117108 self .smem_size = 0
109+
118110 self .current_module = None
119111
120112 # setup observers
@@ -126,10 +118,12 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
126118
127119 def ready_argument_list (self , arguments ):
128120 """ready argument list to be passed to the HIP function
121+
129122 :param arguments: List of arguments to be passed to the HIP function.
130123 The order should match the argument list on the HIP function.
131124 Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.
132125 :type arguments: list(numpy objects)
126+
133127 :returns: Ctypes structure of arguments to be passed to the HIP function.
134128 :rtype: ctypes structure
135129 """
@@ -175,21 +169,23 @@ def compile(self, kernel_instance):
175169 :rtype: ctypes._FuncPtr
176170 """
177171 logging .debug ("HipFunction compile called" )
172+
173+ #Format and create program
178174 kernel_string = kernel_instance .kernel_string
179175 kernel_name = kernel_instance .name
180-
181176 if 'extern "C"' not in kernel_string :
182177 kernel_string = 'extern "C" {\n ' + kernel_string + "\n }"
183-
184178 kernel_ptr = hiprtc .hiprtcCreateProgram (kernel_string , kernel_name , [], [])
185179
180+ #Compile based on device (Not yet tested for non-AMD devices)
186181 plat = hip .hipGetPlatformName ()
187- #Compile based on device
188182 if plat == "amd" :
189183 hiprtc .hiprtcCompileProgram (
190184 kernel_ptr , [f'--offload-arch={ self .hipProps .gcnArchName } ' ])
191185 else :
192186 hiprtc .hiprtcCompileProgram (kernel_ptr , [])
187+
188+ #Get module and kernel from compiled kernel string
193189 code = hiprtc .hiprtcGetCode (kernel_ptr )
194190 module = hip .hipModuleLoadData (code )
195191 self .current_module = module
@@ -200,11 +196,13 @@ def compile(self, kernel_instance):
200196 def start_event (self ):
201197 """Records the event that marks the start of a measurement"""
202198 logging .debug ("HipFunction start_event called" )
199+
203200 hip .hipEventRecord (self .start , self .stream )
204201
205202 def stop_event (self ):
206203 """Records the event that marks the end of a measurement"""
207204 logging .debug ("HipFunction stop_event called" )
205+
208206 hip .hipEventRecord (self .end , self .stream )
209207
210208 def kernel_finished (self ):
@@ -221,12 +219,13 @@ def kernel_finished(self):
221219 def synchronize (self ):
222220 """Halts execution until device has finished its tasks"""
223221 logging .debug ("HipFunction synchronize called" )
222+
224223 hip .hipDeviceSynchronize ()
225224
226225 def run_kernel (self , func , gpu_args , threads , grid , stream = None ):
227226 """runs the HIP kernel passed as 'func'
228227
229- :param func: A PyHIP kernel compiled for this specific kernel configuration
228+ :param func: A HIP kernel compiled for this specific kernel configuration
230229 :type func: ctypes pionter
231230
232231 :param gpu_args: A ctypes structure of arguments to the kernel, order should match the
@@ -269,6 +268,8 @@ def memset(self, allocation, value, size):
269268 """
270269 logging .debug ("HipFunction memset called" )
271270
271+ # Format arguments to correct type, set the memory and
272+ # check return value of memset (as done in PyHIP with hipCheckStatus)
272273 ctypes_value = ctypes .c_int (value )
273274 ctypes_size = ctypes .c_size_t (size )
274275 status = _libhip .hipMemset (allocation , ctypes_value , ctypes_size )
@@ -285,6 +286,7 @@ def memcpy_dtoh(self, dest, src):
285286 """
286287 logging .debug ("HipFunction memcpy_dtoh called" )
287288
289+ # Format arguments to correct type and perform memory copy
288290 dtype_str = str (dest .dtype )
289291 dest_c = dest .ctypes .data_as (ctypes .POINTER (dtype_map [dtype_str ]))
290292 hip .hipMemcpy_dtoh (dest_c , src , dest .nbytes )
@@ -300,6 +302,7 @@ def memcpy_htod(self, dest, src):
300302 """
301303 logging .debug ("HipFunction memcpy_htod called" )
302304
305+ # Format arguments to correct type and perform memory copy
303306 dtype_str = str (src .dtype )
304307 src_c = src .ctypes .data_as (ctypes .POINTER (dtype_map [dtype_str ]))
305308 hip .hipMemcpy_htod (dest , src_c , src .nbytes )
@@ -316,21 +319,24 @@ def copy_constant_memory_args(self, cmem_args):
316319 """
317320 logging .debug ("HipFunction copy_constant_memory_args called" )
318321
322+ # Iterate over dictionary
319323 for k , v in cmem_args .items ():
320- #Format arguments, call hipModuleGetGlobal, and check return status
324+ # Format arguments, call hipModuleGetGlobal,
325+ # and check return status (as done in PyHIP with hipCheckStatus)
321326 symbol_string = ctypes .c_char_p (k .encode ('utf-8' ))
322327 symbol = ctypes .c_void_p ()
323328 symbol_ptr = ctypes .POINTER (ctypes .c_void_p )(symbol )
324329 size_kernel = ctypes .c_size_t (0 )
325330
331+ # Get constant memory symbol and check return value of hipModuleGetGlobal
332+ # (as done in PyHIP with hipCheckStatus)
326333 size_kernel_ptr = ctypes .POINTER (ctypes .c_size_t )(size_kernel )
327334 status = _libhip .hipModuleGetGlobal (symbol_ptr , size_kernel_ptr , self .current_module , symbol_string )
328335 hip .hipCheckStatus (status )
329336
330- #Format arguments and call hipMemcpy_htod
337+ #Format arguments and perform memory copy
331338 dtype_str = str (v .dtype )
332339 v_c = v .ctypes .data_as (ctypes .POINTER (dtype_map [dtype_str ]))
333-
334340 hip .hipMemcpy_htod (symbol_ptr .contents , v_c , v .nbytes )
335341
336342 def copy_shared_memory_args (self , smem_args ):
@@ -340,9 +346,8 @@ def copy_shared_memory_args(self, smem_args):
340346 self .smem_size = smem_args ["size" ]
341347
342348 def copy_texture_memory_args (self , texmem_args ):
343- """This method must implement the allocation and copy of texture memory to the GPU."""
344349 logging .debug ("HipFunction copy_texture_memory_args called" )
345350
346351 raise NotImplementedError ("HIP backend does not support texture memory" )
347352
348- units = {"time" : "ms" }
353+ units = {"time" : "ms" , "power" : "s,mW" , "energy" : "J" }
0 commit comments