Skip to content

Commit 2388be4

Browse files
committed
clean up and added more comments
1 parent 685aed3 commit 2388be4

File tree

1 file changed

+28
-23
lines changed

1 file changed

+28
-23
lines changed

kernel_tuner/backends/hip.py

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import ctypes
55
import ctypes.util
6-
from collections import namedtuple
7-
import os
86
import sys
97
import logging
108

@@ -29,13 +27,6 @@
2927
raise RuntimeError(
3028
'cant find libamdhip64.so or libnvhip64.so. make sure LD_LIBRARY_PATH is set')
3129

32-
_libhiprtc_libname = 'libhiprtc.so'
33-
_libhiprtc = None
34-
try:
35-
_libhiprtc = ctypes.cdll.LoadLibrary(_libhiprtc_libname)
36-
except:
37-
raise OSError('hiprtc library not found')
38-
3930
else:
4031
# Currently we do not support windows
4132
raise RuntimeError('Only linux is supported')
@@ -71,9 +62,6 @@
7162
_libhip.hipModuleGetGlobal.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t), ctypes.c_void_p, ctypes.c_char_p]
7263
_libhip.hipMemset.restype = ctypes.c_int
7364
_libhip.hipMemset.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_size_t]
74-
_libhip.hipMemcpyToSymbol.restype = ctypes.c_int
75-
_libhip.hipMemcpyToSymbol.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t, ctypes.c_int]
76-
7765

7866
hipSuccess = 0
7967

@@ -100,21 +88,25 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
10088

10189
self.name = self.hipProps._name.decode('utf-8')
10290
self.max_threads = self.hipProps.maxThreadsPerBlock
103-
logging.debug("self.max_threads: " + str(self.max_threads))
104-
10591
self.device = device
10692
self.compiler_options = compiler_options
93+
self.iterations = iterations
10794

10895
env = dict()
10996
env["device_name"] = self.name
97+
env["iterations"] = self.iterations
98+
env["compiler_options"] = compiler_options
99+
env["device_properties"] = self.hipProps
110100
self.env = env
111101

112102
# create a stream and events
113103
self.stream = hip.hipStreamCreate()
114104
self.start = hip.hipEventCreate()
115105
self.end = hip.hipEventCreate()
116106

107+
# default dynamically allocated shared memory size, can be overwritten using smem_args
117108
self.smem_size = 0
109+
118110
self.current_module = None
119111

120112
# setup observers
@@ -126,10 +118,12 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
126118

127119
def ready_argument_list(self, arguments):
128120
"""ready argument list to be passed to the HIP function
121+
129122
:param arguments: List of arguments to be passed to the HIP function.
130123
The order should match the argument list on the HIP function.
131124
Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.
132125
:type arguments: list(numpy objects)
126+
133127
:returns: Ctypes structure of arguments to be passed to the HIP function.
134128
:rtype: ctypes structure
135129
"""
@@ -175,21 +169,23 @@ def compile(self, kernel_instance):
175169
:rtype: ctypes._FuncPtr
176170
"""
177171
logging.debug("HipFunction compile called")
172+
173+
#Format and create program
178174
kernel_string = kernel_instance.kernel_string
179175
kernel_name = kernel_instance.name
180-
181176
if 'extern "C"' not in kernel_string:
182177
kernel_string = 'extern "C" {\n' + kernel_string + "\n}"
183-
184178
kernel_ptr = hiprtc.hiprtcCreateProgram(kernel_string, kernel_name, [], [])
185179

180+
#Compile based on device (Not yet tested for non-AMD devices)
186181
plat = hip.hipGetPlatformName()
187-
#Compile based on device
188182
if plat == "amd":
189183
hiprtc.hiprtcCompileProgram(
190184
kernel_ptr, [f'--offload-arch={self.hipProps.gcnArchName}'])
191185
else:
192186
hiprtc.hiprtcCompileProgram(kernel_ptr, [])
187+
188+
#Get module and kernel from compiled kernel string
193189
code = hiprtc.hiprtcGetCode(kernel_ptr)
194190
module = hip.hipModuleLoadData(code)
195191
self.current_module = module
@@ -200,11 +196,13 @@ def compile(self, kernel_instance):
200196
def start_event(self):
201197
"""Records the event that marks the start of a measurement"""
202198
logging.debug("HipFunction start_event called")
199+
203200
hip.hipEventRecord(self.start, self.stream)
204201

205202
def stop_event(self):
206203
"""Records the event that marks the end of a measurement"""
207204
logging.debug("HipFunction stop_event called")
205+
208206
hip.hipEventRecord(self.end, self.stream)
209207

210208
def kernel_finished(self):
@@ -221,12 +219,13 @@ def kernel_finished(self):
221219
def synchronize(self):
222220
"""Halts execution until device has finished its tasks"""
223221
logging.debug("HipFunction synchronize called")
222+
224223
hip.hipDeviceSynchronize()
225224

226225
def run_kernel(self, func, gpu_args, threads, grid, stream=None):
227226
"""runs the HIP kernel passed as 'func'
228227
229-
:param func: A PyHIP kernel compiled for this specific kernel configuration
228+
:param func: A HIP kernel compiled for this specific kernel configuration
230229
:type func: ctypes pionter
231230
232231
:param gpu_args: A ctypes structure of arguments to the kernel, order should match the
@@ -269,6 +268,8 @@ def memset(self, allocation, value, size):
269268
"""
270269
logging.debug("HipFunction memset called")
271270

271+
# Format arguments to correct type, set the memory and
272+
# check return value of memset (as done in PyHIP with hipCheckStatus)
272273
ctypes_value = ctypes.c_int(value)
273274
ctypes_size = ctypes.c_size_t(size)
274275
status = _libhip.hipMemset(allocation, ctypes_value, ctypes_size)
@@ -285,6 +286,7 @@ def memcpy_dtoh(self, dest, src):
285286
"""
286287
logging.debug("HipFunction memcpy_dtoh called")
287288

289+
# Format arguments to correct type and perform memory copy
288290
dtype_str = str(dest.dtype)
289291
dest_c = dest.ctypes.data_as(ctypes.POINTER(dtype_map[dtype_str]))
290292
hip.hipMemcpy_dtoh(dest_c, src, dest.nbytes)
@@ -300,6 +302,7 @@ def memcpy_htod(self, dest, src):
300302
"""
301303
logging.debug("HipFunction memcpy_htod called")
302304

305+
# Format arguments to correct type and perform memory copy
303306
dtype_str = str(src.dtype)
304307
src_c = src.ctypes.data_as(ctypes.POINTER(dtype_map[dtype_str]))
305308
hip.hipMemcpy_htod(dest, src_c, src.nbytes)
@@ -316,21 +319,24 @@ def copy_constant_memory_args(self, cmem_args):
316319
"""
317320
logging.debug("HipFunction copy_constant_memory_args called")
318321

322+
# Iterate over dictionary
319323
for k, v in cmem_args.items():
320-
#Format arguments, call hipModuleGetGlobal, and check return status
324+
# Format arguments, call hipModuleGetGlobal,
325+
# and check return status (as done in PyHIP with hipCheckStatus)
321326
symbol_string = ctypes.c_char_p(k.encode('utf-8'))
322327
symbol = ctypes.c_void_p()
323328
symbol_ptr = ctypes.POINTER(ctypes.c_void_p)(symbol)
324329
size_kernel = ctypes.c_size_t(0)
325330

331+
# Get constant memory symbol and check return value of hipModuleGetGlobal
332+
# (as done in PyHIP with hipCheckStatus)
326333
size_kernel_ptr = ctypes.POINTER(ctypes.c_size_t)(size_kernel)
327334
status = _libhip.hipModuleGetGlobal(symbol_ptr, size_kernel_ptr, self.current_module, symbol_string)
328335
hip.hipCheckStatus(status)
329336

330-
#Format arguments and call hipMemcpy_htod
337+
#Format arguments and perform memory copy
331338
dtype_str = str(v.dtype)
332339
v_c = v.ctypes.data_as(ctypes.POINTER(dtype_map[dtype_str]))
333-
334340
hip.hipMemcpy_htod(symbol_ptr.contents, v_c, v.nbytes)
335341

336342
def copy_shared_memory_args(self, smem_args):
@@ -340,9 +346,8 @@ def copy_shared_memory_args(self, smem_args):
340346
self.smem_size = smem_args["size"]
341347

342348
def copy_texture_memory_args(self, texmem_args):
343-
"""This method must implement the allocation and copy of texture memory to the GPU."""
344349
logging.debug("HipFunction copy_texture_memory_args called")
345350

346351
raise NotImplementedError("HIP backend does not support texture memory")
347352

348-
units = {"time": "ms"}
353+
units = {"time": "ms", "power": "s,mW", "energy": "J"}

0 commit comments

Comments
 (0)