Skip to content

Commit f0ab5ef

Browse files
committed
Modified copy_constant_memory_args and implemented test_copy_constant_memory_args (the new test passes)
1 parent 92f55a5 commit f0ab5ef

File tree

2 files changed

+56
-14
lines changed

2 files changed

+56
-14
lines changed

kernel_tuner/backends/hip.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@
6868
_libhip.hipEventQuery.restype = ctypes.c_int
6969
_libhip.hipEventQuery.argtypes = [ctypes.c_void_p]
7070
_libhip.hipModuleGetGlobal.restype = ctypes.c_int
71-
_libhip.hipModuleGetGlobal.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p, ctypes.c_char_p]
71+
_libhip.hipModuleGetGlobal.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t), ctypes.c_void_p, ctypes.c_char_p]
7272
_libhip.hipMemset.restype = ctypes.c_int
7373
_libhip.hipMemset.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_size_t]
74+
_libhip.hipMemcpyToSymbol.restype = ctypes.c_int
75+
_libhip.hipMemcpyToSymbol.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t, ctypes.c_int]
7476

7577

7678
hipSuccess = 0
@@ -313,16 +315,23 @@ def copy_constant_memory_args(self, cmem_args):
313315
:type cmem_args: dict( string: numpy.ndarray, ... )
314316
"""
315317
logging.debug("HipFunction copy_constant_memory_args called")
316-
print("HipFunction copy_constant_memory_args called")
317-
logging.debug("current module: " + str(self.current_module))
318318

319319
for k, v in cmem_args.items():
320-
symbol = ctypes.c_void_p
321-
size_kernel = ctypes.c_size_t
322-
status = _libhip.hipModuleGetGlobal(symbol, size_kernel, self.current_module, str.encode(k))
320+
#Format arguments, call hipModuleGetGlobal, and check return status
321+
symbol_string = ctypes.c_char_p(k.encode('utf-8'))
322+
symbol = ctypes.c_void_p()
323+
symbol_ptr = ctypes.POINTER(ctypes.c_void_p)(symbol)
324+
size_kernel = ctypes.c_size_t(0)
325+
326+
size_kernel_ptr = ctypes.POINTER(ctypes.c_size_t)(size_kernel)
327+
status = _libhip.hipModuleGetGlobal(symbol_ptr, size_kernel_ptr, self.current_module, symbol_string)
323328
hip.hipCheckStatus(status)
329+
330+
#Format arguments and call hipMemcpy_htod
324331
dtype_str = str(v.dtype)
325-
hip.hipMemcpy_htod(symbol, ctypes.byref(v.ctypes), ctypes.sizeof(dtype_map[dtype_str]) * v.size)
332+
v_c = v.ctypes.data_as(ctypes.POINTER(dtype_map[dtype_str]))
333+
334+
hip.hipMemcpy_htod(symbol_ptr.contents, v_c, v.nbytes)
326335

327336
def copy_shared_memory_args(self, smem_args):
328337
"""add shared memory arguments to the kernel"""

test/test_hip_functions.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_compile():
5656
"""
5757

5858
kernel_name = "vector_add"
59-
kernel_sources = KernelSource(kernel_name, kernel_string, "cuda")
59+
kernel_sources = KernelSource(kernel_name, kernel_string, "HIP")
6060
kernel_instance = KernelInstance(kernel_name, kernel_sources, kernel_string, [], None, None, dict(), [])
6161
dev = kt_hip.HipFunctions(0)
6262
try:
@@ -71,11 +71,11 @@ def test_memset_and_memcpy_dtoh():
7171
x = np.array(a).astype(np.int8)
7272
x_d = hip.hipMalloc(x.nbytes)
7373

74-
Hipfunc = kt_hip.HipFunctions()
75-
Hipfunc.memset(x_d, 4, x.nbytes)
74+
dev = kt_hip.HipFunctions()
75+
dev.memset(x_d, 4, x.nbytes)
7676

7777
output = np.empty(4, dtype=np.int8)
78-
Hipfunc.memcpy_dtoh(output, x_d)
78+
dev.memcpy_dtoh(output, x_d)
7979

8080
assert all(output == np.full(4, 4))
8181

@@ -86,12 +86,45 @@ def test_memcpy_htod():
8686
x_d = hip.hipMalloc(x.nbytes)
8787
output = np.empty(4, dtype=np.float32)
8888

89-
Hipfunc = kt_hip.HipFunctions()
90-
Hipfunc.memcpy_htod(x_d, x)
91-
Hipfunc.memcpy_dtoh(output, x_d)
89+
dev = kt_hip.HipFunctions()
90+
dev.memcpy_htod(x_d, x)
91+
dev.memcpy_dtoh(output, x_d)
9292

9393
assert all(output == x)
9494

95+
@skip_if_no_pyhip
96+
def test_copy_constant_memory_args():
97+
kernel_string = """
98+
__constant__ float my_constant_data[100];
99+
__global__ void copy_data_kernel(float* output) {
100+
int idx = threadIdx.x + blockIdx.x * blockDim.x;
101+
if (idx < 100) {
102+
output[idx] = my_constant_data[idx];
103+
}
104+
}
105+
"""
106+
107+
kernel_name = "copy_data_kernel"
108+
kernel_sources = KernelSource(kernel_name, kernel_string, "HIP")
109+
kernel_instance = KernelInstance(kernel_name, kernel_sources, kernel_string, [], None, None, dict(), [])
110+
dev = kt_hip.HipFunctions(0)
111+
kernel = dev.compile(kernel_instance)
112+
113+
my_constant_data = np.full(100, 23).astype(np.float32)
114+
cmem_args = {'my_constant_data': my_constant_data}
115+
dev.copy_constant_memory_args(cmem_args)
116+
117+
output = np.full(100, 0).astype(np.float32)
118+
gpu_args = dev.ready_argument_list([output])
119+
120+
threads = (100, 1, 1)
121+
grid = (1, 1, 1)
122+
dev.run_kernel(kernel, gpu_args, threads, grid)
123+
124+
dev.memcpy_dtoh(output, gpu_args.field0)
125+
126+
assert(my_constant_data == output).all()
127+
95128
def dummy_func(a, b, block=0, grid=0, stream=None, shared=0, texrefs=None):
96129
pass
97130

0 commit comments

Comments
 (0)