1111 from unittest .mock import patch , Mock
1212
1313import kernel_tuner
14- from kernel_tuner .backends .compiler import CompilerFunctions , Argument
14+ from kernel_tuner .backends .compiler import CompilerFunctions , Argument , is_cupy_array , get_array_module
1515from kernel_tuner .core import KernelSource , KernelInstance
1616from kernel_tuner import util
1717
18- from .context import skip_if_no_gfortran , skip_if_no_gcc , skip_if_no_openmp
18+ from .context import skip_if_no_gfortran , skip_if_no_gcc , skip_if_no_openmp , skip_if_no_cupy
19+ from .test_runners import env as cuda_env # noqa: F401
1920
2021
2122@skip_if_no_gcc
@@ -108,6 +109,29 @@ def test_ready_argument_list5():
108109 assert all (output [0 ].numpy == arg1 )
109110
110111
@skip_if_no_cupy
def test_ready_argument_list6():
    """Check that ``ready_argument_list`` hands a CuPy array through by reference.

    The returned ``Argument`` must hold the very same CuPy array object, and
    the pointer stored in its ``ctypes`` field must address memory whose
    contents equal that array.
    """
    import cupy as cp

    arg = cp.array([1, 2, 3], dtype=np.float32)
    arguments = [arg]

    cfunc = CompilerFunctions()
    output = cfunc.ready_argument_list(arguments)
    print(output)

    assert len(output) == 1
    assert output[0].numpy is arg

    # Rebuild an array view on top of the raw pointer that was handed out.
    # UnownedMemory expects its size in bytes, so the full byte count of the
    # array is passed rather than the number of elements.
    mem = cp.cuda.UnownedMemory(
        ptr=output[0].ctypes.value,
        size=arg.nbytes,
        # No owner is registered; ``arg`` stays referenced for the whole test.
        owner=None,
    )
    ptr = cp.cuda.MemoryPointer(mem, 0)
    output_arg = cp.ndarray(shape=arg.shape, dtype=arg.dtype, memptr=ptr)
    assert cp.all(output_arg == arg)
133+
134+
111135@skip_if_no_gcc
112136def test_byte_array_arguments ():
113137 arg1 = np .array ([1 , 2 , 3 ]).astype (np .int8 )
@@ -206,8 +230,29 @@ def test_memset():
206230 assert all (x == np .zeros (4 ))
207231
208232
209- @skip_if_no_gcc
@skip_if_no_cupy
def test_memcpy_dtoh():
    """Copy a CuPy array into a NumPy array through ``memcpy_dtoh``."""
    import cupy as cp

    a = [1, 2, 3, 4]
    x = cp.asarray(a, dtype=np.float32)
    # Wrap the array together with its raw data pointer in an Argument.
    arg = Argument(numpy=x, ctypes=C.c_void_p(x.data.ptr))
    output = np.zeros(len(x), dtype=x.dtype)

    CompilerFunctions().memcpy_dtoh(output, arg)

    # The "=" specifier echoes the expression text, so the short names stay.
    print(f"{type(x)=} {x=}")
    print(f"{type(a)=} {a=}")
    print(f"{type(output)=} {output=}")

    # Both the destination and the source must equal the input values.
    assert all(output == a)
    assert all(x.get() == a)
252+
253+
254+ @skip_if_no_gcc
255+ def test_memcpy_host_dtoh ():
211256 a = [1 , 2 , 3 , 4 ]
212257 x = np .array (a ).astype (np .float32 )
213258 x_c = x .ctypes .data_as (C .POINTER (C .c_float ))
@@ -224,8 +269,44 @@ def test_memcpy_dtoh():
224269 assert all (x == a )
225270
226271
227- @skip_if_no_gcc
@skip_if_no_cupy
def test_memcpy_device_dtoh():
    """Call ``memcpy_dtoh`` with a CuPy array as the destination as well."""
    import cupy as cp

    a = [1, 2, 3, 4]
    x = cp.asarray(a, dtype=np.float32)
    # Wrap the array together with its raw data pointer in an Argument.
    arg = Argument(numpy=x, ctypes=C.c_void_p(x.data.ptr))
    output = cp.zeros_like(x)

    CompilerFunctions().memcpy_dtoh(output, arg)

    # The "=" specifier echoes the expression text, so the short names stay.
    print(f"{type(x)=} {x=}")
    print(f"{type(a)=} {a=}")
    print(f"{type(output)=} {output=}")

    # Fetch both CuPy arrays with .get() before comparing to the input list.
    assert all(output.get() == a)
    assert all(x.get() == a)
291+
292+
@skip_if_no_cupy
def test_memcpy_htod():
    """Copy a NumPy array into a zeroed CuPy array through ``memcpy_htod``."""
    import cupy as cp

    expected = [1, 2, 3, 4]
    host_src = np.array(expected, dtype=np.float32)
    device_dst = cp.zeros(len(host_src), dtype=host_src.dtype)
    # Wrap the destination together with its raw data pointer in an Argument.
    arg = Argument(numpy=device_dst, ctypes=C.c_void_p(device_dst.data.ptr))

    CompilerFunctions().memcpy_htod(arg, host_src)

    # Fetch the destination with .get() and compare it to the input list.
    assert all(arg.numpy.get() == expected)
307+
308+
309+ def test_memcpy_host_htod ():
229310 a = [1 , 2 , 3 , 4 ]
230311 src = np .array (a ).astype (np .float32 )
231312 x = np .zeros_like (src )
@@ -238,6 +319,22 @@ def test_memcpy_htod():
238319 assert all (arg .numpy == a )
239320
240321
@skip_if_no_cupy
def test_memcpy_device_htod():
    """Call ``memcpy_htod`` with a CuPy array as the source as well."""
    import cupy as cp

    expected = [1, 2, 3, 4]
    device_src = cp.array(expected, dtype=np.float32)
    device_dst = cp.zeros(len(device_src), dtype=device_src.dtype)
    # Wrap the destination together with its raw data pointer in an Argument.
    arg = Argument(numpy=device_dst, ctypes=C.c_void_p(device_dst.data.ptr))

    CompilerFunctions().memcpy_htod(arg, device_src)

    # Fetch the destination with .get() and compare it to the input list.
    assert all(arg.numpy.get() == expected)
336+
337+
241338@skip_if_no_gfortran
242339def test_complies_fortran_function_no_module ():
243340 kernel_string = """
@@ -335,3 +432,58 @@ def test_benchmark(env):
335432 assert all (["nthreads" in result for result in results ])
336433 assert all (["time" in result for result in results ])
337434 assert all ([result ["time" ] > 0.0 for result in results ])
435+
436+
@skip_if_no_cupy
def test_is_cupy_array():
    """``is_cupy_array`` accepts a CuPy array and rejects a NumPy array."""
    import cupy as cp

    device_array = cp.array([1.0])
    assert is_cupy_array(device_array)

    host_array = np.array([1.0])
    assert not is_cupy_array(host_array)
443+
444+
def test_is_cupy_array_no_cupy():
    """``is_cupy_array`` rejects a NumPy array.

    This test carries no skip decorator and does not import CuPy itself.
    """
    host_array = np.array([1.0])
    assert not is_cupy_array(host_array)
447+
448+
@skip_if_no_cupy
def test_get_array_module():
    """``get_array_module`` returns ``cupy`` for CuPy input and ``numpy`` for NumPy input."""
    import cupy as cp

    device_array = cp.array([1.0])
    host_array = np.array([1.0])

    assert get_array_module(device_array) == cp
    assert get_array_module(host_array) == np
455+
456+
@skip_if_no_cupy
@skip_if_no_gcc
def test_run_kernel():
    """Run a vector-add kernel on CuPy arrays through ``kernel_tuner.run_kernel``.

    The source is submitted with ``lang="C"`` and exposes an ``extern "C"``
    host function, ``vector_add``, that launches the kernel itself. After the
    call the test expects ``c`` to hold ``a + b``.
    """
    import cupy as cp

    kernel_string = """
    __global__ void vector_add_kernel(float *c, const float *a, const float *b, int n) {
        int i = blockIdx.x * block_size_x + threadIdx.x;
        if (i<n) {
            c[i] = a[i] + b[i];
        }
    }

    extern "C" void vector_add(float *c, const float *a, const float *b, int n) {
        dim3 dimGrid(n);
        dim3 dimBlock(block_size_x);
        vector_add_kernel<<<dimGrid, dimBlock>>>(c, a, b, n);
    }
    """
    a = cp.asarray([1, 2.0], dtype=np.float32)
    b = cp.asarray([3, 4.0], dtype=np.float32)
    c = cp.zeros_like(b)
    n = np.int32(len(c))

    # The return value is not inspected; the check below reads ``c`` directly.
    kernel_tuner.run_kernel(
        kernel_name="vector_add",
        kernel_source=kernel_string,
        problem_size=n,
        arguments=[c, a, b, n],
        params={"block_size_x": 1},
        lang="C",
    )
    assert cp.all((a + b) == c)
0 commit comments