|
| 1 | +#!/usr/bin/env python |
| 2 | +""" |
| 3 | + This is the vector_add example modified to show |
| 4 | + how to use PythonKernel with the CuPy backend |
| 5 | +""" |
| 6 | + |
| 7 | +import cupy as cp |
| 8 | +import numpy as np |
| 9 | +from kernel_tuner.kernelbuilder import PythonKernel |
| 10 | + |
def kernelbuilder_example():
    """Run a CUDA vector-add kernel on CuPy arrays through PythonKernel.

    Builds a ``PythonKernel`` with ``lang="cupy"`` for one fixed
    configuration (``block_size_x = 128``), calls it once on randomly
    generated CuPy arrays, and checks the output against ``a + b``.

    Raises:
        AssertionError: if the kernel output does not match ``a + b``.
    """

    # To make this example self-contained we include the kernel as a string
    # here, but you can also just point to a file with the kernel code.
    # Note that block_size_x is not declared in the kernel source itself; its
    # value comes from `params` below (presumably inserted by Kernel Tuner
    # when the kernel is compiled -- confirm against the Kernel Tuner docs).
    kernel_string = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        int i = blockIdx.x * block_size_x + threadIdx.x;
        if (i<n) {
            c[i] = a[i] + b[i];
        }
    }
    """

    # Set up the arguments for our vector add kernel
    size = 100000
    a = cp.random.randn(size).astype(np.float32)
    b = cp.random.randn(size).astype(np.float32)
    c = cp.zeros_like(b)
    n = np.int32(size)

    # Note that the type and order should match our GPU code.
    # Because the arguments are all CuPy arrays, our PythonKernel does not
    # need to worry about moving data between host and device.
    args = [c, a, b, n]

    # We can instantiate a specific kernel configuration
    params = {"block_size_x": 128}

    # Here we construct a Python object that represents the kernel.
    # We can use it to conveniently call the GPU kernel from Python
    # applications that want to call it frequently.
    vector_add = PythonKernel("vector_add", kernel_string, size, args, params, lang="cupy")

    # We can use the PythonKernel instance as a regular Python function
    vector_add(c, a, b, n)

    # Compare the result in c with a+b computed by CuPy. cp.allclose is used
    # rather than np.allclose because c, a and b are CuPy arrays: this avoids
    # relying on NumPy's __array_function__ dispatch to reach CuPy.
    assert cp.allclose(c, a + b)
| 49 | + |
| 50 | + |
# Run the example only when this file is executed as a script, so that
# importing the module does not launch any GPU work.
if __name__ == "__main__":
    kernelbuilder_example()
0 commit comments