Skip to content

Commit 94c51dc

Browse files
add simple vector_add example
1 parent 3f622bc commit 94c51dc

File tree

4 files changed

+71
-103
lines changed

4 files changed

+71
-103
lines changed

kernel_tuner/example.cu

Lines changed: 0 additions & 12 deletions
This file was deleted.

kernel_tuner/example.py

Lines changed: 0 additions & 91 deletions
This file was deleted.

kernel_tuner/vector_add.cu

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#include "kernel_float.h"
2+
namespace kf = kernel_float;
3+
4+
// Element-wise vector addition: c[idx] = a[idx] + b[idx] for 0 <= idx < n.
//
// Each thread handles at most one element, selected by its global x index.
// `float_type` is not defined in this file; the accompanying tuning script
// lists it as a tunable parameter, so it is presumably supplied at compile
// time -- confirm against the build setup.
__global__ void vector_add(
    kf::vec<float_type, 1>* c,
    const kf::vec<float_type, 1>* a,
    const kf::vec<float_type, 1>* b,
    int n
) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads whose index falls past the end of the vectors have no work.
    if (idx >= n) {
        return;
    }

    c[idx] = a[idx] + b[idx];
}

kernel_tuner/vector_add.py

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env python
2+
import os
3+
4+
import numpy
5+
from kernel_tuner import tune_kernel
6+
from kernel_tuner.accuracy import TunablePrecision, AccuracyObserver
7+
8+
# Specify the compiler flags Kernel Tuner should use to compile our kernel.
# ROOT_DIR is the parent of the directory that contains this script and keeps
# its trailing separator. The include directory is appended with os.path.join
# so the resulting -I path does not contain a doubled separator.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../"
flags = ["-I" + os.path.join(ROOT_DIR, "include"), "-std=c++17"]
11+
12+
def tune(size=100000000):
    """Tune the ``vector_add`` kernel over thread block size and data type.

    Args:
        size: Number of elements in each vector; also passed to Kernel Tuner
            as the problem size. The default allocates several float64
            arrays of 100 million elements each, so pass a smaller value on
            machines with limited memory.

    Returns:
        The ``(results, env)`` pair returned by ``tune_kernel``.
    """
    # Prepare input data
    n = numpy.int32(size)
    a = numpy.random.randn(size).astype(numpy.float64)
    b = numpy.random.randn(size).astype(numpy.float64)
    c = numpy.zeros_like(b)

    # Prepare the argument list of the kernel. Every array is wrapped in
    # TunablePrecision under the "float_type" tunable parameter.
    args = [
        TunablePrecision("float_type", c),
        TunablePrecision("float_type", a),
        TunablePrecision("float_type", b),
        n,
    ]

    # Define the reference answer to compare the kernel output against.
    # Only the first argument (c) is checked; None entries are skipped.
    answer = [a + b, None, None, None]

    # Define the tunable parameters, in this case thread block size
    # and the type to use for the input and output data of our kernel
    tune_params = dict()
    tune_params["block_size_x"] = [64, 128, 256, 512]
    tune_params["float_type"] = ["half", "float", "double"]

    # Observers will measure the error using either RMSE or MRE as error metric
    observers = [
        AccuracyObserver("RMSE", "error_rmse"),
        AccuracyObserver("MRE", "error_relative"),
    ]

    # The metrics here are only to ensure Kernel Tuner prints them to the console
    metrics = dict(RMSE=lambda p: p["error_rmse"], MRE=lambda p: p["error_relative"])

    # Resolve the kernel source relative to this script rather than the
    # current working directory, so the example runs from any directory.
    kernel_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "vector_add.cu"
    )

    results, env = tune_kernel(
        "vector_add",
        kernel_file,
        size,
        args,
        tune_params,
        answer=answer,
        observers=observers,
        metrics=metrics,
        lang="cupy",
        compiler_options=flags,
    )

    return results, env
59+
60+
61+
# Run the tuning example only when this file is executed as a script.
if __name__ == "__main__":
    tune()

0 commit comments

Comments
 (0)