Skip to content

Commit 160d4b7

Browse files
committed
Expanded custom optimizer test to T1 input format
1 parent e8ff6e7 commit 160d4b7

File tree

4 files changed

+121
-8
lines changed

4 files changed

+121
-8
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ push_to_pypi.sh
2020
*.json
2121
!kernel_tuner/schema/T1/1.0.0/input-schema.json
2222
!test/test_T1_input.json
23-
!test_cache_file.json
23+
!test_cache_file*.json
2424
*.csv
2525
.cache
2626
*.ipynb_checkpoints

test/test_cache_file_T1_input.json

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"General": {
3+
"BenchmarkName": "vector_add",
4+
"OutputFormat": "JSON"
5+
},
6+
"ConfigurationSpace": {
7+
"TuningParameters": [
8+
{
9+
"Name": "block_size_x",
10+
"Type": "int",
11+
"Values": "[128+64*i for i in range(15)]",
12+
"Default": 512
13+
}
14+
],
15+
"Conditions": []
16+
},
17+
"KernelSpecification": {
18+
"Language": "CUDA",
19+
"CompilerOptions": [
20+
"-std=c++11"
21+
],
22+
"BenchmarkName": "vector_add",
23+
"KernelName": "vector_add",
24+
"KernelFile": "vector_add.cu",
25+
"GlobalSizeType": "CUDA",
26+
"LocalSize": {
27+
"X": "block_size_x",
28+
"Y": "1",
29+
"Z": "1"
30+
},
31+
"GlobalSize": {
32+
"X": "10000000 // block_size_x",
33+
"Y": "1",
34+
"Z": "1"
35+
},
36+
"GridDivX": [
37+
"block_size_x"
38+
],
39+
"GridDivY": [
40+
"block_size_y"
41+
],
42+
"ProblemSize": [],
43+
"SharedMemory": 0,
44+
"Stream": null,
45+
"Arguments": [
46+
{
47+
"Name": "a",
48+
"Type": "float",
49+
"MemoryType": "Vector",
50+
"AccessType": "ReadOnly",
51+
"FillType": "Random",
52+
"Size": 10000000,
53+
"FillValue": 1.0
54+
},
55+
{
56+
"Name": "b",
57+
"Type": "float",
58+
"MemoryType": "Vector",
59+
"AccessType": "ReadOnly",
60+
"FillType": "Random",
61+
"Size": 10000000,
62+
"FillValue": 1.0
63+
},
64+
{
65+
"Name": "c",
66+
"Type": "float",
67+
"MemoryType": "Vector",
68+
"AccessType": "WriteOnly",
69+
"FillType": "Constant",
70+
"Size": 10000000,
71+
"FillValue": 0.0
72+
},
73+
{
74+
"Name": "n",
75+
"Type": "int32",
76+
"MemoryType": "Scalar",
77+
"AccessType": "ReadOnly",
78+
"FillValue": 10000000
79+
}
80+
]
81+
}
82+
}

test/test_custom_optimizer.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,16 +120,14 @@ def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
120120
return best_params, best_value, evaluations
121121

122122

123-
124-
125123
### Testing the Optimization Algorithm Wrapper in Kernel Tuner
126-
import os
127-
from kernel_tuner import tune_kernel
124+
from kernel_tuner import tune_kernel, tune_kernel_T1
128125
from kernel_tuner.strategies.wrapper import OptAlgWrapper
126+
from pathlib import Path
129127

130-
from .test_runners import env
128+
from .test_runners import env # noqa: F401
131129

132-
cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
130+
cache_filename = Path(__file__).parent.resolve() / "test_cache_file.json"
133131

134132
def test_OptAlgWrapper(env):
135133
kernel_name, kernel_string, size, args, tune_params = env
@@ -143,6 +141,33 @@ def test_OptAlgWrapper(env):
143141
strategy_options = { 'max_fevals': 15 }
144142

145143
# Call the tuner
146-
tune_kernel(kernel_name, kernel_string, size, args, tune_params,
144+
res, _ = tune_kernel(kernel_name, kernel_string, size, args, tune_params,
147145
strategy=strategy, strategy_options=strategy_options, cache=cache_filename,
148146
simulation_mode=True, verbose=True)
147+
assert len(res) == strategy_options['max_fevals']
148+
149+
# Runs the custom "HybridDELocalRefinement" strategy through tune_kernel_T1,
# using the T1 input JSON that sits next to this test file and the shared
# cache file, and checks that the number of returned results equals max_fevals.
def test_OptAlgWrapper_T1(env):
150+
# NOTE(review): none of the values unpacked from the env fixture are used in
# the rest of this test; the kernel description comes from the T1 input file.
kernel_name, kernel_string, size, args, tune_params = env
151+
152+
# Strategy is given by name; custom_search_method_path points at this very
# file, so the strategy is presumably looked up here by the tuner - TODO confirm.
strategy = "HybridDELocalRefinement"
153+
strategy_options = {
154+
"max_fevals": 15,
155+
"custom_search_method_path": Path(__file__).resolve(),
156+
"constraint_aware": False,
157+
}
158+
iterations = 1
159+
160+
# First positional argument: the T1 input file; second: the cache file path
# defined at module level. simulation_mode=True is passed, as in the
# non-T1 test above.
res, _ = tune_kernel_T1(
161+
Path(__file__).parent.resolve() / "test_cache_file_T1_input.json",
162+
cache_filename,
163+
device="NVIDIA RTX A4000",
164+
objective="time",
165+
objective_higher_is_better=False,
166+
simulation_mode=True,
167+
output_T4=False,
168+
iterations=iterations,
169+
strategy=strategy,
170+
strategy_options=strategy_options,
171+
)
172+
173+
# The result list is expected to contain exactly max_fevals entries.
assert len(res) == strategy_options['max_fevals']

test/vector_add.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Element-wise vector addition: each thread computes one index i and, when
// i < n, stores a[i] + b[i] into c[i].
// block_size_x is not declared in this file. The T1 input JSON added in the
// same commit lists it as a tuning parameter and as LocalSize.X, so it is
// presumably supplied as a compile-time constant equal to the x block
// dimension - TODO confirm.
// NOTE(review): that JSON lists Arguments in the order a, b, c, n, while this
// signature takes c first - confirm whether arguments are passed positionally.
__global__ void vector_add(float *c, float *a, float *b, int n) {
2+
// Flat index built from the block index, the block-size constant and the
// thread index within the block.
int i = blockIdx.x * block_size_x + threadIdx.x;
3+
// Bounds guard: threads whose index is not below n do nothing.
if (i<n) {
4+
c[i] = a[i] + b[i];
5+
}
6+
}

0 commit comments

Comments
 (0)