Skip to content

Commit 881042a

Browse files
committed
Formatted with black.
1 parent 3fa8ef2 commit 881042a

File tree

1 file changed

+49
-27
lines changed

1 file changed

+49
-27
lines changed

examples/cuda/convolution_correct.py

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,66 +26,88 @@
2626
import kernel_tuner
2727
from collections import OrderedDict
2828

29+
2930
def tune():
    """Tune ``convolution_kernel`` and check every result against a reference.

    The kernel source is read from ``convolution.cu``. ``convolution_naive``
    is run once through ``kernel_tuner.run_kernel`` and its first output is
    handed to ``kernel_tuner.tune_kernel`` as the ``answer`` to verify
    against.

    Returns:
        The value returned by ``kernel_tuner.tune_kernel``.
    """
    with open("convolution.cu", "r") as source_file:
        kernel_string = source_file.read()

    filter_size = (17, 17)
    problem_size = (4096, 4096)
    size = numpy.prod(problem_size)

    # The input carries a border of 2 * (filter extent // 2) per dimension.
    border_size = tuple(extent // 2 * 2 for extent in filter_size)
    padded_dim0 = problem_size[0] + border_size[0]
    padded_dim1 = problem_size[1] + border_size[1]
    input_size = padded_dim0 * padded_dim1

    # Keep the two randn() calls in this order so the random stream is
    # consumed exactly as before (input first, filter second).
    output_image = numpy.zeros(size).astype(numpy.float32)
    input_image = numpy.random.randn(input_size).astype(numpy.float32)
    filter_weights = numpy.random.randn(filter_size[0] * filter_size[1]).astype(
        numpy.float32
    )

    cmem_args = {"d_filter": filter_weights}
    args = [output_image, input_image, filter_weights]

    powers_of_two = [2**exponent for exponent in range(3)]
    tune_params = OrderedDict(
        [
            ("filter_width", [filter_size[0]]),
            ("filter_height", [filter_size[1]]),
            # Narrower alternatives noted in the original: range(1, 3) for x
            # and range(1, 5) for y.
            ("block_size_x", [16 * multiple for multiple in range(1, 9)]),
            ("block_size_y", [2**exponent for exponent in range(1, 6)]),
            ("tile_size_x", list(powers_of_two)),
            ("tile_size_y", list(powers_of_two)),
            # toggle the insertion of padding in shared memory
            ("use_padding", [0, 1]),
            # toggle using the read-only cache
            ("read_only", [0, 1]),
        ]
    )

    grid_div_x = ["block_size_x", "tile_size_x"]
    grid_div_y = ["block_size_y", "tile_size_y"]

    # Compute the answer using a naive kernel.
    naive_params = {"block_size_x": 16, "block_size_y": 16}
    # NOTE(review): these two assignments repeat the values already stored
    # above; possibly they were meant for the naive kernel's parameters.
    # TODO confirm.
    tune_params["filter_width"] = [filter_size[0]]
    tune_params["filter_height"] = [filter_size[1]]
    reference_run = kernel_tuner.run_kernel(
        "convolution_naive",
        kernel_string,
        problem_size,
        args,
        naive_params,
        grid_div_y=["block_size_y"],
        grid_div_x=["block_size_x"],
        lang="cupy",
    )

    # Only the first argument is checked; the other fields are set to None.
    answer = [reference_run[0], None, None]

    # Start kernel tuning with correctness verification.
    tuning_outcome = kernel_tuner.tune_kernel(
        "convolution_kernel",
        kernel_string,
        problem_size,
        args,
        tune_params,
        grid_div_y=grid_div_y,
        grid_div_x=grid_div_x,
        verbose=True,
        cmem_args=cmem_args,
        answer=answer,
        lang="cupy",
    )
    return tuning_outcome
79100

80101

81102
if __name__ == "__main__":
    import json
    import time

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_ms = time.perf_counter() * 1000
    results = tune()
    end_ms = time.perf_counter() * 1000

    # Elapsed time of the whole tuning run, in milliseconds.
    print("\n Actual time used:", end_ms - start_ms)

    # Write what tune() returned to disk as JSON.
    with open("convolution_RTX_2070.json", "w") as fp:
        json.dump(results, fp)

0 commit comments

Comments
 (0)