Skip to content

Commit 00ac419

Browse files
committed
Added function to check for compute capability validity, improved check on gpu-architecture compiler option, added gpu-architecture auto-adding to CuPy
1 parent f15338f commit 00ac419

File tree

3 files changed

+25
-11
lines changed

3 files changed

+25
-11
lines changed

kernel_tuner/backends/cupy.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""This module contains all Cupy specific kernel_tuner functions."""
22
from __future__ import print_function
3+
from warnings import warn
34

45
import numpy as np
56

67
from kernel_tuner.backends.backend import GPUBackend
78
from kernel_tuner.observers.cupy import CupyRuntimeObserver
9+
from kernel_tuner.util import is_valid_nvrtc_gpu_arch_cc
810

911
# embedded in try block to be able to generate documentation
1012
# and run tests without cupy installed
@@ -125,10 +127,11 @@ def compile(self, kernel_instance):
125127
compiler_options = self.compiler_options
126128
if not any(["-std=" in opt for opt in self.compiler_options]):
127129
compiler_options = ["--std=c++11"] + self.compiler_options
128-
if not any([b"--gpu-architecture=" in opt for opt in compiler_options]):
129-
compiler_options.append(
130-
f"--gpu-architecture=compute_{self.cc}".encode("UTF-8")
131-
)
130+
if is_valid_nvrtc_gpu_arch_cc(self.cc):
131+
if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in compiler_options]):
132+
compiler_options.append(f"--gpu-architecture=compute_{self.cc}")
133+
else:
134+
warn(f"Could not add compiler option '--gpu-architecture=compute_{self.cc}' as {self.cc} is an invalid target")
132135

133136
options = tuple(compiler_options)
134137

kernel_tuner/backends/nvcuda.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""This module contains all NVIDIA cuda-python specific kernel_tuner functions."""
2+
from warnings import warn
3+
24
import numpy as np
35

46
from kernel_tuner.backends.backend import GPUBackend
57
from kernel_tuner.observers.nvcuda import CudaRuntimeObserver
6-
from kernel_tuner.util import SkippableFailure, cuda_error_check
8+
from kernel_tuner.util import SkippableFailure, cuda_error_check, is_valid_nvrtc_gpu_arch_cc
79

810
# embedded in try block to be able to generate documentation
911
# and run tests without cuda-python installed
@@ -165,12 +167,15 @@ def compile(self, kernel_instance):
165167
compiler_options.append(b"--std=c++11")
166168
if not any(["--std=" in opt for opt in self.compiler_options]):
167169
self.compiler_options.append("--std=c++11")
168-
if not any([b"--gpu-architecture=" in opt for opt in compiler_options]):
169-
compiler_options.append(
170-
f"--gpu-architecture=compute_{self.cc}".encode("UTF-8")
171-
)
172-
if not any(["--gpu-architecture=" in opt for opt in self.compiler_options]):
173-
self.compiler_options.append(f"--gpu-architecture=compute_{self.cc}")
170+
if is_valid_nvrtc_gpu_arch_cc(self.cc):
171+
if not any([b"--gpu-architecture=" in opt or b"-arch" in opt for opt in compiler_options]):
172+
compiler_options.append(
173+
f"--gpu-architecture=compute_{self.cc}".encode("UTF-8")
174+
)
175+
if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in self.compiler_options]):
176+
self.compiler_options.append(f"--gpu-architecture=compute_{self.cc}")
177+
else:
178+
warn(f"Could not add compiler option '--gpu-architecture=compute_{self.cc}' as {self.cc} is an invalid target")
174179

175180
err, program = nvrtc.nvrtcCreateProgram(
176181
str.encode(kernel_string), b"CUDAProgram", 0, [], []

kernel_tuner/util.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,12 @@ def get_total_timings(results, env, overhead_time):
570570
return env
571571

572572

573+
def is_valid_nvrtc_gpu_arch_cc(compute_capability: str) -> bool:
    """Check whether a Compute Capability may be passed to NVRTC's `--gpu-architecture=` option.

    The accepted values are the ones listed at
    https://docs.nvidia.com/cuda/nvrtc/index.html#group__options.

    The argument is converted with `str()` before the lookup, so a value
    such as the integer 80 is treated the same as the string "80".
    """
    accepted_targets = (
        "50", "52", "53",
        "60", "61", "62",
        "70", "72", "75",
        "80", "87", "89",
        "90", "90a",
    )
    cc_text = str(compute_capability)
    return cc_text in accepted_targets
577+
578+
573579
def print_config(config, tuning_options, runner):
574580
"""Print the configuration string with tunable parameters and benchmark results."""
575581
print_config_output(tuning_options.tune_params, config, runner.quiet, tuning_options.metrics, runner.units)

0 commit comments

Comments
 (0)