
Commit 881db4d

Split the cuda-python utils from the main file.
1 parent b5d92d2 commit 881db4d

File tree

kernel_tuner/backends/nvcuda.py
kernel_tuner/util.py
kernel_tuner/utils/nvcuda.py

3 files changed: +39 -47 lines changed

kernel_tuner/backends/nvcuda.py

Lines changed: 8 additions & 19 deletions
@@ -5,7 +5,8 @@
 
 from kernel_tuner.backends.backend import GPUBackend
 from kernel_tuner.observers.nvcuda import CudaRuntimeObserver
-from kernel_tuner.util import SkippableFailure, cuda_error_check, to_valid_nvrtc_gpu_arch_cc
+from kernel_tuner.util import SkippableFailure
+from kernel_tuner.utils.nvcuda import cuda_error_check, to_valid_nvrtc_gpu_arch_cc
 
 # embedded in try block to be able to generate documentation
 # and run tests without cuda-python installed
@@ -56,13 +57,9 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         CudaFunctions.last_selected_device = device
 
         # compute capabilities and device properties
-        err, major = runtime.cudaDeviceGetAttribute(
-            runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, device
-        )
+        err, major = runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, device)
         cuda_error_check(err)
-        err, minor = runtime.cudaDeviceGetAttribute(
-            runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, device
-        )
+        err, minor = runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, device)
         cuda_error_check(err)
         err, self.max_threads = runtime.cudaDeviceGetAttribute(
             runtime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock, device
@@ -164,20 +161,14 @@ def compile(self, kernel_instance):
         if not any(["--std=" in opt for opt in self.compiler_options]):
             self.compiler_options.append("--std=c++11")
         if not any([b"--gpu-architecture=" in opt or b"-arch" in opt for opt in compiler_options]):
-            compiler_options.append(
-                f"--gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(self.cc)}".encode("UTF-8")
-            )
+            compiler_options.append(f"--gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(self.cc)}".encode("UTF-8"))
         if not any(["--gpu-architecture=" in opt or "-arch" in opt for opt in self.compiler_options]):
             self.compiler_options.append(f"--gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(self.cc)}")
 
-        err, program = nvrtc.nvrtcCreateProgram(
-            str.encode(kernel_string), b"CUDAProgram", 0, [], []
-        )
+        err, program = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b"CUDAProgram", 0, [], [])
         try:
             cuda_error_check(err)
-            err = nvrtc.nvrtcCompileProgram(
-                program, len(compiler_options), compiler_options
-            )
+            err = nvrtc.nvrtcCompileProgram(program, len(compiler_options), compiler_options)
             cuda_error_check(err)
             err, size = nvrtc.nvrtcGetPTXSize(program)
             cuda_error_check(err)
@@ -189,9 +180,7 @@ def compile(self, kernel_instance):
                 raise SkippableFailure("uses too much shared data")
             else:
                 cuda_error_check(err)
-                err, self.func = driver.cuModuleGetFunction(
-                    self.current_module, str.encode(kernel_name)
-                )
+                err, self.func = driver.cuModuleGetFunction(self.current_module, str.encode(kernel_name))
                 cuda_error_check(err)
 
         # get the number of registers per thread used in this kernel
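For context, a minimal usage sketch (not part of this commit) of the attribute-query pattern used above, with error checking now imported from the relocated utils module. It assumes cuda-python is installed and exposes cuda.bindings.runtime, and that a CUDA-capable device is present.

# Hypothetical sketch, not part of the commit: query a device's compute
# capability via cuda-python and map it to a valid NVRTC architecture.
from cuda.bindings import runtime

from kernel_tuner.utils.nvcuda import cuda_error_check, to_valid_nvrtc_gpu_arch_cc

device = 0
err, major = runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, device)
cuda_error_check(err)
err, minor = runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, device)
cuda_error_check(err)

cc = f"{major}{minor}"
print(f"compute capability {cc} -> --gpu-architecture=compute_{to_valid_nvrtc_gpu_arch_cc(cc)}")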

kernel_tuner/util.py

Lines changed: 0 additions & 28 deletions
@@ -38,10 +38,6 @@
     import cupy as cp
 except ImportError:
     cp = np
-try:
-    from cuda import cuda, cudart, nvrtc
-except ImportError:
-    cuda = None
 
 from kernel_tuner.observers.nvml import NVMLObserver
 
@@ -642,14 +638,6 @@ def get_total_timings(results, env, overhead_time):
     return env
 
 
-NVRTC_VALID_CC = np.array(["50", "52", "53", "60", "61", "62", "70", "72", "75", "80", "87", "89", "90", "90a"])
-
-
-def to_valid_nvrtc_gpu_arch_cc(compute_capability: str) -> str:
-    """Returns a valid Compute Capability for NVRTC `--gpu-architecture=`, as per https://docs.nvidia.com/cuda/nvrtc/index.html#group__options."""
-    return max(NVRTC_VALID_CC[NVRTC_VALID_CC <= compute_capability], default="52")
-
-
 def print_config(config, tuning_options, runner):
     """Print the configuration string with tunable parameters and benchmark results."""
     print_config_output(tuning_options.tune_params, config, runner.quiet, tuning_options.metrics, runner.units)
@@ -1315,19 +1303,3 @@ def dump_cache(obj: str, tuning_options):
     if isinstance(tuning_options.cache, dict) and tuning_options.cachefile:
         with open(tuning_options.cachefile, "a") as cachefile:
             cachefile.write(obj)
-
-
-def cuda_error_check(error):
-    """Checking the status of CUDA calls using the NVIDIA cuda-python backend."""
-    if isinstance(error, cuda.CUresult):
-        if error != cuda.CUresult.CUDA_SUCCESS:
-            _, name = cuda.cuGetErrorName(error)
-            raise RuntimeError(f"CUDA error: {name.decode()}")
-    elif isinstance(error, cudart.cudaError_t):
-        if error != cudart.cudaError_t.cudaSuccess:
-            _, name = cudart.getErrorName(error)
-            raise RuntimeError(f"CUDART error: {name.decode()}")
-    elif isinstance(error, nvrtc.nvrtcResult):
-        if error != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            _, desc = nvrtc.nvrtcGetErrorString(error)
-            raise RuntimeError(f"NVRTC error: {desc.decode()}")

kernel_tuner/utils/nvcuda.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+"""Module for kernel tuner cuda-python utility functions."""
+
+import numpy as np
+
+try:
+    from cuda.bindings import driver, runtime, nvrtc
+except ImportError:
+    cuda = None
+
+NVRTC_VALID_CC = np.array(["50", "52", "53", "60", "61", "62", "70", "72", "75", "80", "87", "89", "90", "90a"])
+
+
+def cuda_error_check(error):
+    """Checking the status of CUDA calls using the NVIDIA cuda-python backend."""
+    if isinstance(error, driver.CUresult):
+        if error != driver.CUresult.CUDA_SUCCESS:
+            _, name = driver.cuGetErrorName(error)
+            raise RuntimeError(f"CUDA error: {name.decode()}")
+    elif isinstance(error, runtime.cudaError_t):
+        if error != runtime.cudaError_t.cudaSuccess:
+            _, name = runtime.getErrorName(error)
+            raise RuntimeError(f"CUDART error: {name.decode()}")
+    elif isinstance(error, nvrtc.nvrtcResult):
+        if error != nvrtc.nvrtcResult.NVRTC_SUCCESS:
+            _, desc = nvrtc.nvrtcGetErrorString(error)
+            raise RuntimeError(f"NVRTC error: {desc.decode()}")
+
+
+def to_valid_nvrtc_gpu_arch_cc(compute_capability: str) -> str:
+    """Returns a valid Compute Capability for NVRTC `--gpu-architecture=`, as per https://docs.nvidia.com/cuda/nvrtc/index.html#group__options."""
+    return max(NVRTC_VALID_CC[NVRTC_VALID_CC <= compute_capability], default="52")
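To illustrate the selection logic of to_valid_nvrtc_gpu_arch_cc: compute capabilities are compared as strings against NVRTC_VALID_CC, and the largest listed value not exceeding the input is returned, falling back to "52" when nothing matches. A small sketch, not part of the commit:

from kernel_tuner.utils.nvcuda import to_valid_nvrtc_gpu_arch_cc

# "86" is not a listed NVRTC architecture, so the closest lower entry is used.
assert to_valid_nvrtc_gpu_arch_cc("86") == "80"

# "90" is listed and is returned unchanged.
assert to_valid_nvrtc_gpu_arch_cc("90") == "90"

# Anything below the smallest listed capability falls back to the default "52".
assert to_valid_nvrtc_gpu_arch_cc("35") == "52"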
