@@ -56,13 +56,9 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
5656 CudaFunctions .last_selected_device = device
5757
5858 # compute capabilities and device properties
59- err , major = cudart .cudaDeviceGetAttribute (
60- cudart .cudaDeviceAttr .cudaDevAttrComputeCapabilityMajor , device
61- )
59+ err , major = cudart .cudaDeviceGetAttribute (cudart .cudaDeviceAttr .cudaDevAttrComputeCapabilityMajor , device )
6260 cuda_error_check (err )
63- err , minor = cudart .cudaDeviceGetAttribute (
64- cudart .cudaDeviceAttr .cudaDevAttrComputeCapabilityMinor , device
65- )
61+ err , minor = cudart .cudaDeviceGetAttribute (cudart .cudaDeviceAttr .cudaDevAttrComputeCapabilityMinor , device )
6662 cuda_error_check (err )
6763 err , self .max_threads = cudart .cudaDeviceGetAttribute (
6864 cudart .cudaDeviceAttr .cudaDevAttrMaxThreadsPerBlock , device
@@ -164,20 +160,14 @@ def compile(self, kernel_instance):
164160 if not any (["--std=" in opt for opt in self .compiler_options ]):
165161 self .compiler_options .append ("--std=c++11" )
166162 if not any ([b"--gpu-architecture=" in opt or b"-arch" in opt for opt in compiler_options ]):
167- compiler_options .append (
168- f"--gpu-architecture=compute_{ to_valid_nvrtc_gpu_arch_cc (self .cc )} " .encode ("UTF-8" )
169- )
163+ compiler_options .append (f"--gpu-architecture=compute_{ to_valid_nvrtc_gpu_arch_cc (self .cc )} " .encode ("UTF-8" ))
170164 if not any (["--gpu-architecture=" in opt or "-arch" in opt for opt in self .compiler_options ]):
171165 self .compiler_options .append (f"--gpu-architecture=compute_{ to_valid_nvrtc_gpu_arch_cc (self .cc )} " )
172166
173- err , program = nvrtc .nvrtcCreateProgram (
174- str .encode (kernel_string ), b"CUDAProgram" , 0 , [], []
175- )
167+ err , program = nvrtc .nvrtcCreateProgram (str .encode (kernel_string ), b"CUDAProgram" , 0 , [], [])
176168 try :
177169 cuda_error_check (err )
178- err = nvrtc .nvrtcCompileProgram (
179- program , len (compiler_options ), compiler_options
180- )
170+ err = nvrtc .nvrtcCompileProgram (program , len (compiler_options ), compiler_options )
181171 cuda_error_check (err )
182172 err , size = nvrtc .nvrtcGetPTXSize (program )
183173 cuda_error_check (err )
@@ -189,9 +179,7 @@ def compile(self, kernel_instance):
189179 raise SkippableFailure ("uses too much shared data" )
190180 else :
191181 cuda_error_check (err )
192- err , self .func = cuda .cuModuleGetFunction (
193- self .current_module , str .encode (kernel_name )
194- )
182+ err , self .func = cuda .cuModuleGetFunction (self .current_module , str .encode (kernel_name ))
195183 cuda_error_check (err )
196184
197185 # get the number of registers per thread used in this kernel
0 commit comments