@@ -199,21 +199,30 @@ def compile(self, kernel_instance):
199199 self .compiler_options .append (f"--gpu-architecture=compute_{ self .cc } " )
200200
201201 err , program = nvrtc .nvrtcCreateProgram (str .encode (kernel_string ), b"CUDAProgram" , 0 , [], [])
202- error_check (err )
203- err = nvrtc .nvrtcCompileProgram (program , len (compiler_options ), compiler_options )
204- error_check (err )
205- err , size = nvrtc .nvrtcGetPTXSize (program )
206- error_check (err )
207- buff = b' ' * size
208- err = nvrtc .nvrtcGetPTX (program , buff )
209- error_check (err )
210- err , self .current_module = cuda .cuModuleLoadData (np .char .array (buff ))
211- if err == cuda .CUresult .CUDA_ERROR_INVALID_PTX :
212- raise SkippableFailure ("uses too much shared data" )
213- else :
202+ try :
214203 error_check (err )
215- err , self .func = cuda .cuModuleGetFunction (self .current_module , str .encode (kernel_name ))
216- error_check (err )
204+ err = nvrtc .nvrtcCompileProgram (program , len (compiler_options ), compiler_options )
205+ error_check (err )
206+ err , size = nvrtc .nvrtcGetPTXSize (program )
207+ error_check (err )
208+ buff = b' ' * size
209+ err = nvrtc .nvrtcGetPTX (program , buff )
210+ error_check (err )
211+ err , self .current_module = cuda .cuModuleLoadData (np .char .array (buff ))
212+ if err == cuda .CUresult .CUDA_ERROR_INVALID_PTX :
213+ raise SkippableFailure ("uses too much shared data" )
214+ else :
215+ error_check (err )
216+ err , self .func = cuda .cuModuleGetFunction (self .current_module , str .encode (kernel_name ))
217+ error_check (err )
218+
219+ except RuntimeError as re :
220+ _ , n = nvrtc .nvrtcGetProgramLogSize (program )
221+ log = b' ' * n
222+ nvrtc .nvrtcGetProgramLog (program , log )
223+ print (log .decode ('utf-8' ))
224+ raise re
225+
217226 return self .func
218227
219228 def start_event (self ):
0 commit comments