File tree Expand file tree Collapse file tree 4 files changed +19
-0
lines changed Expand file tree Collapse file tree 4 files changed +19
-0
lines changed Original file line number Diff line number Diff line change @@ -132,6 +132,7 @@ def compile(self, kernel_instance):
132
132
)
133
133
134
134
self .func = self .current_module .get_function (kernel_name )
135
+ self .num_regs = self .func .num_regs
135
136
return self .func
136
137
137
138
def start_event (self ):
Original file line number Diff line number Diff line change @@ -192,6 +192,11 @@ def compile(self, kernel_instance):
192
192
)
193
193
cuda_error_check (err )
194
194
195
+ # get the number of registers per thread used in this kernel
196
+ num_regs = cuda .cuFuncGetAttribute (cuda .CUfunction_attribute .CU_FUNC_ATTRIBUTE_NUM_REGS , self .func )
197
+ assert num_regs [0 ] == 0 , f"Retrieving number of registers per thread unsuccesful: code { num_regs [0 ]} "
198
+ self .num_regs = num_regs [1 ]
199
+
195
200
except RuntimeError as re :
196
201
_ , n = nvrtc .nvrtcGetProgramLogSize (program )
197
202
log = b" " * n
Original file line number Diff line number Diff line change @@ -218,6 +218,7 @@ def compile(self, kernel_instance):
218
218
)
219
219
220
220
self .func = self .current_module .get_function (kernel_name )
221
+ self .num_regs = self .func .num_regs
221
222
return self .func
222
223
except drv .CompileError as e :
223
224
if "uses too much shared data" in e .stderr :
Original file line number Diff line number Diff line change
1
+ from kernel_tuner .observers .observer import BenchmarkObserver
2
+
3
class RegisterObserver(BenchmarkObserver):
    """Benchmark observer that reports a kernel's register usage.

    The value is taken from ``self.dev.num_regs``. ``self.dev`` is not set
    in this class -- presumably the benchmarking framework attaches the
    backend device before results are requested (TODO confirm against
    ``BenchmarkObserver``).
    """

    def __init__(self) -> None:
        super().__init__()

    def get_results(self):
        """Return a dict mapping ``"num_regs"`` to ``self.dev.num_regs``."""
        register_count = self.dev.num_regs
        return {"num_regs": register_count}
You can’t perform that action at this time.
0 commit comments