@@ -29,284 +29,92 @@ from collections.abc import Callable, Sequence
2929from typing import Optional , Self , SupportsFloat , SupportsInt , Union
3030
3131class CudaStream :
32- """Represents CUDA stream
33-
34- Note
35- ----
36- The class is not user-constructible.
37- """
38- def __cuda_stream__ (self ) -> tuple [int , int ]:
39- """
40- Special method implementing the CUDA stream protocol
41- from `cuda.core`. Returns a pair of integers:
42- (protocol_version, integral_value_of_cudaStream_t pointer)
43-
44- Example
45- -------
46- import cuda.core.experimental as core
47- import cuda.bench as bench
48-
49- def bench(state: bench.State):
50- dev = core.Device(state.get_device())
51- dev.set_current()
52- # converts CudaStream to core.Stream
53- # using __cuda_stream__ protocol
54- dev.create_stream(state.get_stream())
55- """
56- ...
57-
58- def addressof (self ) -> int :
59- "Integral value of address of driver's CUDA stream struct"
60- ...
32+ def __cuda_stream__ (self ) -> tuple [int , int ]: ...
33+ def addressof (self ) -> int : ...
6134
6235class Benchmark :
63- """Represents NVBench benchmark.
64-
65- Note
66- ----
67- The class is not user-constructible.
68-
69- Use `~register` function to create Benchmark and register
70- it with NVBench.
71- """
72- def get_name (self ) -> str :
73- "Get benchmark name"
74- ...
75- def add_int64_axis (self , name : str , values : Sequence [SupportsInt ]) -> Self :
76- "Add integral type parameter axis with given name and values to sweep over"
77- ...
36+ def get_name (self ) -> str : ...
37+ def add_int64_axis (self , name : str , values : Sequence [SupportsInt ]) -> Self : ...
7838 def add_int64_power_of_two_axis (
7939 self , name : str , values : Sequence [SupportsInt ]
80- ) -> Self :
81- "Add integral type parameter axis with given name and values to sweep over"
82- ...
83- def add_float64_axis (self , name : str , values : Sequence [SupportsFloat ]) -> Self :
84- "Add floating-point type parameter axis with given name and values to sweep over"
85- ...
86- def add_string_axis (self , name : str , values : Sequence [str ]) -> Self :
87- "Add string type parameter axis with given name and values to sweep over"
88- ...
89- def set_name (self , name : str ) -> Self :
90- "Set benchmark name"
91- ...
92- def set_is_cpu_only (self , is_cpu_only : bool ) -> Self :
93- "Set whether this benchmark only executes on CPU"
94- ...
95- def set_run_once (self , v : bool ) -> Self :
96- "Set whether all benchmark configurations are executed only once"
97- ...
98- def set_skip_time (self , duration_seconds : SupportsFloat ) -> Self :
99- "Set run durations, in seconds, that should be skipped"
100- ...
101- def set_throttle_recovery_delay (self , delay_seconds : SupportsFloat ) -> Self :
102- "Set throttle recovery delay, in seconds"
103- ...
104- def set_throttle_threshold (self , threshold : SupportsFloat ) -> Self :
105- "Set throttle threshold, as a fraction of maximal GPU frequency"
106- ...
107- def set_timeout (self , duration_seconds : SupportsFloat ) -> Self :
108- "Set benchmark run duration timeout value, in seconds"
109- ...
110- def set_stopping_criterion (self , criterion : str ) -> Self :
111- "Set stopping criterion to be used"
112- ...
113- def set_criterion_param_float64 (self , name : str , value : SupportsFloat ) -> Self :
114- "Set stopping criterion floating point parameter value"
115- ...
116- def set_criterion_param_int64 (self , name : str , value : SupportsInt ) -> Self :
117- "Set stopping criterion integer parameter value"
118- ...
119- def set_criterion_param_string (self , name : str , value : str ) -> Self :
120- "Set stopping criterion string parameter value"
121- ...
122- def set_min_samples (self , count : SupportsInt ) -> Self :
123- "Set minimal samples count before stopping criterion applies"
124- ...
40+ ) -> Self : ...
41+ def add_float64_axis (self , name : str , values : Sequence [SupportsFloat ]) -> Self : ...
42+ def add_string_axis (self , name : str , values : Sequence [str ]) -> Self : ...
43+ def set_name (self , name : str ) -> Self : ...
44+ def set_run_once (self , v : bool ) -> Self : ...
45+ def set_skip_time (self , duration_seconds : SupportsFloat ) -> Self : ...
46+ def set_throttle_recovery_delay (self , delay_seconds : SupportsFloat ) -> Self : ...
47+ def set_throttle_threshold (self , threshold : SupportsFloat ) -> Self : ...
48+ def set_timeout (self , duration_seconds : SupportsFloat ) -> Self : ...
49+ def set_stopping_criterion (self , criterion : str ) -> Self : ...
50+ def set_criterion_param_float64 (self , name : str , value : SupportsFloat ) -> Self : ...
51+ def set_criterion_param_int64 (self , name : str , value : SupportsInt ) -> Self : ...
52+ def set_criterion_param_string (self , name : str , value : str ) -> Self : ...
53+ def set_min_samples (self , count : SupportsInt ) -> Self : ...
54+ def set_is_cpu_only (self , is_cpu_only : bool ) -> Self : ...
12555
12656class Launch :
127- """Configuration object for function launch.
128-
129- Note
130- ----
131- The class is not user-constructible.
132- """
133- def get_stream (self ) -> CudaStream :
134- "Get CUDA stream of this configuration"
135- ...
57+ def get_stream (self ) -> CudaStream : ...
13658
13759class State :
138- """Represent benchmark configuration state.
139-
140- Note
141- ----
142- The class is not user-constructible.
143- """
144- def has_device (self ) -> bool :
145- "True if configuration has a device"
146- ...
147- def has_printers (self ) -> bool :
148- "True if configuration has a printer"
149- ...
150- def get_device (self ) -> Union [int , None ]:
151- "Get device_id of the device from this configuration"
152- ...
153- def get_stream (self ) -> CudaStream :
154- "CudaStream object from this configuration"
155- ...
156- def get_int64 (self , name : str ) -> int :
157- "Get value for given Int64 axis from this configuration"
158- ...
159- def get_int64_or_default (self , name : str , default_value : SupportsInt ) -> int :
160- "Get value for given Int64 axis from this configuration"
161- ...
162- def get_float64 (self , name : str ) -> float :
163- "Get value for given Float64 axis from this configuration"
164- ...
165- def get_float64_or_default (self , name : str , default_value : SupportsFloat ) -> float :
166- "Get value for given Float64 axis from this configuration"
167- ...
168- def get_string (self , name : str ) -> str :
169- "Get value for given String axis from this configuration"
170- ...
171- def get_string_or_default (self , name : str , default_value : str ) -> str :
172- "Get value for given String axis from this configuration"
173- ...
60+ def has_device (self ) -> bool : ...
61+ def has_printers (self ) -> bool : ...
62+ def get_device (self ) -> Union [int , None ]: ...
63+ def get_stream (self ) -> CudaStream : ...
64+ def get_int64 (self , name : str ) -> int : ...
65+ def get_int64_or_default (self , name : str , default_value : SupportsInt ) -> int : ...
66+ def get_float64 (self , name : str ) -> float : ...
67+ def get_float64_or_default (
68+ self , name : str , default_value : SupportsFloat
69+ ) -> float : ...
70+ def get_string (self , name : str ) -> str : ...
71+ def get_string_or_default (self , name : str , default_value : str ) -> str : ...
17472 def add_element_count (
17573 self , count : SupportsInt , column_name : Optional [str ] = None
176- ) -> None :
177- "Add element count"
178- ...
179- def set_element_count (self , count : SupportsInt ) -> None :
180- "Set element count"
181- ...
182- def get_element_count (self ) -> int :
183- "Get element count"
184- ...
185- def skip (self , reason : str ) -> None :
186- "Skip this configuration"
187- ...
188- def is_skipped (self ) -> bool :
189- "Has this configuration been skipped"
190- ...
191- def get_skip_reason (self ) -> str :
192- "Get reason provided for skipping this configuration"
193- ...
74+ ) -> None : ...
75+ def set_element_count (self , count : SupportsInt ) -> None : ...
76+ def get_element_count (self ) -> int : ...
77+ def skip (self , reason : str ) -> None : ...
78+ def is_skipped (self ) -> bool : ...
79+ def get_skip_reason (self ) -> str : ...
19480 def add_global_memory_reads (
19581 self , nbytes : SupportsInt , / , column_name : str = ""
196- ) -> None :
197- "Inform NVBench that the given number of bytes is being read by the benchmark from global memory"
198- ...
82+ ) -> None : ...
19983 def add_global_memory_writes (
20084 self , nbytes : SupportsInt , / , column_name : str = ""
201- ) -> None :
202- "Inform NVBench that the given number of bytes is being written by the benchmark into global memory"
203- ...
204- def get_benchmark (self ) -> Benchmark :
205- "Get Benchmark this configuration is a part of"
206- ...
207- def get_throttle_threshold (self ) -> float :
208- "Get throttle threshold value, as fraction of maximal frequency"
209- ...
210- def set_throttle_threshold (self , threshold_fraction : SupportsFloat ) -> None :
211- "Set throttle threshold fraction to specified value, expected to be between 0 and 1"
212- ...
213- def get_min_samples (self ) -> int :
214- "Get the number of benchmark timings NVBench performs before stopping criterion begins being used"
215- ...
216- def set_min_samples (self , min_samples_count : SupportsInt ) -> None :
217- "Set the number of benchmark timings for NVBench to perform before stopping criterion begins being used"
218- ...
219- def get_disable_blocking_kernel (self ) -> bool :
220- "True if use of blocking kernel by NVBench is disabled, False otherwise"
221- ...
222- def set_disable_blocking_kernel (self , flag : bool ) -> None :
223- "Use flag = True to disable use of blocking kernel by NVBench"
224- ...
225- def get_run_once (self ) -> bool :
226- "Boolean flag whether configuration should only run once"
227- ...
228- def set_run_once (self , run_once_flag : bool ) -> None :
229- "Set run-once flag for this configuration"
230- ...
231- def get_timeout (self ) -> float :
232- "Get time-out value for benchmark execution of this configuration, in seconds"
233- ...
234- def set_timeout (self , duration : SupportsFloat ) -> None :
235- "Set time-out value for benchmark execution of this configuration, in seconds"
236- ...
237- def get_blocking_kernel_timeout (self ) -> float :
238- "Get time-out value for execution of blocking kernel, in seconds"
239- ...
240- def set_blocking_kernel_timeout (self , duration : SupportsFloat ) -> None :
241- "Set time-out value for execution of blocking kernel, in seconds"
242- ...
243- def collect_cupti_metrics (self ) -> None :
244- "Request NVBench to record CUPTI metrics while running benchmark for this configuration"
245- ...
246- def is_cupti_required (self ) -> bool :
247- "True if (some) CUPTI metrics are being collected"
248- ...
85+ ) -> None : ...
86+ def get_benchmark (self ) -> Benchmark : ...
87+ def get_throttle_threshold (self ) -> float : ...
88+ def set_throttle_threshold (self , threshold_fraction : SupportsFloat ) -> None : ...
89+ def get_min_samples (self ) -> int : ...
90+ def set_min_samples (self , min_samples_count : SupportsInt ) -> None : ...
91+ def get_disable_blocking_kernel (self ) -> bool : ...
92+ def set_disable_blocking_kernel (self , flag : bool ) -> None : ...
93+ def get_run_once (self ) -> bool : ...
94+ def set_run_once (self , run_once_flag : bool ) -> None : ...
95+ def get_timeout (self ) -> float : ...
96+ def set_timeout (self , duration : SupportsFloat ) -> None : ...
97+ def get_blocking_kernel_timeout (self ) -> float : ...
98+ def set_blocking_kernel_timeout (self , duration : SupportsFloat ) -> None : ...
99+ def collect_cupti_metrics (self ) -> None : ...
100+ def is_cupti_required (self ) -> bool : ...
249101 def exec (
250102 self ,
251103 fn : Callable [[Launch ], None ],
252104 / ,
253105 * ,
254106 batched : Optional [bool ] = True ,
255107 sync : Optional [bool ] = False ,
256- ):
257- """Execute callable running the benchmark.
258-
259- The callable may be executed multiple times.
260-
261- Parameters
262- ----------
263- fn: Callable
264- Python callable with signature fn(Launch) -> None that executes the benchmark.
265- batched: bool, optional
266- If `True`, no cache flushing is performed between callable invocations.
267- Default: `True`.
268- sync: bool, optional
269- True value indicates that callable performs device synchronization.
270- NVBench disables use of blocking kernel in this case.
271- Default: `False`.
272- """
273- ...
274- def get_short_description (self ) -> str :
275- "Get short description for this configuration"
276- ...
108+ ): ...
109+ def get_short_description (self ) -> str : ...
277110 def add_summary (
278111 self , column_name : str , value : Union [SupportsInt , SupportsFloat , str ]
279- ) -> None :
280- "Add summary column with a value"
281- ...
282- def get_axis_values (self ) -> dict [str , int | float | str ]:
283- "Get dictionary with axis values for this configuration"
284- ...
285- def get_axis_values_as_string (self ) -> str :
286- "Get string of space-separated name=value pairs for this configuration"
287- ...
288- def get_stopping_criterion (self ) -> str :
289- "Get string name of stopping criterion used"
290- ...
291-
292- def register (fn : Callable [[State ], None ]) -> Benchmark :
293- """
294- Register given benchmarking function with NVBench.
295- """
296- ...
297-
298- def run_all_benchmarks (argv : Sequence [str ]) -> None :
299- """
300- Run all benchmarks registered with NVBench.
301-
302- Parameters
303- ----------
304- argv: Sequence[str]
305- Sequence of CLI arguments controlling NVBench. Usually, it is `sys.argv`.
306- """
307- ...
112+ ) -> None : ...
113+ def get_axis_values (self ) -> dict [str , int | float | str ]: ...
114+ def get_axis_values_as_string (self ) -> str : ...
115+ def get_stopping_criterion (self ) -> str : ...
308116
309- class NVBenchRuntimeError ( RuntimeError ):
310- """An exception raised if running benchmarks encounters an error"""
117+ def register ( fn : Callable [[ State ], None ]) -> Benchmark : ...
118+ def run_all_benchmarks ( argv : Sequence [ str ]) -> None : ...
311119
312- ...
120+ class NVBenchRuntimeError ( RuntimeError ): ...
0 commit comments