Skip to content

Commit d75fc74

Browse files
committed
Merge branch 'main' into remove-cupti-python
2 parents 4fa4296 + 867d5d4 commit d75fc74

File tree

11 files changed

+902
-589
lines changed

11 files changed

+902
-589
lines changed

docs/cli_help.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@
115115
* Applies to the most recent `--benchmark`, or all benchmarks if specified
116116
before any `--benchmark` arguments.
117117

118+
* `--no-batch`
119+
* Do not run batched measurements even if enabled.
120+
* Intended to shorten run-time when batched measurements are not of interest.
121+
* Applies to the most recent `--benchmark`, or all benchmarks if specified
122+
before any `--benchmark` arguments.
123+
118124
## Stopping Criteria
119125

120126
* `--timeout <seconds>`

nvbench/benchmark_base.cuh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,18 @@ struct benchmark_base
183183
}
184184
/// @}
185185

186+
/// If true, the batched measurements for this benchmark are not run. This is intended to
187+
/// save resources when only non-batched measurements are of interest, even though batched
188+
/// measurements are meaningful and the code to exercise them is compiled. This option has no
189+
/// effect for CPU-only benchmarks or for benchmarks tagged with the `no_batch` tag.
/// Defaults to false. The setter returns `*this` so that calls can be chained. @{
190+
[[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
191+
benchmark_base &set_skip_batched(bool v)
192+
{
193+
m_skip_batched = v;
194+
return *this;
195+
}
196+
/// @}
197+
186198
/// If true, the benchmark does not use the blocking_kernel. This is intended
187199
/// for use with external profiling tools. @{
188200
[[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -304,6 +316,7 @@ protected:
304316
bool m_is_cpu_only{false};
305317
bool m_run_once{false};
306318
bool m_disable_blocking_kernel{false};
319+
bool m_skip_batched{false};
307320

308321
nvbench::int64_t m_min_samples{10};
309322

nvbench/detail/state_exec.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
185185
static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag.");
186186
static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag.");
187187

188-
if (!this->get_run_once())
188+
if (!this->skip_hot_measurement())
189189
{
190190
using measure_t = nvbench::detail::measure_hot<KL>;
191191
measure_t measure{*this, kernel_launcher};

nvbench/option_parser.cu

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,11 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
467467
this->enable_profile();
468468
first += 1;
469469
}
470+
else if (arg == "--no-batch")
471+
{
472+
this->disable_batched();
473+
first += 1;
474+
}
470475
else if (arg == "--quiet" || arg == "-q")
471476
{
472477
// Setting this flag prevents the default stdout printer from being
@@ -762,6 +767,18 @@ void option_parser::enable_profile()
762767
bench.set_run_once(true);
763768
}
764769

770+
/// Handles the `--no-batch` option: sets the skip-batched flag on the most recently added
/// benchmark. If no benchmark has been added yet, the option is appended to the global
/// benchmark arguments instead (presumably applied later to every benchmark via
/// replay_global_args() -- confirm against that function).
void option_parser::disable_batched()
771+
{
772+
// No benchmark has been added yet: record the option as a global argument and stop.
773+
if (m_benchmarks.empty())
774+
{
775+
m_global_benchmark_args.push_back("--no-batch");
776+
return;
777+
}
778+
benchmark_base &bench = *m_benchmarks.back();
779+
bench.set_skip_batched(true);
780+
}
781+
765782
void option_parser::add_benchmark(const std::string &name)
766783
try
767784
{

nvbench/option_parser.cuh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private:
9090
void set_stopping_criterion(const std::string &criterion);
9191

9292
void enable_profile();
93+
void disable_batched();
9394

9495
void add_benchmark(const std::string &name);
9596
void replay_global_args();

nvbench/state.cuh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,14 @@ struct state
161161
void set_run_once(bool v) { m_run_once = v; }
162162
/// @}
163163

164+
/// If true, the batched measurements of this benchmark are not run. This is intended to
165+
/// save resources when only non-batched measurements are of interest, even though batched
166+
/// measurements are meaningful and the code to exercise them is compiled. This option has no
167+
/// effect for CPU-only benchmarks or for benchmarks tagged with the `no_batch` tag.
/// Defaults to false. @{
168+
[[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
169+
void set_skip_batched(bool v) { m_skip_batched = v; }
170+
/// @}
171+
164172
/// If true, the benchmark does not use the blocking_kernel. This is intended
165173
/// for use with external profiling tools. @{
166174
[[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -298,6 +306,8 @@ private:
298306
std::optional<nvbench::device_info> device,
299307
std::size_t type_config_index);
300308

309+
/// Returns true if the hot measurement should not be run, i.e. when either the run-once
/// flag or the skip-batched flag is set on this state.
[[nodiscard]] bool skip_hot_measurement() const
{
  // Either flag alone is sufficient; the run-once flag is checked first.
  if (get_run_once())
  {
    return true;
  }
  return get_skip_batched();
}
310+
301311
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
302312
nvbench::named_values m_axis_values;
303313
std::optional<nvbench::device_info> m_device;
@@ -306,6 +316,7 @@ private:
306316
bool m_is_cpu_only{false};
307317
bool m_run_once{false};
308318
bool m_disable_blocking_kernel{false};
319+
bool m_skip_batched{false};
309320

310321
nvbench::criterion_params m_criterion_params;
311322
std::string m_stopping_criterion;

nvbench/state.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ state::state(const benchmark_base &bench,
5757
, m_is_cpu_only(bench.get_is_cpu_only())
5858
, m_run_once{bench.get_run_once()}
5959
, m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
60+
, m_skip_batched{bench.get_skip_batched()}
6061
, m_criterion_params{bench.get_criterion_params()}
6162
, m_stopping_criterion(bench.get_stopping_criterion())
6263
, m_min_samples{bench.get_min_samples()}

python/cuda/bench/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,7 @@ def _get_cuda_major_version():
8383
_module_fullname,
8484
_get_cuda_major_version,
8585
)
86+
87+
__doc__ = """
88+
CUDA Kernel Benchmarking Library Python API
89+
"""

0 commit comments

Comments
 (0)