-
Notifications
You must be signed in to change notification settings - Fork 40
Expand file tree
/
Copy pathbench_demo.cu
More file actions
25 lines (21 loc) · 888 Bytes
/
bench_demo.cu
File metadata and controls
25 lines (21 loc) · 888 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <cuda_runtime.h>
#include <cuda/std/chrono>
#include <nvbench/nvbench.cuh>
// Kernel that spins until roughly `microseconds` microseconds have passed
// since it started executing.
//
// microseconds: requested spin duration. If the resulting deadline is not in
//               the future (e.g. a zero duration), the loop body never runs
//               and the kernel returns right away.
//
// Every thread that is launched performs its own independent spin; the kernel
// uses no thread/block indices and touches no memory.
__global__ void sleep_kernel(nvbench::int64_t microseconds) {
  using timer = cuda::std::chrono::high_resolution_clock;

  // Fix the wake-up time once, relative to the first clock reading.
  const auto began = timer::now();
  const auto deadline = began + cuda::std::chrono::microseconds(microseconds);

  // Poll the clock until the deadline is reached; the loop has no body because
  // re-reading the clock is the only work to do.
  for (auto current = timer::now(); current < deadline; current = timer::now()) {
  }
}
// Benchmark body: reads the "Duration (us)" axis value for the current
// configuration from `state` and hands NVBench a launcher that runs
// sleep_kernel with that duration.
//
// state: NVBench state object for the configuration being run.
void sleep_benchmark(nvbench::state& state) {
  const auto sleep_us = state.get_int64("Duration (us)");

  state.exec([&sleep_us](nvbench::launch& launch) {
    // A single thread in a single block is enough: the kernel only spins.
    constexpr int grid_size = 1;
    constexpr int block_size = 1;
    constexpr int dynamic_smem_bytes = 0;

    // Launch on the stream supplied by the nvbench::launch argument.
    sleep_kernel<<<grid_size, block_size, dynamic_smem_bytes, launch.get_stream()>>>(sleep_us);
  });
}
// Register sleep_benchmark with NVBench.
// The int64 axis is named "Duration (us)"; this string must stay in sync with
// the name sleep_benchmark passes to state.get_int64().
// NOTE(review): nvbench::range(0, 100, 5) presumably sweeps durations from 0
// to 100 in steps of 5 -- confirm against nvbench::range whether the end value
// is included.
NVBENCH_BENCH(sleep_benchmark)
.add_int64_axis("Duration (us)", nvbench::range(0, 100, 5))
.set_timeout(1); // Limit to one second per measurement.