Skip to content

Commit 4c04563

Browse files
authored
Merge pull request #596 from chencha3/main
Profiling with multiple runs
2 parents 3b9fb47 + 6e88d14 commit 4c04563

File tree

1 file changed

+45
-19
lines changed

1 file changed

+45
-19
lines changed

lib/ExecutionEngine/SYCLRUNTIME/SyclRuntimeWrappers.cpp

Lines changed: 45 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -198,26 +198,52 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
198198

199199
auto paramsCount = countUntil(params, ParamDesc{nullptr, 0});
200200

201-
sycl::event event = syclQueue.submit([&](sycl::handler &cgh) {
202-
for (size_t i = 0; i < paramsCount; i++) {
203-
auto param = params[i];
204-
cgh.set_arg(static_cast<uint32_t>(i),
205-
*(static_cast<void **>(param.data)));
206-
}
207-
cgh.parallel_for(syclNdRange, *kernel);
208-
});
209201
if (getenv("IMEX_ENABLE_PROFILING")) {
210-
// auto submitTime = event.get_profiling_info<
211-
// cl::sycl::info::event_profiling::command_submit>();
212-
auto startTime = event.get_profiling_info<
213-
cl::sycl::info::event_profiling::command_start>();
214-
auto endTime =
215-
event
216-
.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
217-
// auto submissionTime = float(startTime - submitTime) / 1000000.0f;
218-
// fprintf(stdout, "the kernel submission time is %f ms\n", submissionTime);
219-
auto executionTime = float(endTime - startTime) / 1000000.0f;
220-
fprintf(stdout, "the kernel execution time is %f ms\n", executionTime);
202+
auto executionTime = 0.0f;
203+
auto maxTime = 0.0f;
204+
auto minTime = 10000.0f;
205+
auto rounds = 1000;
206+
207+
if (getenv("IMEX_PROFILING_RUNS")) {
208+
auto runs = strtol(getenv("IMEX_PROFILING_RUNS"), NULL, 10L);
209+
if (runs)
210+
rounds = runs;
211+
}
212+
213+
for (int r = 0; r < rounds; r++) {
214+
sycl::event event = syclQueue.submit([&](sycl::handler &cgh) {
215+
for (size_t i = 0; i < paramsCount; i++) {
216+
auto param = params[i];
217+
cgh.set_arg(static_cast<uint32_t>(i),
218+
*(static_cast<void **>(param.data)));
219+
}
220+
cgh.parallel_for(syclNdRange, *kernel);
221+
});
222+
223+
auto startTime = event.get_profiling_info<
224+
cl::sycl::info::event_profiling::command_start>();
225+
auto endTime = event.get_profiling_info<
226+
cl::sycl::info::event_profiling::command_end>();
227+
auto gap = float(endTime - startTime) / 1000000.0f;
228+
executionTime += gap;
229+
if (gap > maxTime)
230+
maxTime = gap;
231+
if (gap < minTime)
232+
minTime = gap;
233+
}
234+
fprintf(stdout,
235+
"the kernel execution time is (ms):"
236+
"avg: %.4f, min: %.4f, max: %.4f (over %ld runs)\n",
237+
executionTime / rounds, minTime, maxTime, rounds);
238+
} else {
239+
syclQueue.submit([&](sycl::handler &cgh) {
240+
for (size_t i = 0; i < paramsCount; i++) {
241+
auto param = params[i];
242+
cgh.set_arg(static_cast<uint32_t>(i),
243+
*(static_cast<void **>(param.data)));
244+
}
245+
cgh.parallel_for(syclNdRange, *kernel);
246+
});
221247
}
222248
}
223249

0 commit comments

Comments
 (0)