@@ -190,10 +190,12 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
190
190
size_t blockX, size_t blockY, size_t blockZ,
191
191
size_t sharedMemBytes, ParamDesc *params) {
192
192
auto syclQueue = queue->syclQueue_ ;
193
- auto syclGlobalRange = ::sycl::range<3 >(blockZ * gridZ, blockY * gridY, blockX * gridX);
193
+ auto syclGlobalRange =
194
+ ::sycl::range<3 >(blockZ * gridZ, blockY * gridY, blockX * gridX);
194
195
auto syclLocalRange = ::sycl::range<3 >(blockZ, blockY, blockX);
195
- sycl::nd_range<3 > syclNdRange (sycl::nd_range<3 >(syclGlobalRange, syclLocalRange));
196
-
196
+ sycl::nd_range<3 > syclNdRange (
197
+ sycl::nd_range<3 >(syclGlobalRange, syclLocalRange));
198
+
197
199
auto paramsCount = countUntil (params, ParamDesc{nullptr , 0 });
198
200
199
201
if (getenv (" IMEX_ENABLE_PROFILING" )) {
@@ -204,7 +206,8 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
204
206
205
207
if (getenv (" IMEX_PROFILING_RUNS" )) {
206
208
auto runs = strtol (getenv (" IMEX_PROFILING_RUNS" ), NULL , 10L );
207
- if (runs) rounds = runs;
209
+ if (runs)
210
+ rounds = runs;
208
211
}
209
212
210
213
for (int r = 0 ; r < rounds; r++) {
@@ -217,21 +220,27 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
217
220
cgh.parallel_for (syclNdRange, *kernel);
218
221
});
219
222
220
- auto startTime = event.get_profiling_info <cl::sycl::info::event_profiling::command_start>();
221
- auto endTime = event.get_profiling_info <cl::sycl::info::event_profiling::command_end>();
223
+ auto startTime = event.get_profiling_info <
224
+ cl::sycl::info::event_profiling::command_start>();
225
+ auto endTime = event.get_profiling_info <
226
+ cl::sycl::info::event_profiling::command_end>();
222
227
auto gap = float (endTime - startTime) / 1000000 .0f ;
223
228
executionTime += gap;
224
- if (gap > maxTime) maxTime = gap;
225
- if (gap < minTime) minTime = gap;
229
+ if (gap > maxTime)
230
+ maxTime = gap;
231
+ if (gap < minTime)
232
+ minTime = gap;
226
233
}
227
- fprintf (stdout, " the kernel execution time is (ms): avg: %.4f, min: %.4f, max: %.4f (over %ld runs)\n " , \
228
- executionTime/rounds, minTime, maxTime, rounds);
234
+ fprintf (stdout,
235
+ " the kernel execution time is (ms):"
236
+ " avg: %.4f, min: %.4f, max: %.4f (over %ld runs)\n " ,
237
+ executionTime / rounds, minTime, maxTime, rounds);
229
238
} else {
230
239
syclQueue.submit ([&](sycl::handler &cgh) {
231
240
for (size_t i = 0 ; i < paramsCount; i++) {
232
241
auto param = params[i];
233
242
cgh.set_arg (static_cast <uint32_t >(i),
234
- *(static_cast <void **>(param.data )));
243
+ *(static_cast <void **>(param.data )));
235
244
}
236
245
cgh.parallel_for (syclNdRange, *kernel);
237
246
});
0 commit comments