@@ -190,10 +190,12 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
190190 size_t blockX, size_t blockY, size_t blockZ,
191191 size_t sharedMemBytes, ParamDesc *params) {
192192 auto syclQueue = queue->syclQueue_ ;
193- auto syclGlobalRange = ::sycl::range<3 >(blockZ * gridZ, blockY * gridY, blockX * gridX);
193+ auto syclGlobalRange =
194+ ::sycl::range<3 >(blockZ * gridZ, blockY * gridY, blockX * gridX);
194195 auto syclLocalRange = ::sycl::range<3 >(blockZ, blockY, blockX);
195- sycl::nd_range<3 > syclNdRange (sycl::nd_range<3 >(syclGlobalRange, syclLocalRange));
196-
196+ sycl::nd_range<3 > syclNdRange (
197+ sycl::nd_range<3 >(syclGlobalRange, syclLocalRange));
198+
197199 auto paramsCount = countUntil (params, ParamDesc{nullptr , 0 });
198200
199201 if (getenv (" IMEX_ENABLE_PROFILING" )) {
@@ -204,7 +206,8 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
204206
205207 if (getenv (" IMEX_PROFILING_RUNS" )) {
206208 auto runs = strtol (getenv (" IMEX_PROFILING_RUNS" ), NULL , 10L );
207- if (runs) rounds = runs;
209+ if (runs)
210+ rounds = runs;
208211 }
209212
210213 for (int r = 0 ; r < rounds; r++) {
@@ -217,21 +220,27 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
217220 cgh.parallel_for (syclNdRange, *kernel);
218221 });
219222
220- auto startTime = event.get_profiling_info <cl::sycl::info::event_profiling::command_start>();
221- auto endTime = event.get_profiling_info <cl::sycl::info::event_profiling::command_end>();
223+ auto startTime = event.get_profiling_info <
224+ cl::sycl::info::event_profiling::command_start>();
225+ auto endTime = event.get_profiling_info <
226+ cl::sycl::info::event_profiling::command_end>();
222227 auto gap = float (endTime - startTime) / 1000000 .0f ;
223228 executionTime += gap;
224- if (gap > maxTime) maxTime = gap;
225- if (gap < minTime) minTime = gap;
229+ if (gap > maxTime)
230+ maxTime = gap;
231+ if (gap < minTime)
232+ minTime = gap;
226233 }
227- fprintf (stdout, " the kernel execution time is (ms): avg: %.4f, min: %.4f, max: %.4f (over %ld runs)\n " , \
228- executionTime/rounds, minTime, maxTime, rounds);
234+ fprintf (stdout,
235+ " the kernel execution time is (ms):"
236+ " avg: %.4f, min: %.4f, max: %.4f (over %ld runs)\n " ,
237+ executionTime / rounds, minTime, maxTime, rounds);
229238 } else {
230239 syclQueue.submit ([&](sycl::handler &cgh) {
231240 for (size_t i = 0 ; i < paramsCount; i++) {
232241 auto param = params[i];
233242 cgh.set_arg (static_cast <uint32_t >(i),
234- *(static_cast <void **>(param.data )));
243+ *(static_cast <void **>(param.data )));
235244 }
236245 cgh.parallel_for (syclNdRange, *kernel);
237246 });
0 commit comments