@@ -99,30 +99,31 @@ struct GPUSYCLQUEUE {
9999 sycl::context syclContext_;
100100 sycl::queue syclQueue_;
101101
102- GPUSYCLQUEUE () {
102+ GPUSYCLQUEUE (sycl::property_list propList ) {
103103
104104 syclDevice_ = getDefaultDevice ();
105105 syclContext_ = sycl::context (syclDevice_);
106- syclQueue_ = sycl::queue (syclContext_, syclDevice_);
106+ syclQueue_ = sycl::queue (syclContext_, syclDevice_, propList );
107107 }
108108
109- GPUSYCLQUEUE (sycl::device *device, sycl::context *context) {
109+ GPUSYCLQUEUE (sycl::device *device, sycl::context *context,
110+ sycl::property_list propList) {
110111 syclDevice_ = *device;
111112 syclContext_ = *context;
112- syclQueue_ = sycl::queue (syclContext_, syclDevice_);
113+ syclQueue_ = sycl::queue (syclContext_, syclDevice_, propList );
113114 }
114- GPUSYCLQUEUE (sycl::device *device) {
115+ GPUSYCLQUEUE (sycl::device *device, sycl::property_list propList ) {
115116
116117 syclDevice_ = *device;
117118 syclContext_ = sycl::context (syclDevice_);
118- syclQueue_ = sycl::queue (syclContext_, syclDevice_);
119+ syclQueue_ = sycl::queue (syclContext_, syclDevice_, propList );
119120 }
120121
121- GPUSYCLQUEUE (sycl::context *context) {
122+ GPUSYCLQUEUE (sycl::context *context, sycl::property_list propList ) {
122123
123124 syclDevice_ = getDefaultDevice ();
124125 syclContext_ = *context;
125- syclQueue_ = sycl::queue (syclContext_, syclDevice_);
126+ syclQueue_ = sycl::queue (syclContext_, syclDevice_, propList );
126127 }
127128
128129}; // end of GPUSYCLQUEUE
@@ -197,32 +198,49 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
197198
198199 auto paramsCount = countUntil (params, ParamDesc{nullptr , 0 });
199200
200- syclQueue.submit ([&](sycl::handler &cgh) {
201+ sycl::event event = syclQueue.submit ([&](sycl::handler &cgh) {
201202 for (size_t i = 0 ; i < paramsCount; i++) {
202203 auto param = params[i];
203204 cgh.set_arg (static_cast <uint32_t >(i),
204205 *(static_cast <void **>(param.data )));
205206 }
206207 cgh.parallel_for (syclNdRange, *kernel);
207208 });
209+ if (getenv (" IMEX_ENABLE_PROFILING" )) {
210+ // auto submitTime = event.get_profiling_info<
211+ // cl::sycl::info::event_profiling::command_submit>();
212+ auto startTime = event.get_profiling_info <
213+ cl::sycl::info::event_profiling::command_start>();
214+ auto endTime =
215+ event
216+ .get_profiling_info <cl::sycl::info::event_profiling::command_end>();
217+ // auto submissionTime = float(startTime - submitTime) / 1000000.0f;
218+ // fprintf(stdout, "the kernel submission time is %f ms\n", submissionTime);
219+ auto executionTime = float (endTime - startTime) / 1000000 .0f ;
220+ fprintf (stdout, " the kernel execution time is %f ms\n " , executionTime);
221+ }
208222}
209223
210224// Wrappers
211225
212226extern " C" SYCL_RUNTIME_EXPORT GPUSYCLQUEUE *gpuCreateStream (void *device,
213227 void *context) {
228+ auto propList = sycl::property_list{};
229+ if (getenv (" IMEX_ENABLE_PROFILING" )) {
230+ propList = sycl::property_list{sycl::property::queue::enable_profiling ()};
231+ }
214232 return catchAll ([&]() {
215233 if (!device && !context) {
216- return new GPUSYCLQUEUE ();
234+ return new GPUSYCLQUEUE (propList );
217235 } else if (device && context) {
218236 // TODO: Check if the pointers/address is valid and holds the correct
219237 // device and context
220238 return new GPUSYCLQUEUE (static_cast <sycl::device *>(device),
221- static_cast <sycl::context *>(context));
239+ static_cast <sycl::context *>(context), propList );
222240 } else if (device && !context) {
223- return new GPUSYCLQUEUE (static_cast <sycl::device *>(device));
241+ return new GPUSYCLQUEUE (static_cast <sycl::device *>(device), propList );
224242 } else {
225- return new GPUSYCLQUEUE (static_cast <sycl::context *>(context));
243+ return new GPUSYCLQUEUE (static_cast <sycl::context *>(context), propList );
226244 }
227245 });
228246}
0 commit comments