55#include < memory>
66#include < oneapi/mkl.hpp>
77
8- // This is a workaround to flush MKL submissions into the Level Zero queue, using
9- // unspecified but guaranteed behavior of the Intel SYCL runtime. Once the SYCL standard
10- // committee approves sycl::queue::flush(), we will change the macro to use that instead.
11- #define __FORCE_MKL_FLUSH__ (cmd ) \
12- sycl::get_native<sycl::backend::ext_oneapi_level_zero>(cmd)
13-
148oneapi::mkl::transpose convert (onemklTranspose val) {
159 switch (val) {
1610 case ONEMKL_TRANSPOSE_NONTRANS:
@@ -392,7 +386,6 @@ extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose trans
392386 reinterpret_cast <const sycl::half **>(&b[0 ]), ldb,
393387 reinterpret_cast <sycl::half *>(beta), reinterpret_cast <sycl::half **>(&c[0 ]),
394388 ldc, group_count, group_size, {});
395- __FORCE_MKL_FLUSH__ (status);
396389 return 0 ;
397390}
398391
@@ -410,7 +403,6 @@ extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose trans
410403 (const float **)&b[0 ], ldb,
411404 beta, &c[0 ], ldc,
412405 group_count, group_size, {});
413- __FORCE_MKL_FLUSH__ (status);
414406 return 0 ;
415407}
416408
@@ -428,7 +420,6 @@ extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose trans
428420 (const double **)&b[0 ], ldb,
429421 beta, &c[0 ], ldc,
430422 group_count, group_size, {});
431- __FORCE_MKL_FLUSH__ (status);
432423 return 0 ;
433424}
434425
@@ -450,7 +441,6 @@ extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose trans
450441 reinterpret_cast <std::complex <float > *>(beta),
451442 reinterpret_cast <std::complex <float > **>(&c[0 ]), ldc,
452443 group_count, group_size, {});
453- __FORCE_MKL_FLUSH__ (status);
454444 return 0 ;
455445}
456446
@@ -473,7 +463,6 @@ extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose trans
473463 reinterpret_cast <std::complex <double > *>(beta),
474464 reinterpret_cast <std::complex <double > **>(&c[0 ]), ldc,
475465 group_count, group_size, {});
476- __FORCE_MKL_FLUSH__ (status);
477466 return 0 ;
478467}
479468
@@ -490,7 +479,6 @@ extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right
490479 &trsmInfo.m_transa [0 ], &trsmInfo.m_unitdiag [0 ],
491480 m, n, alpha, (const float **)&a[0 ], lda,
492481 &b[0 ], ldb, group_count, group_size, {});
493- __FORCE_MKL_FLUSH__ (status);
494482 return 0 ;
495483}
496484
@@ -508,7 +496,6 @@ extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right
508496 &trsmInfo.m_transa [0 ], &trsmInfo.m_unitdiag [0 ],
509497 m, n, alpha, (const double **)&a[0 ], lda, &b[0 ],
510498 ldb, group_count, group_size, {});
511- __FORCE_MKL_FLUSH__ (status);
512499 return 0 ;
513500}
514501
@@ -528,7 +515,6 @@ extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right
528515 reinterpret_cast <const std::complex <float > **>(&a[0 ]),
529516 lda, reinterpret_cast <std::complex <float > **>(&b[0 ]),
530517 ldb, group_count, group_size, {});
531- __FORCE_MKL_FLUSH__ (status);
532518 return 0 ;
533519}
534520
@@ -548,6 +534,5 @@ extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right
548534 reinterpret_cast <const std::complex <double > **>(&a[0 ]),
549535 lda, reinterpret_cast <std::complex <double > **>(&b[0 ]),
550536 ldb, group_count, group_size, {});
551- __FORCE_MKL_FLUSH__ (status);
552537 return 0 ;
553538}
0 commit comments