@@ -108,9 +108,7 @@ SoftmaxLayer::SoftmaxLayer(LangHandle *langHandle, Timer* timer,
108108void SoftmaxLayer::doFw () {
109109#if defined(USE_CUBLAS)
110110
111- langHandle_->getSyclQueue ()->submit ([&](sycl::handler &cgh) {
112- // auto d_A = b_A.get_access<sycl::access::mode::read_write>(cgh);
113- cgh.host_task ([=](sycl::interop_handle ih) {
111+ SYCL::ExecNativeCommand (*langHandle_->getSyclQueue (), [=](sycl::interop_handle ih) {
114112 cuCtxSetCurrent (ih.get_native_context <sycl::backend::ext_oneapi_cuda>());
115113 cublasSetStream (*(langHandle_->getCublasHandle ()), ih.get_native_queue <sycl::backend::ext_oneapi_cuda>());
116114
@@ -129,14 +127,9 @@ void SoftmaxLayer::doFw() {
129127 d_output_));
130128 // cublasDestroy(handle);
131129 // cudaStreamSynchronize(cudaStreamHandle);
132- assertDevApiInvar (cudaDeviceSynchronize ());
133- });
134- });
135- langHandle_->getSyclQueue ()->wait_and_throw ();
130+ }, []{assertDevApiInvar (cudaDeviceSynchronize ())});
136131#elif defined(USE_ROCBLAS)
137- langHandle_->getSyclQueue ()->submit ([&](sycl::handler &cgh) {
138- // auto d_A = b_A.get_access<sycl::access::mode::read_write>(cgh);
139- cgh.host_task ([=](sycl::interop_handle ih) {
132+ SYCL::ExecNativeCommand (*langHandle_->getSyclQueue (), [=](sycl::interop_handle ih) {
140133 // cuCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_cuda>());
141134 // cublasSetStream(*(langHandle_->getCublasHandle()), ih.get_native_queue<sycl::backend::ext_oneapi_cuda>());
142135
@@ -153,10 +146,7 @@ void SoftmaxLayer::doFw() {
153146 d_output_));
154147 // cublasDestroy(handle);
155148 // cudaStreamSynchronize(cudaStreamHandle);
156- assertDevApiInvar (hipDeviceSynchronize ());
157- });
158- });
159- langHandle_->getSyclQueue ()->wait_and_throw ();
149+ }, []{assertDevApiInvar (hipDeviceSynchronize ())});
160150#else
161151 std::unordered_map<int , memory> softmax_args;
162152 softmax_args.insert ({DNNL_ARG_SRC, src_mem});
@@ -170,9 +160,7 @@ void SoftmaxLayer::doFw() {
170160void SoftmaxLayer::doBw () {
171161#if defined(USE_CUBLAS)
172162
173- langHandle_->getSyclQueue ()->submit ([&](sycl::handler &cgh) {
174- // auto d_A = b_A.get_access<sycl::access::mode::read_write>(cgh);
175- cgh.host_task ([=](sycl::interop_handle ih) {
163+ SYCL::ExecNativeCommand (*langHandle_->getSyclQueue (), [=](sycl::interop_handle ih) {
176164 cuCtxSetCurrent (ih.get_native_context <sycl::backend::ext_oneapi_cuda>());
177165 cublasSetStream (*(langHandle_->getCublasHandle ()), ih.get_native_queue <sycl::backend::ext_oneapi_cuda>());
178166
@@ -193,14 +181,9 @@ void SoftmaxLayer::doBw() {
193181 d_d_input_));
194182 // cublasDestroy(handle);
195183 // cudaStreamSynchronize(cudaStreamHandle);
196- assertDevApiInvar (cudaDeviceSynchronize ());
197- });
198- });
199- langHandle_->getSyclQueue ()->wait_and_throw ();
184+ }, []{assertDevApiInvar (cudaDeviceSynchronize ())});
200185#elif defined(USE_ROCBLAS)
201- langHandle_->getSyclQueue ()->submit ([&](sycl::handler &cgh) {
202- // auto d_A = b_A.get_access<sycl::access::mode::read_write>(cgh);
203- cgh.host_task ([=](sycl::interop_handle ih) {
186+ SYCL::ExecNativeCommand (*langHandle_->getSyclQueue (), [=](sycl::interop_handle ih) {
204187 // cuCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_cuda>());
205188 // cublasSetStream(*(langHandle_->getCublasHandle()), ih.get_native_queue<sycl::backend::ext_oneapi_cuda>());
206189
@@ -219,10 +202,7 @@ void SoftmaxLayer::doBw() {
219202 d_d_input_));
220203 // cublasDestroy(handle);
221204 // cudaStreamSynchronize(cudaStreamHandle);
222- assertDevApiInvar (hipDeviceSynchronize ());
223- });
224- });
225- langHandle_->getSyclQueue ()->wait_and_throw ();
205+ }, []{assertDevApiInvar (hipDeviceSynchronize ())});
226206#else
227207 std::unordered_map<int , memory> softmax_args;
228208 softmax_args.insert ({DNNL_ARG_SRC, src_mem});
@@ -237,4 +217,4 @@ void SoftmaxLayer::doBw() {
237217
238218SoftmaxLayer::~SoftmaxLayer () {
239219
240- }
220+ }
0 commit comments