@@ -419,7 +419,7 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst,
419419 const int ne12, const int nb1, const int nb2,
420420 const int offset, queue_ptr stream) {
421421 int num_blocks = ceil_div (n_elements, SYCL_ACC_BLOCK_SIZE);
422- stream-> parallel_for (
422+ sycl_parallel_for (stream,
423423 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) *
424424 sycl::range<1 >(SYCL_ACC_BLOCK_SIZE),
425425 sycl::range<1 >(SYCL_ACC_BLOCK_SIZE)),
@@ -449,7 +449,7 @@ static void pad_sycl(const T *x, T *dst, const int ne00,
449449 const int ne1, const int ne2, queue_ptr stream) {
450450 int num_blocks = ceil_div (ne0, SYCL_PAD_BLOCK_SIZE);
451451 sycl::range<3 > gridDim (ne2, ne1, num_blocks);
452- stream-> parallel_for (
452+ sycl_parallel_for (stream,
453453 sycl::nd_range<3 >(gridDim * sycl::range<3 >(1 , 1 , SYCL_PAD_BLOCK_SIZE),
454454 sycl::range<3 >(1 , 1 , SYCL_PAD_BLOCK_SIZE)),
455455 [=](sycl::nd_item<3 > item_ct1) { pad (x, dst, ne0, ne00, ne01, ne02, item_ct1); });
@@ -651,7 +651,7 @@ static inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor
651651 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
652652 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
653653 const int num_blocks = ceil_div (k_elements, 256 );
654- stream-> parallel_for (
654+ sycl_parallel_for (stream,
655655 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(256 ),
656656 sycl::range<1 >(256 )),
657657 [=](sycl::nd_item<1 > item_ct1) {
@@ -664,7 +664,7 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor
664664 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
665665 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
666666 const int num_blocks = ceil_div (k_elements, 256 );
667- stream-> parallel_for (
667+ sycl_parallel_for (stream,
668668 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(256 ),
669669 sycl::range<1 >(256 )),
670670 [=](sycl::nd_item<1 > item_ct1) {
@@ -677,7 +677,7 @@ static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tens
677677 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
678678 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
679679 const int num_blocks = ceil_div (k_elements, 256 );
680- stream-> parallel_for (
680+ sycl_parallel_for (stream,
681681 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(256 ),
682682 sycl::range<1 >(256 )),
683683 [=](sycl::nd_item<1 > item_ct1) {
@@ -690,7 +690,7 @@ static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor
690690 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
691691 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
692692 const int num_blocks = ceil_div (k_elements, 256 );
693- stream-> parallel_for (
693+ sycl_parallel_for (stream,
694694 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(256 ),
695695 sycl::range<1 >(256 )),
696696 [=](sycl::nd_item<1 > item_ct1) {
@@ -703,7 +703,7 @@ static inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tenso
703703 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
704704 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
705705 const int num_blocks = ceil_div (k_elements, SYCL_SILU_BLOCK_SIZE);
706- stream-> parallel_for (
706+ sycl_parallel_for (stream,
707707 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SILU_BLOCK_SIZE),
708708 sycl::range<1 >(SYCL_SILU_BLOCK_SIZE)),
709709 [=](sycl::nd_item<1 > item_ct1) {
@@ -716,7 +716,7 @@ static inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tenso
716716 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
717717 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
718718 const int num_blocks = ceil_div (k_elements, SYCL_GELU_BLOCK_SIZE);
719- stream-> parallel_for (
719+ sycl_parallel_for (stream,
720720 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE),
721721 sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)),
722722 [=](sycl::nd_item<1 > item_ct1) {
@@ -729,7 +729,7 @@ static inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml
729729 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
730730 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
731731 const int num_blocks = ceil_div (k_elements, SYCL_GELU_BLOCK_SIZE);
732- stream-> parallel_for (
732+ sycl_parallel_for (stream,
733733 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE),
734734 sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)),
735735 [=](sycl::nd_item<1 > item_ct1) {
@@ -742,7 +742,7 @@ static inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_t
742742 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
743743 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
744744 const int num_blocks = ceil_div (k_elements, SYCL_GELU_BLOCK_SIZE);
745- stream-> parallel_for (
745+ sycl_parallel_for (stream,
746746 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE),
747747 sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)),
748748 [=](sycl::nd_item<1 > item_ct1) {
@@ -755,7 +755,7 @@ static inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tenso
755755 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
756756 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
757757 const int num_blocks = ceil_div (k_elements, SYCL_TANH_BLOCK_SIZE);
758- stream-> parallel_for (
758+ sycl_parallel_for (stream,
759759 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_TANH_BLOCK_SIZE),
760760 sycl::range<1 >(SYCL_TANH_BLOCK_SIZE)),
761761 [=](sycl::nd_item<1 > item_ct1) {
@@ -768,7 +768,7 @@ static inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tenso
768768 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
769769 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
770770 const int num_blocks = ceil_div (k_elements, SYCL_RELU_BLOCK_SIZE);
771- stream-> parallel_for (
771+ sycl_parallel_for (stream,
772772 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_RELU_BLOCK_SIZE),
773773 sycl::range<1 >(SYCL_RELU_BLOCK_SIZE)),
774774 [=](sycl::nd_item<1 > item_ct1) {
@@ -781,7 +781,7 @@ static inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggm
781781 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
782782 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
783783 const int num_blocks = ceil_div (k_elements, SYCL_HARDSIGMOID_BLOCK_SIZE);
784- stream-> parallel_for (
784+ sycl_parallel_for (stream,
785785 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_HARDSIGMOID_BLOCK_SIZE),
786786 sycl::range<1 >(SYCL_HARDSIGMOID_BLOCK_SIZE)),
787787 [=](sycl::nd_item<1 > item_ct1) {
@@ -794,7 +794,7 @@ static inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_
794794 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
795795 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
796796 const int num_blocks = ceil_div (k_elements, SYCL_HARDSWISH_BLOCK_SIZE);
797- stream-> parallel_for (
797+ sycl_parallel_for (stream,
798798 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_HARDSWISH_BLOCK_SIZE),
799799 sycl::range<1 >(SYCL_HARDSWISH_BLOCK_SIZE)),
800800 [=](sycl::nd_item<1 > item_ct1) {
@@ -807,7 +807,7 @@ static inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor
807807 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
808808 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
809809 const int num_blocks = ceil_div (k_elements, SYCL_EXP_BLOCK_SIZE);
810- stream-> parallel_for (
810+ sycl_parallel_for (stream,
811811 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_EXP_BLOCK_SIZE),
812812 sycl::range<1 >(SYCL_EXP_BLOCK_SIZE)),
813813 [=](sycl::nd_item<1 > item_ct1) {
@@ -820,7 +820,7 @@ static inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor
820820 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
821821 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
822822 const int num_blocks = ceil_div (k_elements, SYCL_EXP_BLOCK_SIZE); // Using EXP block size
823- stream-> parallel_for (
823+ sycl_parallel_for (stream,
824824 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_EXP_BLOCK_SIZE),
825825 sycl::range<1 >(SYCL_EXP_BLOCK_SIZE)),
826826 [=](sycl::nd_item<1 > item_ct1) {
@@ -833,7 +833,7 @@ static inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor
833833 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
834834 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
835835 const int num_blocks = ceil_div (k_elements, SYCL_NEG_BLOCK_SIZE);
836- stream-> parallel_for (
836+ sycl_parallel_for (stream,
837837 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_NEG_BLOCK_SIZE),
838838 sycl::range<1 >(SYCL_NEG_BLOCK_SIZE)),
839839 [=](sycl::nd_item<1 > item_ct1) {
@@ -846,7 +846,7 @@ static inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tenso
846846 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
847847 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
848848 const int num_blocks = ceil_div (k_elements, SYCL_NEG_BLOCK_SIZE); // Using NEG block size
849- stream-> parallel_for (
849+ sycl_parallel_for (stream,
850850 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_NEG_BLOCK_SIZE),
851851 sycl::range<1 >(SYCL_NEG_BLOCK_SIZE)),
852852 [=](sycl::nd_item<1 > item_ct1) {
@@ -859,7 +859,7 @@ static inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_te
859859 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
860860 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
861861 const int num_blocks = ceil_div (k_elements, SYCL_SIGMOID_BLOCK_SIZE);
862- stream-> parallel_for (
862+ sycl_parallel_for (stream,
863863 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SIGMOID_BLOCK_SIZE),
864864 sycl::range<1 >(SYCL_SIGMOID_BLOCK_SIZE)),
865865 [=](sycl::nd_item<1 > item_ct1) {
@@ -872,7 +872,7 @@ static inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tenso
872872 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
873873 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
874874 const int num_blocks = ceil_div (k_elements, SYCL_SQRT_BLOCK_SIZE);
875- stream-> parallel_for (
875+ sycl_parallel_for (stream,
876876 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SQRT_BLOCK_SIZE),
877877 sycl::range<1 >(SYCL_SQRT_BLOCK_SIZE)),
878878 [=](sycl::nd_item<1 > item_ct1) {
@@ -885,7 +885,7 @@ static inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor
885885 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
886886 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
887887 const int num_blocks = ceil_div (k_elements, SYCL_SIN_BLOCK_SIZE);
888- stream-> parallel_for (
888+ sycl_parallel_for (stream,
889889 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SIN_BLOCK_SIZE),
890890 sycl::range<1 >(SYCL_SIN_BLOCK_SIZE)),
891891 [=](sycl::nd_item<1 > item_ct1) {
@@ -898,7 +898,7 @@ static inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor
898898 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
899899 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
900900 const int num_blocks = ceil_div (k_elements, SYCL_SIN_BLOCK_SIZE); // Using SIN block size
901- stream-> parallel_for (
901+ sycl_parallel_for (stream,
902902 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SIN_BLOCK_SIZE),
903903 sycl::range<1 >(SYCL_SIN_BLOCK_SIZE)),
904904 [=](sycl::nd_item<1 > item_ct1) {
@@ -913,7 +913,7 @@ static inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml
913913 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
914914 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream, float slope) {
915915 const int num_blocks = ceil_div (k_elements, SYCL_RELU_BLOCK_SIZE);
916- stream-> parallel_for (
916+ sycl_parallel_for (stream,
917917 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_RELU_BLOCK_SIZE),
918918 sycl::range<1 >(SYCL_RELU_BLOCK_SIZE)),
919919 [=](sycl::nd_item<1 > item_ct1) {
@@ -926,7 +926,7 @@ static inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor
926926 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
927927 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
928928 const int num_blocks = ceil_div (k_elements, SYCL_SQR_BLOCK_SIZE);
929- stream-> parallel_for (
929+ sycl_parallel_for (stream,
930930 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_SQR_BLOCK_SIZE),
931931 sycl::range<1 >(SYCL_SQR_BLOCK_SIZE)),
932932 [=](sycl::nd_item<1 > item_ct1) {
@@ -960,7 +960,7 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
960960 ggml_sycl_detail::dispatch_ggml_sycl_op_unary (ctx, dst,
961961 [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream, float min_arg, float max_arg) {
962962 const int num_blocks = ceil_div (k_elements, SYCL_CLAMP_BLOCK_SIZE);
963- stream-> parallel_for (
963+ sycl_parallel_for (stream,
964964 sycl::nd_range<1 >(sycl::range<1 >(num_blocks) * sycl::range<1 >(SYCL_CLAMP_BLOCK_SIZE),
965965 sycl::range<1 >(SYCL_CLAMP_BLOCK_SIZE)),
966966 [=](sycl::nd_item<1 > item_ct1) {
@@ -992,9 +992,9 @@ static inline void ggml_sycl_op_geglu(ggml_backend_sycl_context & ctx, ggml_tens
992992 ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu (ctx, dst,
993993 [](const auto * x_ptr, const auto * g_ptr, auto * dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
994994 const uint32_t num_blocks = ceil_div (k, SYCL_GELU_BLOCK_SIZE);
995- main_stream-> parallel_for (
996- sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
997- gated_op_fused_geglu (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
995+ sycl_parallel_for (main_stream,
996+ sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
997+ gated_op_fused_geglu (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
998998 });
999999 });
10001000}
@@ -1003,7 +1003,7 @@ static inline void ggml_sycl_op_reglu(ggml_backend_sycl_context & ctx, ggml_tens
10031003 ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu (ctx, dst,
10041004 [](const auto * x_ptr, const auto * g_ptr, auto * dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10051005 const uint32_t num_blocks = ceil_div ((uint32_t )k, SYCL_RELU_BLOCK_SIZE); // Using RELU block size for reglu
1006- main_stream-> parallel_for (
1006+ sycl_parallel_for (main_stream,
10071007 sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_RELU_BLOCK_SIZE)), sycl::range<1 >(SYCL_RELU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
10081008 gated_op_fused_reglu (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10091009 });
@@ -1014,7 +1014,7 @@ static inline void ggml_sycl_op_swiglu(ggml_backend_sycl_context & ctx, ggml_ten
10141014 ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu (ctx, dst,
10151015 [](const auto * x_ptr, const auto * g_ptr, auto * dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10161016 const uint32_t num_blocks = ceil_div ((uint32_t )k, SYCL_SILU_BLOCK_SIZE); // Using SILU block size for swiglu
1017- main_stream-> parallel_for (
1017+ sycl_parallel_for (main_stream,
10181018 sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_SILU_BLOCK_SIZE)), sycl::range<1 >(SYCL_SILU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
10191019 gated_op_fused_swiglu (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10201020 });
@@ -1025,7 +1025,7 @@ static inline void ggml_sycl_op_geglu_erf(ggml_backend_sycl_context & ctx, ggml_
10251025 ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu (ctx, dst,
10261026 [](const auto * x_ptr, const auto * g_ptr, auto * dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10271027 const uint32_t num_blocks = ceil_div (k, SYCL_GELU_BLOCK_SIZE);
1028- main_stream-> parallel_for (
1028+ sycl_parallel_for (main_stream,
10291029 sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
10301030 gated_op_fused_geglu_erf (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10311031 });
@@ -1036,7 +1036,7 @@ static inline void ggml_sycl_op_geglu_quick(ggml_backend_sycl_context & ctx, ggm
10361036 ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu (ctx, dst,
10371037 [](const auto * x_ptr, const auto * g_ptr, auto * dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10381038 const uint32_t num_blocks = ceil_div (k, SYCL_GELU_BLOCK_SIZE);
1039- main_stream-> parallel_for (
1039+ sycl_parallel_for (main_stream,
10401040 sycl::nd_range<1 >((num_blocks * sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), sycl::range<1 >(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1 > item_ct1) {
10411041 gated_op_fused_geglu_quick (x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10421042 });
0 commit comments