Skip to content

Commit eeb54fe

Browse files
committed
Conflict resolution
1 parent 7d52483 commit eeb54fe

File tree

1 file changed

+32
-32
lines changed

1 file changed

+32
-32
lines changed

ggml/src/ggml-sycl/element_wise.cpp

Lines changed: 32 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -419,7 +419,7 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst,
419419
const int ne12, const int nb1, const int nb2,
420420
const int offset, queue_ptr stream) {
421421
int num_blocks = ceil_div(n_elements, SYCL_ACC_BLOCK_SIZE);
422-
stream->paraller_for(
422+
sycl_parallel_for(stream,
423423
sycl::nd_range<1>(sycl::range<1>(num_blocks) *
424424
sycl::range<1>(SYCL_ACC_BLOCK_SIZE),
425425
sycl::range<1>(SYCL_ACC_BLOCK_SIZE)),
@@ -449,7 +449,7 @@ static void pad_sycl(const T *x, T *dst, const int ne00,
449449
const int ne1, const int ne2, queue_ptr stream) {
450450
int num_blocks = ceil_div(ne0, SYCL_PAD_BLOCK_SIZE);
451451
sycl::range<3> gridDim(ne2, ne1, num_blocks);
452-
stream->paraller_for(
452+
sycl_parallel_for(stream,
453453
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE),
454454
sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)),
455455
[=](sycl::nd_item<3> item_ct1) { pad(x, dst, ne0, ne00, ne01, ne02, item_ct1); });
@@ -651,7 +651,7 @@ static inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor
651651
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
652652
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
653653
const int num_blocks = ceil_div(k_elements, 256);
654-
stream->paraller_for(
654+
sycl_parallel_for(stream,
655655
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
656656
sycl::range<1>(256)),
657657
[=](sycl::nd_item<1> item_ct1) {
@@ -664,7 +664,7 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor
664664
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
665665
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
666666
const int num_blocks = ceil_div(k_elements, 256);
667-
stream->paraller_for(
667+
sycl_parallel_for(stream,
668668
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
669669
sycl::range<1>(256)),
670670
[=](sycl::nd_item<1> item_ct1) {
@@ -677,7 +677,7 @@ static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tens
677677
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
678678
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
679679
const int num_blocks = ceil_div(k_elements, 256);
680-
stream->paraller_for(
680+
sycl_parallel_for(stream,
681681
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
682682
sycl::range<1>(256)),
683683
[=](sycl::nd_item<1> item_ct1) {
@@ -690,7 +690,7 @@ static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor
690690
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
691691
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
692692
const int num_blocks = ceil_div(k_elements, 256);
693-
stream->paraller_for(
693+
sycl_parallel_for(stream,
694694
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
695695
sycl::range<1>(256)),
696696
[=](sycl::nd_item<1> item_ct1) {
@@ -703,7 +703,7 @@ static inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tenso
703703
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
704704
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
705705
const int num_blocks = ceil_div(k_elements, SYCL_SILU_BLOCK_SIZE);
706-
stream->paraller_for(
706+
sycl_parallel_for(stream,
707707
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SILU_BLOCK_SIZE),
708708
sycl::range<1>(SYCL_SILU_BLOCK_SIZE)),
709709
[=](sycl::nd_item<1> item_ct1) {
@@ -716,7 +716,7 @@ static inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tenso
716716
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
717717
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
718718
const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE);
719-
stream->paraller_for(
719+
sycl_parallel_for(stream,
720720
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE),
721721
sycl::range<1>(SYCL_GELU_BLOCK_SIZE)),
722722
[=](sycl::nd_item<1> item_ct1) {
@@ -729,7 +729,7 @@ static inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml
729729
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
730730
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
731731
const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE);
732-
stream->paraller_for(
732+
sycl_parallel_for(stream,
733733
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE),
734734
sycl::range<1>(SYCL_GELU_BLOCK_SIZE)),
735735
[=](sycl::nd_item<1> item_ct1) {
@@ -742,7 +742,7 @@ static inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_t
742742
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
743743
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
744744
const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE);
745-
stream->paraller_for(
745+
sycl_parallel_for(stream,
746746
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE),
747747
sycl::range<1>(SYCL_GELU_BLOCK_SIZE)),
748748
[=](sycl::nd_item<1> item_ct1) {
@@ -755,7 +755,7 @@ static inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tenso
755755
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
756756
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
757757
const int num_blocks = ceil_div(k_elements, SYCL_TANH_BLOCK_SIZE);
758-
stream->paraller_for(
758+
sycl_parallel_for(stream,
759759
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_TANH_BLOCK_SIZE),
760760
sycl::range<1>(SYCL_TANH_BLOCK_SIZE)),
761761
[=](sycl::nd_item<1> item_ct1) {
@@ -768,7 +768,7 @@ static inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tenso
768768
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
769769
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
770770
const int num_blocks = ceil_div(k_elements, SYCL_RELU_BLOCK_SIZE);
771-
stream->paraller_for(
771+
sycl_parallel_for(stream,
772772
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_RELU_BLOCK_SIZE),
773773
sycl::range<1>(SYCL_RELU_BLOCK_SIZE)),
774774
[=](sycl::nd_item<1> item_ct1) {
@@ -781,7 +781,7 @@ static inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggm
781781
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
782782
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
783783
const int num_blocks = ceil_div(k_elements, SYCL_HARDSIGMOID_BLOCK_SIZE);
784-
stream->paraller_for(
784+
sycl_parallel_for(stream,
785785
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_HARDSIGMOID_BLOCK_SIZE),
786786
sycl::range<1>(SYCL_HARDSIGMOID_BLOCK_SIZE)),
787787
[=](sycl::nd_item<1> item_ct1) {
@@ -794,7 +794,7 @@ static inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_
794794
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
795795
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
796796
const int num_blocks = ceil_div(k_elements, SYCL_HARDSWISH_BLOCK_SIZE);
797-
stream->paraller_for(
797+
sycl_parallel_for(stream,
798798
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_HARDSWISH_BLOCK_SIZE),
799799
sycl::range<1>(SYCL_HARDSWISH_BLOCK_SIZE)),
800800
[=](sycl::nd_item<1> item_ct1) {
@@ -807,7 +807,7 @@ static inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor
807807
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
808808
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
809809
const int num_blocks = ceil_div(k_elements, SYCL_EXP_BLOCK_SIZE);
810-
stream->paraller_for(
810+
sycl_parallel_for(stream,
811811
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_EXP_BLOCK_SIZE),
812812
sycl::range<1>(SYCL_EXP_BLOCK_SIZE)),
813813
[=](sycl::nd_item<1> item_ct1) {
@@ -820,7 +820,7 @@ static inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor
820820
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
821821
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
822822
const int num_blocks = ceil_div(k_elements, SYCL_EXP_BLOCK_SIZE); // Using EXP block size
823-
stream->paraller_for(
823+
sycl_parallel_for(stream,
824824
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_EXP_BLOCK_SIZE),
825825
sycl::range<1>(SYCL_EXP_BLOCK_SIZE)),
826826
[=](sycl::nd_item<1> item_ct1) {
@@ -833,7 +833,7 @@ static inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor
833833
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
834834
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
835835
const int num_blocks = ceil_div(k_elements, SYCL_NEG_BLOCK_SIZE);
836-
stream->paraller_for(
836+
sycl_parallel_for(stream,
837837
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_NEG_BLOCK_SIZE),
838838
sycl::range<1>(SYCL_NEG_BLOCK_SIZE)),
839839
[=](sycl::nd_item<1> item_ct1) {
@@ -846,7 +846,7 @@ static inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tenso
846846
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
847847
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
848848
const int num_blocks = ceil_div(k_elements, SYCL_NEG_BLOCK_SIZE); // Using NEG block size
849-
stream->paraller_for(
849+
sycl_parallel_for(stream,
850850
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_NEG_BLOCK_SIZE),
851851
sycl::range<1>(SYCL_NEG_BLOCK_SIZE)),
852852
[=](sycl::nd_item<1> item_ct1) {
@@ -859,7 +859,7 @@ static inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_te
859859
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
860860
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
861861
const int num_blocks = ceil_div(k_elements, SYCL_SIGMOID_BLOCK_SIZE);
862-
stream->paraller_for(
862+
sycl_parallel_for(stream,
863863
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE),
864864
sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE)),
865865
[=](sycl::nd_item<1> item_ct1) {
@@ -872,7 +872,7 @@ static inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tenso
872872
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
873873
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
874874
const int num_blocks = ceil_div(k_elements, SYCL_SQRT_BLOCK_SIZE);
875-
stream->paraller_for(
875+
sycl_parallel_for(stream,
876876
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SQRT_BLOCK_SIZE),
877877
sycl::range<1>(SYCL_SQRT_BLOCK_SIZE)),
878878
[=](sycl::nd_item<1> item_ct1) {
@@ -885,7 +885,7 @@ static inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor
885885
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
886886
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
887887
const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE);
888-
stream->paraller_for(
888+
sycl_parallel_for(stream,
889889
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE),
890890
sycl::range<1>(SYCL_SIN_BLOCK_SIZE)),
891891
[=](sycl::nd_item<1> item_ct1) {
@@ -898,7 +898,7 @@ static inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor
898898
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
899899
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
900900
const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE); // Using SIN block size
901-
stream->paraller_for(
901+
sycl_parallel_for(stream,
902902
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE),
903903
sycl::range<1>(SYCL_SIN_BLOCK_SIZE)),
904904
[=](sycl::nd_item<1> item_ct1) {
@@ -913,7 +913,7 @@ static inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml
913913
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
914914
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float slope) {
915915
const int num_blocks = ceil_div(k_elements, SYCL_RELU_BLOCK_SIZE);
916-
stream->paraller_for(
916+
sycl_parallel_for(stream,
917917
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_RELU_BLOCK_SIZE),
918918
sycl::range<1>(SYCL_RELU_BLOCK_SIZE)),
919919
[=](sycl::nd_item<1> item_ct1) {
@@ -926,7 +926,7 @@ static inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor
926926
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
927927
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
928928
const int num_blocks = ceil_div(k_elements, SYCL_SQR_BLOCK_SIZE);
929-
stream->paraller_for(
929+
sycl_parallel_for(stream,
930930
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SQR_BLOCK_SIZE),
931931
sycl::range<1>(SYCL_SQR_BLOCK_SIZE)),
932932
[=](sycl::nd_item<1> item_ct1) {
@@ -960,7 +960,7 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
960960
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
961961
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float min_arg, float max_arg) {
962962
const int num_blocks = ceil_div(k_elements, SYCL_CLAMP_BLOCK_SIZE);
963-
stream->paraller_for(
963+
sycl_parallel_for(stream,
964964
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE),
965965
sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE)),
966966
[=](sycl::nd_item<1> item_ct1) {
@@ -992,9 +992,9 @@ static inline void ggml_sycl_op_geglu(ggml_backend_sycl_context & ctx, ggml_tens
992992
ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst,
993993
[](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
994994
const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
995-
main_stream->paraller_for(
996-
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
997-
gated_op_fused_geglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
995+
sycl_parallel_for(main_stream,
996+
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
997+
gated_op_fused_geglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
998998
});
999999
});
10001000
}
@@ -1003,7 +1003,7 @@ static inline void ggml_sycl_op_reglu(ggml_backend_sycl_context & ctx, ggml_tens
10031003
ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst,
10041004
[](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10051005
const uint32_t num_blocks = ceil_div((uint32_t)k, SYCL_RELU_BLOCK_SIZE); // Using RELU block size for reglu
1006-
main_stream->paraller_for(
1006+
sycl_parallel_for(main_stream,
10071007
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
10081008
gated_op_fused_reglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10091009
});
@@ -1014,7 +1014,7 @@ static inline void ggml_sycl_op_swiglu(ggml_backend_sycl_context & ctx, ggml_ten
10141014
ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst,
10151015
[](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10161016
const uint32_t num_blocks = ceil_div((uint32_t)k, SYCL_SILU_BLOCK_SIZE); // Using SILU block size for swiglu
1017-
main_stream->paraller_for(
1017+
sycl_parallel_for(main_stream,
10181018
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_SILU_BLOCK_SIZE)), sycl::range<1>(SYCL_SILU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
10191019
gated_op_fused_swiglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10201020
});
@@ -1025,7 +1025,7 @@ static inline void ggml_sycl_op_geglu_erf(ggml_backend_sycl_context & ctx, ggml_
10251025
ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst,
10261026
[](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10271027
const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
1028-
main_stream->paraller_for(
1028+
sycl_parallel_for(main_stream,
10291029
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
10301030
gated_op_fused_geglu_erf(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10311031
});
@@ -1036,7 +1036,7 @@ static inline void ggml_sycl_op_geglu_quick(ggml_backend_sycl_context & ctx, ggm
10361036
ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst,
10371037
[](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) {
10381038
const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
1039-
main_stream->paraller_for(
1039+
sycl_parallel_for(main_stream,
10401040
sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
10411041
gated_op_fused_geglu_quick(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1);
10421042
});

0 commit comments

Comments (0)