@@ -937,70 +937,45 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst
     }
 }

-static inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, SYCL_NEG_BLOCK_SIZE); // Using NEG block size
-            sycl_parallel_for(stream,
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_NEG_BLOCK_SIZE),
-                                  sycl::range<1>(SYCL_NEG_BLOCK_SIZE)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_step_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
-}
-
-static inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, SYCL_SIGMOID_BLOCK_SIZE);
-            sycl_parallel_for(stream,
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE),
-                                  sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_sigmoid_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
-}
-
-static inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, SYCL_SQRT_BLOCK_SIZE);
-            sycl_parallel_for(stream,
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SQRT_BLOCK_SIZE),
-                                  sycl::range<1>(SYCL_SQRT_BLOCK_SIZE)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_sqrt_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
-}
-
-static inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE);
-            sycl_parallel_for(stream,
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE),
-                                  sycl::range<1>(SYCL_SIN_BLOCK_SIZE)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_sin_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
+inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+#if defined(GGML_SYCL_F16)
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+#else
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+#endif
+    GGML_ASSERT(dst->src[0]->type == dst->type);
+    dpct::queue_ptr main_stream = ctx.stream();
+    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+    switch (dst->type) {
+#if defined(GGML_SYCL_F16)
+        case GGML_TYPE_F16:
+            {
+                auto data_pts = cast_data<sycl::half>(dst);
+                neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
+                break;
+            }
+#endif
+        case GGML_TYPE_F32:
+            {
+                auto data_pts = cast_data<float>(dst);
+                neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
+                break;
+            }
+        default:
+            GGML_ABORT("GGML tensor type not supported!\n");
+    }
 }

-static inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
-        [](const auto * src, auto * dst_ptr, int k_elements, queue_ptr stream) {
-            const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE); // Using SIN block size
-            sycl_parallel_for(stream,
-                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE),
-                                  sycl::range<1>(SYCL_SIN_BLOCK_SIZE)),
-                [=](sycl::nd_item<1> item_ct1) {
-                    unary_op_cos_kernel(src, dst_ptr, k_elements, item_ct1);
-                });
-        });
-}
+inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+#if defined(GGML_SYCL_F16)
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+#else
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+#endif

 static inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     float negative_slope;
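
Note on the restored style: the added wrappers rely on a cast_data<T> helper whose definition lies outside this hunk. Judging only from its uses above (data_pts.src / data_pts.dst), a minimal sketch of its assumed shape is:

// Assumed shape of cast_data<T>, inferred from its uses in this hunk only:
// it bundles the typed input and output pointers of the destination tensor.
template <typename T>
struct typed_data {
    const T * src;
    T *       dst;
};

template <typename T>
typed_data<T> cast_data(ggml_tensor * dst) {
    return {
        /* src = */ static_cast<const T *>(dst->src[0]->data),
        /* dst = */ static_cast<T *>(dst->data)
    };
}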
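For comparison, every removed wrapper shared the same launch shape: ceiling-division of the element count into fixed-size work-groups, an nd_range launch, and a bounds check inside the kernel because the global size can overshoot k_elements. A self-contained sketch of that pattern in plain SYCL (the negation body, the block size of 256, and the buffer setup are illustrative assumptions, not code from this commit):

#include <sycl/sycl.hpp>

// Round x up to a whole number of blocks of size y.
static int ceil_div(int x, int y) { return (x + y - 1) / y; }

int main() {
    constexpr int BLOCK = 256;   // stand-in for SYCL_NEG_BLOCK_SIZE etc.
    const int k = 1000;          // element count, not a multiple of BLOCK

    sycl::queue q;
    float * src = sycl::malloc_shared<float>(k, q);
    float * dst = sycl::malloc_shared<float>(k, q);
    for (int i = 0; i < k; ++i) src[i] = float(i);

    const int num_blocks = ceil_div(k, BLOCK);   // 4 groups -> global size 1024
    q.parallel_for(
        sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(BLOCK),
                          sycl::range<1>(BLOCK)),
        [=](sycl::nd_item<1> it) {
            const int i = it.get_global_id(0);
            if (i < k) {         // bounds check: global size exceeds k
                dst[i] = -src[i];
            }
        }).wait();

    sycl::free(src, q);
    sycl::free(dst, q);
}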