Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions ggml/src/ggml-sycl/binbcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,32 +208,27 @@ inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *ds


void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_add(ctx, dst);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_sub(ctx, dst);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_mul(ctx, dst);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_div(ctx, dst);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_repeat(ctx, dst);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}

82 changes: 77 additions & 5 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <fstream>
#include <iostream>
#include <string>

#include "dpct/helper.hpp"
#include "ggml-sycl.h"
Expand Down Expand Up @@ -44,11 +45,20 @@ extern int g_ggml_sycl_debug;
extern int g_ggml_sycl_disable_optimize;
extern int g_ggml_sycl_prioritize_dmmv;

#define GGML_SYCL_DEBUG(...) \
do { \
if (g_ggml_sycl_debug) \
fprintf(stderr, __VA_ARGS__); \
} while (0)
#if defined(__clang__) && __has_builtin(__builtin_expect)
// Hint the optimizer to pipeline the more likely following instruction in branches
# define LIKELY(expr) __builtin_expect(expr, true)
# define UNLIKELY(expr) __builtin_expect(expr, false)
#else
# define LIKELY(expr) (expr)
# define UNLIKELY(expr) (expr)
#endif

#define GGML_SYCL_DEBUG(...) \
do { \
if (UNLIKELY(g_ggml_sycl_debug)) \
fprintf(stderr, __VA_ARGS__); \
} while (0)

#define CHECK_TRY_ERROR(expr) \
[&]() { \
Expand Down Expand Up @@ -490,4 +500,66 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
}

bool gpu_has_xmx(sycl::device &dev);

template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
if (LIKELY(!g_ggml_sycl_debug)) {
return;
}
std::stringstream ss;
ss << prefix << "=[";
for (std::size_t i = 0; i < N - 1; ++i) {
ss << array[i] << ", ";
}
if constexpr (N > 0) {
ss << array[N - 1];
}
ss << "]";
GGML_SYCL_DEBUG("%s", ss.str().c_str());
}

inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
const std::string & suffix = "") {
if (LIKELY(!g_ggml_sycl_debug)) {
return;
}
GGML_SYCL_DEBUG("%s=", prefix.c_str());
if (tensor) {
GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
if (!ggml_is_contiguous(tensor)) {
GGML_SYCL_DEBUG(";strided");
}
if (ggml_is_permuted(tensor)) {
GGML_SYCL_DEBUG(";permuted");
}
} else {
GGML_SYCL_DEBUG("nullptr");
}
GGML_SYCL_DEBUG("%s", suffix.c_str());
}

struct scope_op_debug_print {
scope_op_debug_print(const std::string & func, const ggml_tensor * dst, std::size_t num_src,
const std::string & suffix = "") :
func(func) {
if (LIKELY(!g_ggml_sycl_debug)) {
return;
}
GGML_SYCL_DEBUG("[SYCL][OP] call %s:", func.c_str());
debug_print_tensor(" dst", dst);
if (dst) {
for (std::size_t i = 0; i < num_src; ++i) {
debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
}
}
GGML_SYCL_DEBUG("%s\n", suffix.c_str());
}

~scope_op_debug_print() { GGML_SYCL_DEBUG("[SYCL][OP] call %s done\n", func.c_str()); }

private:
std::string func;
};

#endif // GGML_SYCL_COMMON_HPP
64 changes: 31 additions & 33 deletions ggml/src/ggml-sycl/concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,39 +159,37 @@ static void concat_f32_sycl_non_cont(
}

void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
queue_ptr stream = ctx.stream();

const int32_t dim = ((int32_t *)dst->op_params)[0];

if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
const float *src0_d = (const float *)src0->data;
const float *src1_d = (const float *)src1->data;

float *dst_d = (float *)dst->data;

if (dim != 3) {
for (int i3 = 0; i3 < dst->ne[3]; i3++) {
concat_f32_sycl(
src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
}
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
const ggml_tensor * src0 = dst->src[0];
const ggml_tensor * src1 = dst->src[1];
queue_ptr stream = ctx.stream();

const int32_t dim = ((int32_t *) dst->op_params)[0];

if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
const float * src0_d = (const float *) src0->data;
const float * src1_d = (const float *) src1->data;

float * dst_d = (float *) dst->data;

if (dim != 3) {
for (int i3 = 0; i3 < dst->ne[3]; i3++) {
concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
dst->ne[1], dst->ne[2], dim, stream);
}
} else {
const size_t size0 = ggml_nbytes(src0);
const size_t size1 = ggml_nbytes(src1);

SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
}
} else {
const size_t size0 = ggml_nbytes(src0);
const size_t size1 = ggml_nbytes(src1);

SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
SYCL_CHECK(CHECK_TRY_ERROR(
stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
concat_f32_sycl_non_cont(stream, (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1],
src0->nb[2], src0->nb[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
}
} else
concat_f32_sycl_non_cont(
stream, (const char *)src0->data, (const char *)src1->data,
(char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
}
1 change: 1 addition & 0 deletions ggml/src/ggml-sycl/conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ static void conv_transpose_1d_f32_f32_sycl(
}

void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
const float * src0_d = (const float *)src0->data;
Expand Down
9 changes: 4 additions & 5 deletions ggml/src/ggml-sycl/cpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,9 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co
}

void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
// Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
std::string(" src0 type=") + ggml_type_name(src0->type));
const int64_t ne = ggml_nelements(src0);
GGML_ASSERT(ne == ggml_nelements(src1));

Expand All @@ -629,8 +632,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co

char * src0_ddc = (char *) src0->data;
char * src1_ddc = (char *) src1->data;
GGML_SYCL_DEBUG("[SYCL] %s: Tensor supplied: %s to %s\n", __func__, ggml_type_name(src0->type),
ggml_type_name(src1->type));

if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
ggml_cpy_f32_f32_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10,
Expand Down Expand Up @@ -694,8 +695,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
}

void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
// TODO: why do we pass dst as src1 here?
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_cpy(ctx, dst->src[0], dst);
GGML_SYCL_DEBUG("[SYCL] call %s done\n", __func__);
}
2 changes: 2 additions & 0 deletions ggml/src/ggml-sycl/dmmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,8 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
src0->type == GGML_TYPE_Q8_0 || src0->type == GGML_TYPE_F16;

if (src1_convert_f16) {
scope_op_debug_print scope_dbg_print(std::string(__func__) + "to_fp16_sycl", dst, /*num_src=*/2,
" : converting src1 to fp16");
src1_dfloat = src1_dfloat_a.alloc(ne00);
const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
GGML_ASSERT(to_fp16_sycl != nullptr);
Expand Down
Loading
Loading