Skip to content

Commit 03bd1a6

Browse files
committed
restored common.hpp
1 parent acd80ec commit 03bd1a6

File tree

1 file changed

+117
-6
lines changed

1 file changed

+117
-6
lines changed

ggml/src/ggml-sycl/common.hpp

Lines changed: 117 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#ifndef GGML_SYCL_COMMON_HPP
1414
#define GGML_SYCL_COMMON_HPP
1515

16+
#include <array>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>

#include "dpct/helper.hpp"
#include "ggml-sycl.h"
@@ -44,11 +46,20 @@ extern int g_ggml_sycl_debug;
4446
extern int g_ggml_sycl_disable_optimize;
4547
extern int g_ggml_sycl_prioritize_dmmv;
4648

47-
#define GGML_SYCL_DEBUG(...) \
48-
do { \
49-
if (g_ggml_sycl_debug) \
50-
fprintf(stderr, __VA_ARGS__); \
51-
} while (0)
49+
#if defined(__clang__) && __has_builtin(__builtin_expect)
50+
// Hint the optimizer to pipeline the more likely following instruction in branches
51+
# define LIKELY(expr) __builtin_expect(expr, true)
52+
# define UNLIKELY(expr) __builtin_expect(expr, false)
53+
#else
54+
# define LIKELY(expr) (expr)
55+
# define UNLIKELY(expr) (expr)
56+
#endif
57+
58+
#define GGML_SYCL_DEBUG(...) \
59+
do { \
60+
if (UNLIKELY(g_ggml_sycl_debug)) \
61+
fprintf(stderr, __VA_ARGS__); \
62+
} while (0)
5263

5364
#define CHECK_TRY_ERROR(expr) \
5465
[&]() { \
@@ -280,7 +291,22 @@ void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> str
280291
inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
281292
optimize_feature opt;
282293

283-
opt.reorder = true;
294+
opt.reorder =
295+
(arch == syclex::architecture::intel_gpu_dg1 ||
296+
arch == syclex::architecture::intel_gpu_acm_g10 ||
297+
arch == syclex::architecture::intel_gpu_acm_g11 ||
298+
arch == syclex::architecture::intel_gpu_acm_g12 ||
299+
arch == syclex::architecture::intel_gpu_pvc ||
300+
arch == syclex::architecture::intel_gpu_pvc_vg ||
301+
arch == syclex::architecture::intel_gpu_mtl_u ||
302+
arch == syclex::architecture::intel_gpu_mtl_s ||
303+
arch == syclex::architecture::intel_gpu_mtl_h ||
304+
arch == syclex::architecture::intel_gpu_arl_u ||
305+
arch == syclex::architecture::intel_gpu_arl_s ||
306+
arch == syclex::architecture::intel_gpu_arl_h ||
307+
arch == syclex::architecture::intel_gpu_bmg_g21 ||
308+
arch == syclex::architecture::intel_gpu_lnl_m
309+
);
284310

285311
return opt;
286312
}
@@ -456,6 +482,19 @@ static __dpct_inline__ float warp_reduce_max(float x,
456482
return x;
457483
}
458484

485+
/* Helper for Computing the linear offset of a ggml_tensor given
486+
per-dimension sizes, strides, and indices */
487+
template<int N>
488+
__dpct_inline__ size_t calculate_offset(const std::array<int, N> & strides, const std::array<int, N> & indices) {
489+
size_t offset = 0;
490+
#pragma unroll
491+
for (int i = 0; i < N; i++) {
492+
auto index_i = indices[i];
493+
offset += strides[i] * index_i;
494+
}
495+
return offset;
496+
}
497+
459498
// Helper for vec loading aligned data
460499
template <typename Tp, int n>
461500
inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
@@ -475,4 +514,76 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
475514
}
476515

477516
bool gpu_has_xmx(sycl::device &dev);
517+
518+
template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
519+
if (LIKELY(!g_ggml_sycl_debug)) {
520+
return;
521+
}
522+
std::stringstream ss;
523+
ss << prefix << "=[";
524+
for (std::size_t i = 0; i < N - 1; ++i) {
525+
ss << array[i] << ", ";
526+
}
527+
if constexpr (N > 0) {
528+
ss << array[N - 1];
529+
}
530+
ss << "]";
531+
GGML_SYCL_DEBUG("%s", ss.str().c_str());
532+
}
533+
534+
inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
535+
const std::string & suffix = "") {
536+
if (LIKELY(!g_ggml_sycl_debug)) {
537+
return;
538+
}
539+
GGML_SYCL_DEBUG("%s=", prefix.c_str());
540+
if (tensor) {
541+
GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
542+
debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
543+
debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
544+
if (!ggml_is_contiguous(tensor)) {
545+
GGML_SYCL_DEBUG(";strided");
546+
}
547+
if (ggml_is_permuted(tensor)) {
548+
GGML_SYCL_DEBUG(";permuted");
549+
}
550+
} else {
551+
GGML_SYCL_DEBUG("nullptr");
552+
}
553+
GGML_SYCL_DEBUG("%s", suffix.c_str());
554+
}
555+
556+
// Use scope_op_debug_print to log operations coming from running a model
557+
struct scope_op_debug_print {
558+
// Use string_views to avoid the cost of creating a string and concatenating them
559+
// string_views must be alive for as long as the object is alive
560+
// scope_op_debug_print are used with string literals in practice which are stored in constant space so always accessible
561+
scope_op_debug_print(const std::string_view & func, const std::string_view & func_suffix, const ggml_tensor * dst,
562+
std::size_t num_src, const std::string_view & suffix = "") :
563+
func(func),
564+
func_suffix(func_suffix) {
565+
if (LIKELY(!g_ggml_sycl_debug)) {
566+
return;
567+
}
568+
GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
569+
debug_print_tensor(" dst", dst);
570+
if (dst) {
571+
for (std::size_t i = 0; i < num_src; ++i) {
572+
debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
573+
}
574+
}
575+
GGML_SYCL_DEBUG("%s\n", suffix.data());
576+
}
577+
578+
scope_op_debug_print(const std::string_view & func, const ggml_tensor * dst, std::size_t num_src,
579+
const std::string_view & suffix = "") :
580+
scope_op_debug_print(func, "", dst, num_src, suffix) {}
581+
582+
~scope_op_debug_print() { GGML_SYCL_DEBUG("[SYCL][OP] call %s%s done\n", func.data(), func_suffix.data()); }
583+
584+
private:
585+
std::string_view func;
586+
std::string_view func_suffix;
587+
};
588+
478589
#endif // GGML_SYCL_COMMON_HPP

0 commit comments

Comments
 (0)