3939#include " ggml-sycl/backend.hpp"
4040#include " ggml-sycl/presets.hpp"
4141#include " ggml-sycl/gemm.hpp"
42+ #include " ggml-sycl/sycl_hw.hpp"
4243
// Set once the SYCL backend has completed its one-time initialization.
static bool g_sycl_loaded = false;
4445
@@ -63,14 +64,18 @@ static ggml_sycl_device_info ggml_sycl_init() {
6364 for (int i = 0 ; i < info.device_count ; ++i) {
6465 info.devices [i].vmm = 0 ;
6566 dpct::device_info prop;
67+ sycl::device device = dpct::dev_mgr::instance ().get_device (i);
68+
6669 SYCL_CHECK (CHECK_TRY_ERROR (dpct::get_device_info (
67- prop, dpct::dev_mgr::instance (). get_device (i) )));
70+ prop, device )));
6871
6972 info.default_tensor_split [i] = total_vram;
7073 total_vram += prop.get_global_mem_size ();
7174
7275 info.devices [i].cc =
7376 100 * prop.get_major_version () + 10 * prop.get_minor_version ();
77+ info.devices [i].hw_info = get_device_hw_info (&device);
78+ info.devices [i].opt_feature = check_gpu_optimize_feature (info.devices [i].hw_info .family );
7479
7580 info.max_work_group_sizes [i] = prop.get_max_work_group_size ();
7681 }
@@ -109,6 +114,27 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
109114 global_mem_size, device.get_info <sycl::info::device::driver_version>().c_str ());
110115}
111116
117+ void print_device_opt_feature (int device_count) {
118+ GGML_LOG_INFO (" SYCL Optimization Feature:\n " );
119+ GGML_LOG_INFO (
120+ " |ID| Device Type|Reorder|\n " );
121+ GGML_LOG_INFO (
122+ " |--|-------------------|-------|\n " );
123+ std::map<std::string, size_t > DeviceNums;
124+ for (int id = 0 ; id < device_count; ++id) {
125+ sycl::device device = dpct::dev_mgr::instance ().get_device (id);
126+ std::string backend_type = get_device_backend_and_type (device);
127+ int type_id = DeviceNums[backend_type]++;
128+ std::stringstream device_type;
129+ device_type << " [" << backend_type << " :" << std::to_string (type_id)
130+ << " ]" ;
131+ std::string device_type_s = device_type.str ();
132+ device_type_s = std::regex_replace (device_type_s, std::regex (" ext_oneapi_" ), " " );
133+ GGML_LOG_INFO (" |%2d|%19s|%7s|\n " , id, device_type_s.c_str (),
134+ ggml_sycl_info ().devices [id].opt_feature .reorder ? " Y" : " N" );
135+ }
136+
137+ }
112138void ggml_backend_sycl_print_sycl_devices () {
113139 GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_print_sycl_devices\n " );
114140 int device_count = dpct::dev_mgr::instance ().device_count ();
@@ -137,6 +163,8 @@ void ggml_backend_sycl_print_sycl_devices() {
137163 << " ]" ;
138164 print_device_detail (id, device, device_type.str ());
139165 }
166+
167+ print_device_opt_feature (device_count);
140168}
141169
142170static inline int get_sycl_env (const char *env_name, int default_val) {
@@ -172,18 +200,6 @@ static void ggml_check_sycl() try {
172200 GGML_LOG_INFO (" GGML_SYCL_F16: no\n " );
173201#endif
174202
175- #if defined(GGML_SYCL_INTEL_TARGET)
176- GGML_LOG_INFO (" GGML_SYCL_INTEL_TARGET: yes\n " );
177- #endif
178-
179- #if defined(GGML_SYCL_NVIDIA_TARGET)
180- GGML_LOG_INFO (" GGML_SYCL_NVIDIA_TARGET: yes\n " );
181- #endif
182-
183- #if defined(GGML_SYCL_AMD_TARGET)
184- GGML_LOG_INFO (" GGML_SYCL_AMD_TARGET: yes\n " );
185- #endif
186-
187203/* NOT REMOVE, keep it for next optimize for XMX.
188204#if defined(SYCL_USE_XMX)
189205 fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
@@ -253,11 +269,13 @@ struct ggml_backend_sycl_buffer_context {
253269 void * dev_ptr = nullptr ;
254270 queue_ptr stream;
255271 std::string name;
272+ optimize_feature opt_feature;
256273
257274 ggml_backend_sycl_buffer_context (int device, void * dev_ptr, queue_ptr stream) :
258275 device (device), dev_ptr(dev_ptr), stream(stream) {
259276 check_allow_gpu_index (device);
260277 name = (GGML_SYCL_NAME + std::to_string (device));
278+ opt_feature = ggml_sycl_info ().devices [device].opt_feature ;
261279 }
262280
263281
@@ -368,14 +386,13 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
368386 .wait ()));
369387 free (host_buf);
370388
371- #if defined(GGML_SYCL_INTEL_TARGET)
372- if (tensor->type == GGML_TYPE_Q4_0) {
373- size_t ncols = tensor->ne [0 ];
374- size_t nrows = tensor->ne [1 ];
375- reorder_qw ((char *)tensor->data + offset, ncols, nrows, size, stream);
389+ if (ctx->opt_feature .reorder ) {
390+ if (tensor->type == GGML_TYPE_Q4_0) {
391+ size_t ncols = tensor->ne [0 ];
392+ size_t nrows = tensor->ne [1 ];
393+ reorder_qw ((char *)tensor->data + offset, ncols, nrows, size, stream);
394+ }
376395 }
377- #endif
378-
379396}
380397catch (sycl::exception const &exc) {
381398 std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -2655,11 +2672,11 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
26552672 get_rows_sycl_float (ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
26562673 break ;
26572674 case GGML_TYPE_Q4_0:
2658- # if defined(GGML_SYCL_INTEL_TARGET)
2659- get_rows_sycl_reorder<QK4_0, QR4_0, dequantize_q4_0_reorder>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
2660- # else
2661- get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
2662- # endif
2675+ if (ctx. opt_feature . reorder ) {
2676+ get_rows_sycl_reorder<QK4_0, QR4_0, dequantize_q4_0_reorder>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
2677+ } else {
2678+ get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
2679+ }
26632680 break ;
26642681 case GGML_TYPE_Q4_1:
26652682 get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
@@ -2735,7 +2752,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
27352752 // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
27362753 ggml_sycl_pool_alloc<sycl::half> src0_as_f16 (ctx.pool ());
27372754 if (src0->type != GGML_TYPE_F16) {
2738- const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src0->type );
2755+ const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src0->type , ctx );
27392756 GGML_ASSERT (to_fp16_sycl != nullptr );
27402757 size_t ne = row_diff*ne00;
27412758 src0_as_f16.alloc (ne);
@@ -2747,7 +2764,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
27472764
27482765 ggml_sycl_pool_alloc<sycl::half> src1_as_f16 (ctx.pool ());
27492766 if (src1->type != GGML_TYPE_F16) {
2750- const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src1->type );
2767+ const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src1->type , ctx );
27512768 GGML_ASSERT (to_fp16_sycl != nullptr );
27522769 size_t ne = src1_ncols*ne10;
27532770 src1_as_f16.alloc (ne);
@@ -2768,13 +2785,13 @@ inline void ggml_sycl_op_mul_mat_sycl(
27682785 src1_ptr, dpct::library_data_t ::real_half, ne10, &beta_f16,
27692786 dst_f16.get (), dpct::library_data_t ::real_half, ldc,
27702787 dpct::library_data_t ::real_half)));
2771- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (GGML_TYPE_F16);
2788+ const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (GGML_TYPE_F16, ctx );
27722789 to_fp32_sycl (dst_f16.get (), dst_dd_i, row_diff*src1_ncols, stream);
27732790#else
27742791 auto dnnl_stream = ctx.stream_dnnl (stream);
27752792 DnnlGemmWrapper::row_gemm (dnnl_stream, false , true , src1_ncols, row_diff, ne10, src1_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
27762793 src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(), dst_f16.get (), DnnlGemmWrapper::to_dt<sycl::half>());
2777- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (GGML_TYPE_F16);
2794+ const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (GGML_TYPE_F16, ctx );
27782795 to_fp32_sycl (dst_f16.get (), dst_dd_i, row_diff* src1_ncols, stream);
27792796#endif
27802797 }
@@ -2783,13 +2800,13 @@ inline void ggml_sycl_op_mul_mat_sycl(
27832800 ggml_sycl_pool_alloc<float > src0_ddq_as_f32 (ctx.pool ());
27842801 ggml_sycl_pool_alloc<float > src1_ddq_as_f32 (ctx.pool ());
27852802 if (src0->type != GGML_TYPE_F32) {
2786- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (src0->type );
2803+ const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (src0->type , ctx );
27872804 GGML_ASSERT (to_fp32_sycl != nullptr );
27882805 src0_ddq_as_f32.alloc (row_diff*ne00);
27892806 to_fp32_sycl (src0_dd_i, src0_ddq_as_f32.get (), row_diff*ne00, stream);
27902807 }
27912808 if (src1->type != GGML_TYPE_F32) {
2792- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (src1->type );
2809+ const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl (src1->type , ctx );
27932810 GGML_ASSERT (to_fp32_sycl != nullptr );
27942811 src1_ddq_as_f32.alloc (src1_ncols*ne10);
27952812 to_fp32_sycl (src1_ddf_i, src1_ddq_as_f32.get (), src1_ncols*ne10, stream);
@@ -3535,7 +3552,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
35353552 // convert src1 to fp16
35363553 ggml_sycl_pool_alloc<sycl::half> src1_f16_alloc (ctx.pool ());
35373554 if (src1->type != GGML_TYPE_F16) {
3538- const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src1->type );
3555+ const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl (src1->type , ctx );
35393556 const int64_t ne_src1 = ggml_nelements (src1);
35403557 src1_f16_alloc.alloc (ne_src1);
35413558 GGML_ASSERT (to_fp16_sycl != nullptr );
0 commit comments