@@ -3769,8 +3769,42 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
37693769 std::ofstream logfile;
37703770 logfile.open(filename);
37713771 for(size_t i=0; i<total_elements; i++){
3772+ logfile << local_buf[i] <<" ";
3773+ if((i+1)%20 ==0) logfile <<std::endl;
3774+ }
3775+ logfile <<std::endl;
3776+ logfile.close();
3777+
3778+ if(src_on_device) ggml_sycl_host_free(local_buf);
3779+ }
3780+
3781+ void log_ggml_var_device_fp16(const char*name, sycl::half *src, size_t total_elements, bool src_on_device){
3782+ if(!g_ggml_sycl_debug) return;
3783+ if(!src){
3784+ printf("GGML Tensor:%s skip to save for NULL pointer\n", name);
3785+ return;
3786+ }
3787+ char filename[1024];
3788+ sprintf(filename, "%s.txt", name);
3789+ printf("GGML Tensor:%s save to %s\n", name, filename);
3790+
3791+ size_t total_size = total_elements*sizeof(sycl::half);
3792+ sycl::half *local_buf = NULL;
3793+ if(src_on_device) {
3794+ local_buf = (sycl::half *) ggml_sycl_host_malloc(total_size);
3795+ ggml_sycl_set_device(g_main_device);
3796+ dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0];
3797+ main_stream->memcpy(local_buf, src, total_size).wait();
3798+ }
3799+ else {
3800+ local_buf = (sycl::half *)src;
3801+ }
3802+
3803+ std::ofstream logfile;
3804+ logfile.open(filename);
3805+ for(size_t i=0; i<total_elements; i++){
3806+ logfile << local_buf[i] <<" ";
37723807 if((i+1)%20 ==0) logfile <<std::endl;
3773- else logfile << local_buf[i] <<" ";
37743808 }
37753809 logfile <<std::endl;
37763810 logfile.close();
@@ -14126,7 +14160,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
1412614160 src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
1412714161 dst_f16.get(), dpct::library_data_t::real_half, ldc,
1412814162 dpct::library_data_t::real_half)));
14129-
14163+ g_sycl_handles[id]->wait();
1413014164 const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
1413114165 to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
1413214166 }
@@ -14159,6 +14193,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
1415914193 dpct::get_value(&alpha, *g_sycl_handles[id]), src0_ddf_i, ne00,
1416014194 src1_ddf1_i, ne10, dpct::get_value(&beta, *g_sycl_handles[id]),
1416114195 dst_dd_i, ldc)));
14196+ g_sycl_handles[id]->wait();
1416214197 }
1416314198 (void) dst;
1416414199 (void) src1_ddq_i;
@@ -15295,8 +15330,8 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
1529515330 sycl_pool_alloc<sycl::half> dst_f16;
1529615331 char * dst_t;
1529715332
15298- dpct::library_data_t cu_compute_type = dpct::library_data_t::real_half ;
15299- dpct::library_data_t cu_data_type = dpct::library_data_t::real_half ;
15333+ dpct::library_data_t cu_compute_type = dpct::library_data_t::real_float ;
15334+ dpct::library_data_t cu_data_type = dpct::library_data_t::real_float ;
1530015335
1530115336 // dst strides
1530215337 size_t nbd2 = dst->nb[2];
@@ -15308,15 +15343,13 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
1530815343 const float alpha_f32 = 1.0f;
1530915344 const float beta_f32 = 0.0f;
1531015345
15311- const void * alpha = &alpha_f16 ;
15312- const void * beta = &beta_f16 ;
15346+ const void * alpha = &alpha_f32 ;
15347+ const void * beta = &beta_f32 ;
1531315348
1531415349 // TODO: Renable (dst->op_params[0] =! GGML_PREC_DEFAULT) pathway
15315- // once oneMKL open source supports half, half, float, float: datatypes
15316- dst_t = (char *) dst_f16.alloc(ne_dst);
15350+ // oneMKL open source supports half, half, float, float: datatypes
1531715351
15318- nbd2 /= sizeof(float) / sizeof(sycl::half);
15319- nbd3 /= sizeof(float) / sizeof(sycl::half);
15352+ dst_t = (char *) dst_ddf;
1532015353
1532115354 GGML_ASSERT(ne12 % ne02 == 0);
1532215355 GGML_ASSERT(ne13 % ne03 == 0);
@@ -15356,6 +15389,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
1535615389 nb11 / nb10, nb12 / nb10, beta,
1535715390 (char *)dst_t, cu_data_type, ne01, nb2 / nb0,
1535815391 ne12 * ne13, cu_compute_type)));
15392+ g_sycl_handles[g_main_device]->wait();
1535915393 } else {
1536015394 const int ne23 = ne12*ne13;
1536115395
@@ -15386,7 +15420,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
1538615420 nb02, nb03, nb12_scaled, nb13_scaled,
1538715421 nbd2, nbd3, r2, r3, item_ct1);
1538815422 });
15389- });
15423+ }).wait() ;
1539015424 }
1539115425 SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
1539215426 *g_sycl_handles[g_main_device], oneapi::mkl::transpose::trans,
@@ -15397,11 +15431,10 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
1539715431 dpct::library_data_t::real_half, nb11 / nb10, beta,
1539815432 (void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23,
1539915433 cu_compute_type)));
15434+ g_sycl_handles[g_main_device]->wait();
1540015435 }
1540115436#endif
1540215437
15403- const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
15404- to_fp32_sycl(dst_f16.get(), dst_ddf, ne_dst, main_stream);
1540515438}
1540615439catch (sycl::exception const &exc) {
1540715440 std::cerr << exc.what() << "Exception caught at file:" << __FILE__
0 commit comments