Skip to content

Commit 6fe27eb

Browse files
author
Alberto Cabrera
committed
Explicit inlining and unroll
1 parent 6afb367 commit 6fe27eb

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

ggml/src/ggml-sycl/mmvq.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r
4040
// Y block index that aligns with ibx
4141
const int iby = i * block_type::block_to_q8_1_ratio();
4242

43+
#pragma unroll
4344
for (int elem = 0; elem < block_elements_per_subgroup; elem += WARP_SIZE) {
4445
// x block quant index when casting the quants to int
4546
const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);

ggml/src/ggml-sycl/vecdotq.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0> {
265265
using q4_0_block = ggml_sycl_reordered::block_q_t<GGML_TYPE_Q4_0>;
266266
using q4_0_traits = typename q4_0_block::traits;
267267

268-
float vec_dot_q4_0_q8_1_impl(const int * v, const int * u, const float & d4, const sycl::half2 & ds8) {
268+
__dpct_inline__ float vec_dot_q4_0_q8_1_impl(const int * v, const int * u, const float & d4, const sycl::half2 & ds8) {
269269
int sumi = 0;
270270

271271
#pragma unroll
@@ -284,7 +284,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0> {
284284
return d4 * (sumi * ds8f.x() - (8 * q4_0_traits::vdr_mmvq / q4_0_traits::qi) * ds8f.y());
285285
}
286286

287-
float operator()(const void * __restrict__ vbq, const int ibx_offset, const int d_offset,
287+
__dpct_inline__ float operator()(const void * __restrict__ vbq, const int ibx_offset, const int d_offset,
288288
const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
289289
const uint8_t * bq4_0 = static_cast<const uint8_t *>(vbq) + ibx_offset;
290290
const ggml_half d = *(reinterpret_cast<const ggml_half *>(static_cast<const uint8_t *>(vbq) + d_offset));

0 commit comments

Comments
 (0)