@@ -51,11 +51,10 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
5151 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
5252 },
5353 /* .lhs_info = */ {
54- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32 ,
55- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32 ,
54+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon ,
55+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon ,
5656 /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
5757 /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32_neon,
58- /* .require_aligned_m_idx = */ true ,
5958 },
6059 /* .rhs_info = */ {
6160 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon,
@@ -100,7 +99,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
10099 /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
101100 /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
102101 /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
103- /* .require_aligned_m_idx = */ false ,
104102 },
105103 /* .rhs_info = */ {
106104 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -144,7 +142,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
144142 /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
145143 /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
146144 /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
147- /* .require_aligned_m_idx = */ false ,
148145 },
149146 /* .rhs_info = */ {
150147 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -189,7 +186,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
189186 /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
190187 /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
191188 /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
192- /* .require_aligned_m_idx = */ false ,
193189 },
194190 /* .rhs_info = */ {
195191 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -233,7 +229,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
233229 /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
234230 /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
235231 /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
236- /* .require_aligned_m_idx = */ false ,
237232 },
238233 /* .rhs_info = */ {
239234 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
0 commit comments