@@ -51,10 +51,11 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
5151 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
5252 },
5353 /* .lhs_info = */ {
54- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
55- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
56- /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
57- /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32_neon,
54+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
55+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
56+ /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
57+ /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32_neon,
58+ /* .require_aligned_m_idx = */ true ,
5859 },
5960 /* .rhs_info = */ {
6061 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon,
@@ -95,10 +96,11 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
9596 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
9697 },
9798 /* .lhs_info = */ {
98- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
99- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
100- /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
101- /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
99+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
100+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
101+ /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
102+ /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
103+ /* .require_aligned_m_idx = */ false ,
102104 },
103105 /* .rhs_info = */ {
104106 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -138,10 +140,11 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
138140 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
139141 },
140142 /* .lhs_info = */ {
141- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
142- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
143- /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
144- /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
143+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
144+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
145+ /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
146+ /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
147+ /* .require_aligned_m_idx = */ false ,
145148 },
146149 /* .rhs_info = */ {
147150 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -182,10 +185,11 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
182185 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
183186 },
184187 /* .lhs_info = */ {
185- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
186- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
187- /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
188- /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
188+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
189+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
190+ /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
191+ /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
192+ /* .require_aligned_m_idx = */ false ,
189193 },
190194 /* .rhs_info = */ {
191195 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -225,10 +229,11 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
225229 /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
226230 },
227231 /* .lhs_info = */ {
228- /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
229- /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
230- /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
231- /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
232+ /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
233+ /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
234+ /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
235+ /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
236+ /* .require_aligned_m_idx = */ false ,
232237 },
233238 /* .rhs_info = */ {
234239 /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
0 commit comments