@@ -626,7 +626,7 @@ extern "C" IQK_API bool iqk_mul_mat_moe(long Nx, long Ny, long ne00, int ne11,
626626 this_info.s += ix;
627627 int this_nrc_x = ix + k_x_step <= nrc_x ? k_x_step : nrc_x - ix;
628628 if (f.size () < row_size_qx*this_nrc_x) f.resize (row_size_qx*this_nrc_x);
629- if (!iqk_dequantize_ktquants (typeA, ne00, (const char *)A + (first_x + ix)*strideA, strideA, f.data (), ne00, this_nrc_x)) {
629+ if (!iqk_convert_repack (typeA, ne00, (const char *)A + (first_x + ix)*strideA, strideA, f.data (), ne00, this_nrc_x)) {
630630 GGML_ABORT (" Fatal error" );
631631 }
632632 mm.mul_mat_NxM (ne00, f.data (), row_size_qx, this_info, this_nrc_x, Ny);
@@ -696,10 +696,10 @@ extern "C" IQK_API bool iqk_moe_fused_up_gate(long Nx, long Ny, long ne00, int n
696696 if (f.size () < 2 *row_size_qx*this_nrc_x) f.resize (2 *row_size_qx*this_nrc_x);
697697 auto Xu = f.data ();
698698 auto Xg = f.data () + row_size_qx*this_nrc_x;
699- if (!iqk_dequantize_ktquants (typeA, ne00, (const char *)Aup + (first_x + ix)*strideA, strideA, Xu, ne00, this_nrc_x)) {
699+ if (!iqk_convert_repack (typeA, ne00, (const char *)Aup + (first_x + ix)*strideA, strideA, Xu, ne00, this_nrc_x)) {
700700 GGML_ABORT (" Fatal error" );
701701 }
702- if (!iqk_dequantize_ktquants (typeA, ne00, (const char *)Agate + (first_x + ix)*strideA, strideA, Xg, ne00, this_nrc_x)) {
702+ if (!iqk_convert_repack (typeA, ne00, (const char *)Agate + (first_x + ix)*strideA, strideA, Xg, ne00, this_nrc_x)) {
703703 GGML_ABORT (" Fatal error" );
704704 }
705705 mm.mul_mat_up_gate_NxM (ne00, Xu, Xg, row_size_qx, this_info, this_nrc_x, Ny, unary_op);
0 commit comments