@@ -3477,10 +3477,9 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
34773477    }
34783478}
34793479
3480- #ifdef  GGML_USE_CPU_AARCH64 
3481- static  void  repack_q4_0_to_q4_0_4_bl (struct  ggml_tensor  *  t , int  interleave_block , const  void  *  data , size_t  data_size ) {
3480+ #ifdef  GGML_USE_RUNTIME_REPACK 
3481+ static  int  repack_q4_0_to_q4_0_4_bl (struct  ggml_tensor  *  t , int  interleave_block , const  void  *  data , size_t  data_size ) {
34823482    GGML_ASSERT (t -> type  ==  GGML_TYPE_Q4_0 );
3483-     GGML_ASSERT (t -> ne [0 ] % 8  ==  0 );
34843483    GGML_ASSERT (interleave_block  ==  4  ||  interleave_block  ==  8 );
34853484
34863485    block_q4_0x4  * dst  =  (block_q4_0x4  * )t -> data ;
@@ -3492,23 +3491,26 @@ static void repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_bloc
34923491
34933492    GGML_ASSERT (data_size  ==  nrow  *  nblocks  *  sizeof (block_q4_0 ));
34943493
3494+     if  (nrow  % nrows_interleaved  !=  0  ||  t -> ne [0 ] % 8  !=  0 ) {
3495+         return  -1 ;
3496+     }
3497+ 
34953498    for  (int  b  =  0 ; b  <  nrow ; b  +=  nrows_interleaved ) {
3496-         for  (int64_t  x  =  0 ; x  <  nblocks ; x ++ )
3497-         {
3499+         for  (int64_t  x  =  0 ; x  <  nblocks ; x ++ ) {
34983500            for  (int  i  =  0 ; i  <  nrows_interleaved ; i ++ ) {
34993501                dst_tmp [i ] =  src [x  +  i  *  nblocks ];
35003502            }
35013503            * dst ++  =  make_block_q4_0x4 (dst_tmp , interleave_block , 0x88 );
35023504        }
35033505        src  +=  nrows_interleaved  *  nblocks ;
35043506    }
3507+     return  0 ;
35053508
35063509    GGML_UNUSED (data_size );
35073510}
35083511
3509- static  void  repack_q4_0_to_q4_0_8_bl (struct  ggml_tensor  * t , int  interleave_block ,   const  void  *  data , size_t  data_size ) {
3512+ static  int  repack_q4_0_to_q4_0_8_bl (struct  ggml_tensor  * t , int  interleave_block , const  void  *  data , size_t  data_size ) {
35103513    GGML_ASSERT (t -> type  ==  GGML_TYPE_Q4_0 );
3511-     GGML_ASSERT (t -> ne [0 ] % 8  ==  0 );
35123514    GGML_ASSERT (interleave_block  ==  8 );
35133515
35143516    block_q4_0x8  * dst  =  (block_q4_0x8 * )t -> data ;
@@ -3520,6 +3522,10 @@ static void repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block
35203522
35213523    GGML_ASSERT (data_size  ==  nrow  *  nblocks  *  sizeof (block_q4_0 ));
35223524
3525+     if  (nrow  % nrows_interleaved  !=  0  ||  t -> ne [0 ] % 8  !=  0 ) {
3526+         return  -1 ;
3527+     }
3528+ 
35233529    for  (int  b  =  0 ; b  <  nrow ; b  +=  nrows_interleaved ) {
35243530        for  (int64_t  x  =  0 ; x  <  nblocks ; x ++ ) {
35253531            for  (int  i   =  0 ; i  <  nrows_interleaved ; i ++  ) {
@@ -3529,29 +3535,26 @@ static void repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block
35293535        }
35303536        src  +=  nrows_interleaved  *  nblocks ;
35313537    }
3538+     return  0 ;
35323539
35333540    GGML_UNUSED (data_size );
35343541}
35353542
35363543// Prepare for optimized kernels if applicable 
35373544int  ggml_prepare_optimal_kernel (struct  ggml_tensor  *  cur , const  void  *  data , size_t  data_size ) {
35383545    GGML_ASSERT (cur -> type  ==  GGML_TYPE_Q4_0 );
3539-     int  ret  =  -1 ;
35403546#if  defined(__ARM_ARCH )
35413547    if  (ggml_cpu_has_sve () &&  ggml_cpu_has_matmul_int8 () &&  ggml_cpu_get_sve_cnt () ==  QK8_0 ) {
3542-         repack_q4_0_to_q4_0_8_bl (cur , 8 , data , data_size );
3543-         ret  =  0 ;
3548+         return  repack_q4_0_to_q4_0_8_bl (cur , 8 , data , data_size );
35443549    }
35453550    else  if  (ggml_cpu_has_neon () &&  ggml_cpu_has_matmul_int8 ()) {
3546-         repack_q4_0_to_q4_0_4_bl (cur , 8 , data , data_size );
3547-         ret  =  0 ;
3551+         return  repack_q4_0_to_q4_0_4_bl (cur , 8 , data , data_size );
35483552    }
35493553    else  if  (ggml_cpu_has_neon ()) {
3550-         repack_q4_0_to_q4_0_4_bl (cur , 4 , data , data_size );
3551-         ret  =  0 ;
3554+         return  repack_q4_0_to_q4_0_4_bl (cur , 4 , data , data_size );
35523555    }
35533556#endif 
3554-     return  ret ;
3557+     return  -1 ;
35553558
35563559    GGML_UNUSED (cur );
35573560    GGML_UNUSED (data );
0 commit comments