1 file changed: +5 -6 lines changed
Columns: original file line number | diff line number | diff line change
@@ -11679,17 +11679,16 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
1167911679    }
1168011680    ggml_barrier (params -> threadpool );
1168111681
11682-     
11683- 
11684-     #ifdef  __AVX2__ 
11682+       
11683+     #if  defined(__AVX__ ) &&  !defined(__AVX512F__ )
1168511684        #define  GGML_F32X  GGML_F32x8
1168611685        #define  GGML_F32X_SET1  GGML_F32x8_SET1
1168711686        #define  GGML_F32X_LOAD  GGML_F32x8_LOAD
1168811687        #define  GGML_F32X_STORE  GGML_F32x8_STORE
1168911688        #define  GGML_F32X_MUL  GGML_F32x8_MUL
1169011689        #define  GGML_F32X_FMA  GGML_F32x8_FMA
11691-         #define  VECTOR_SIZE  8
11692-     #elif  __AVX512F__ 
11690+         #define  WKV_VECTOR_SIZE  8
11691+     #elif  defined( __AVX512F__ ) 
1169311692        #define  GGML_F32X  GGML_F32x16
1169411693        #define  GGML_F32X_SET1  GGML_F32x16_SET1
1169511694        #define  GGML_F32X_LOAD  GGML_F32x16_LOAD
@@ -11763,7 +11762,7 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
1176311762                    }
1176411763
1176511764                    // Handle remaining elements, this will not be used. 
11766-                     for  (int64_t  j  =  vec_count  *  VECTOR_SIZE ; j  <  head_size ; j ++ ) {
11765+                     for  (int64_t  j  =  vec_count  *  WKV_VECTOR_SIZE ; j  <  head_size ; j ++ ) {
1176711766                        size_t  t_h_j_offset  =  t_h_offset  +  j ;
1176811767                        size_t  h_2d_i_j_offset  =  h_2d_i_offset  +  j ;
1176911768                        float  v_val  =  v [t_h_j_offset ];
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments