File tree Expand file tree Collapse file tree 5 files changed +15
-7
lines changed Expand file tree Collapse file tree 5 files changed +15
-7
lines changed Original file line number Diff line number Diff line change @@ -290,9 +290,9 @@ if (GGML_CPU_ALL_VARIANTS)
290290    ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 FMA)
291291    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
292292    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
293+     ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
293294    if  (NOT  MSVC )
294-         # MSVC doesn't support AVX-VNNI or AMX 
295-         ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
295+         # MSVC doesn't support AMX 
296296        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
297297    endif ()
298298else  ()
Original file line number Diff line number Diff line change @@ -215,8 +215,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
215215                list (APPEND  ARCH_DEFINITIONS GGML_SSE42)
216216            endif ()
217217            if  (GGML_AVX_VNNI)
218-                 # MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2 
219-                 #list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI) 
218+                 list (APPEND  ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
220219            endif ()
221220        else  ()
222221            if  (GGML_NATIVE)
Original file line number Diff line number Diff line change @@ -194,9 +194,12 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) {
194194}
195195
196196static  inline  __m256i mul_sum_us8_pairs_int32x8 (const  __m256i ax, const  __m256i sy) {
197- #if  defined(__AVXVNNI__) || (defined( __AVX512VNNI__) && defined(__AVX512VL__) )
197+ #if  defined(__AVX512VNNI__) && defined(__AVX512VL__)
198198    const  __m256i zero = _mm256_setzero_si256 ();
199199    return  _mm256_dpbusd_epi32 (zero, ax, sy);
200+ #elif  defined(__AVXVNNI__)
201+     const  __m256i zero = _mm256_setzero_si256 ();
202+     return  _mm256_dpbusd_avx_epi32 (zero, ax, sy);
200203#else 
201204    //  Perform multiplication and create 16-bit values
202205    const  __m256i dot = _mm256_maddubs_epi16 (ax, sy);
Original file line number Diff line number Diff line change @@ -103,10 +103,14 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
103103}
104104
105105static  inline  __m256  mul_sum_us8_pairs_float (const  __m256i  ax , const  __m256i  sy ) {
106- #if  defined(__AVXVNNI__ )  ||  (defined( __AVX512VNNI__ ) &&  defined(__AVX512VL__ ) )
106+ #if  defined(__AVX512VNNI__ ) &&  defined(__AVX512VL__ )
107107    const  __m256i  zero  =  _mm256_setzero_si256 ();
108108    const  __m256i  summed_pairs  =  _mm256_dpbusd_epi32 (zero , ax , sy );
109109    return  _mm256_cvtepi32_ps (summed_pairs );
110+ #elif  defined(__AVXVNNI__ )
111+     const  __m256i  zero  =  _mm256_setzero_si256 ();
112+     const  __m256i  summed_pairs  =  _mm256_dpbusd_avx_epi32 (zero , ax , sy );
113+     return  _mm256_cvtepi32_ps (summed_pairs );
110114#else 
111115    // Perform multiplication and create 16-bit values 
112116    const  __m256i  dot  =  _mm256_maddubs_epi16 (ax , sy );
Original file line number Diff line number Diff line change @@ -1000,8 +1000,10 @@ class tinyBLAS_Q0_AVX {
10001000
10011001    inline  __m256 updot (__m256i u, __m256i s) {
10021002        __m256i res;
1003- #if  defined(__AVXVNNI__) || (defined( __AVX512VNNI__) && defined(__AVX512VL__) )
1003+ #if  defined(__AVX512VNNI__) && defined(__AVX512VL__)
10041004        res = _mm256_dpbusd_epi32 (_mm256_setzero_si256 (), u, s);
1005+ #elif  defined(__AVXVNNI__)
1006+         res = _mm256_dpbusd_avx_epi32 (_mm256_setzero_si256 (), u, s);
10051007#else 
10061008        res = _mm256_madd_epi16 (_mm256_set1_epi16 (1 ), _mm256_maddubs_epi16 (u, s));
10071009#endif 
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments