diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 314a38fb2..bd340fadd 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -111,7 +111,6 @@ option(GGML_BLAS                            "ggml: use BLAS"
 set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
                                             "ggml: BLAS library vendor")
 option(GGML_LLAMAFILE                       "ggml: use LLAMAFILE"                             OFF)
-option(GGML_IQK_MUL_MAT                     "ggml: use optimized iqk matrix multiplications"  ON)
 
 option(GGML_CUDA                            "ggml: use CUDA"                                  OFF)
 option(GGML_MUSA                            "ggml: use MUSA"                                  OFF)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index b0db417de..ff29fab59 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -255,44 +255,41 @@ endif()
 
 set (GGML_SOURCES_IQK iqk/iqk_quantize.cpp)
 set (GGML_HEADERS_IQK iqk/iqk_config.h)
-if (GGML_IQK_MUL_MAT)
-    message(STATUS "Using optimized iqk matrix multiplications")
-    add_compile_definitions(GGML_USE_IQK_MULMAT)
-    set(GGML_SOURCES_IQK_MM iqk/iqk_mul_mat.cpp
-                            iqk/iqk_flash_attn.cpp
-                            iqk/fa/iqk_fa_576_512.cpp
-                            iqk/fa/iqk_fa_192_128.cpp
-                            iqk/fa/iqk_fa_256_256.cpp
-                            iqk/fa/iqk_fa_128_128.cpp
-                            iqk/fa/iqk_fa_96_96.cpp
-                            iqk/fa/iqk_fa_64_64.cpp
-                            iqk/iqk_gemm_floats.cpp
-                            iqk/iqk_gemm_kquants.cpp
-                            iqk/iqk_gemm_ktquants.cpp
-                            iqk/iqk_gemm_iquants.cpp
-                            iqk/iqk_gemm_iqk_quants.cpp
-                            iqk/iqk_gemm_1bit.cpp
-                            iqk/iqk_gemm_legacy_quants.cpp)
-    set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h
-                            iqk/iqk_flash_impl.h
-                            iqk/fa/iqk_fa_templates.h
-                            iqk/iqk_gemm_floats.h
-                            iqk/iqk_gemm_kquants.h
-                            iqk/iqk_gemm_ktquants.h
-                            iqk/iqk_gemm_iquants.h
-                            iqk/iqk_gemm_iqk_quants.h
-                            iqk/iqk_gemm_1bit.h
-                            iqk/iqk_gemm_legacy_quants.h)
-    if (GGML_IQK_FLASH_ATTENTION)
-        message(STATUS "Enabling IQK Flash Attention kernels")
-        add_compile_definitions(GGML_IQK_FLASH_ATTENTION)
-        if (GGML_IQK_FA_ALL_QUANTS)
-            message(STATUS "Including all IQK FA kernels")
-            add_compile_definitions(GGML_IQK_FA_ALL_QUANTS)
-        endif()
-    else()
-        message(STATUS "Disabling IQK Flash Attention kernels")
+message(STATUS "Using optimized iqk matrix multiplications")
+set(GGML_SOURCES_IQK_MM iqk/iqk_mul_mat.cpp
+                        iqk/iqk_flash_attn.cpp
+                        iqk/fa/iqk_fa_576_512.cpp
+                        iqk/fa/iqk_fa_192_128.cpp
+                        iqk/fa/iqk_fa_256_256.cpp
+                        iqk/fa/iqk_fa_128_128.cpp
+                        iqk/fa/iqk_fa_96_96.cpp
+                        iqk/fa/iqk_fa_64_64.cpp
+                        iqk/iqk_gemm_floats.cpp
+                        iqk/iqk_gemm_kquants.cpp
+                        iqk/iqk_gemm_ktquants.cpp
+                        iqk/iqk_gemm_iquants.cpp
+                        iqk/iqk_gemm_iqk_quants.cpp
+                        iqk/iqk_gemm_1bit.cpp
+                        iqk/iqk_gemm_legacy_quants.cpp)
+set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h
+                        iqk/iqk_flash_impl.h
+                        iqk/fa/iqk_fa_templates.h
+                        iqk/iqk_gemm_floats.h
+                        iqk/iqk_gemm_kquants.h
+                        iqk/iqk_gemm_ktquants.h
+                        iqk/iqk_gemm_iquants.h
+                        iqk/iqk_gemm_iqk_quants.h
+                        iqk/iqk_gemm_1bit.h
+                        iqk/iqk_gemm_legacy_quants.h)
+if (GGML_IQK_FLASH_ATTENTION)
+    message(STATUS "Enabling IQK Flash Attention kernels")
+    add_compile_definitions(GGML_IQK_FLASH_ATTENTION)
+    if (GGML_IQK_FA_ALL_QUANTS)
+        message(STATUS "Including all IQK FA kernels")
+        add_compile_definitions(GGML_IQK_FA_ALL_QUANTS)
     endif()
+else()
+    message(STATUS "Disabling IQK Flash Attention kernels")
 endif()
 
 if (GGML_LLAMAFILE)
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 220c0c997..05b764199 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -10,11 +10,9 @@
 
 #include "ggml-quants.h"
 #include "ggml-impl.h"
-#if GGML_USE_IQK_MULMAT
 #include "iqk/iqk_config.h"
 #include "iqk/iqk_mul_mat.h"
 #include "iqk/iqk_quantize.h"
-#endif
 
 
 #include <math.h>
@@ -3933,11 +3931,7 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int6
 }
 
 void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
-#ifdef GGML_USE_IQK_MULMAT
     iqk_quantize_row_q8_K(x, y, k);
-#else
-    quantize_row_q8_K_ref(x, y, k);
-#endif
 }
 
 //===================================== Dot ptoducts =================================
@@ -4023,11 +4017,9 @@ static inline __m128i get_scale_shuffle(int i) {
 #endif
 
 void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q4_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -4510,11 +4502,9 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q4_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     const int qk = QK8_1;
     const int nb = n / qk;
 
@@ -4802,7 +4792,6 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
 #ifdef __AVX2__
     const enum ggml_type vec_dot_type = GGML_TYPE_Q8_1;
 #else
@@ -4811,7 +4800,6 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, vec_dot_type, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -5167,11 +5155,9 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     const int qk = QK8_1;
     const int nb = n / qk;
 
@@ -5546,7 +5532,6 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q6_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
 #ifdef __AVX2__
     const enum ggml_type vec_dot_type = GGML_TYPE_Q8_1;
 #else
@@ -5555,13 +5540,11 @@ void ggml_vec_dot_q6_0_q8_0(int n, float * restrict s, size_t bs, const void * r
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q6_0, vx, bx, vec_dot_type, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     // TODO
     *s = 0;
 }
 
 void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
 #ifdef HAVE_FANCY_SIMD
     enum ggml_type dot_type = GGML_TYPE_Q8_1_X4;
 #else
@@ -5570,7 +5553,6 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q8_0, vx, bx, dot_type, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -11940,11 +11922,9 @@ void ggml_vec_dot_iq1_m_q8_K  (int n, float * restrict s, size_t bs, const void
 }
 
 void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_IQ4_NL, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
         return;
     }
-#endif
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index d8025a5a4..6b2136205 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -12,10 +12,8 @@
 #include "ggml.h"
 #include "ggml-aarch64.h"
 #include "iqk/iqk_quantize.h"
-#if GGML_USE_IQK_MULMAT
 #include "iqk/iqk_mul_mat.h"
 #include "iqk/iqk_config.h"
-#endif
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
@@ -715,15 +713,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q4_0,
         .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
         .vec_dot                  = ggml_vec_dot_q4_0_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
 #endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
-#endif
 #if defined (__ARM_FEATURE_MATMUL_INT8)
         .nrows                    = 2,
 #else
@@ -740,15 +734,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q4_1,
         .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
         .vec_dot                  = ggml_vec_dot_q4_1_q8_1,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_1_X4,
 #endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_1,
-#endif
 #if defined (__ARM_FEATURE_MATMUL_INT8)
         .nrows                    = 2,
 #else
@@ -791,14 +781,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q5_0,
         .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
         .vec_dot                  = ggml_vec_dot_q5_0_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -812,14 +798,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q5_1,
         .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
         .vec_dot                  = ggml_vec_dot_q5_1_q8_1,
-#if GGML_USE_IQK_MULMAT
 #ifdef __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_1_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_1,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -833,14 +815,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q6_0,
         .from_float_ref           = (ggml_from_float_t) quantize_row_q6_0_ref,
         .vec_dot                  = ggml_vec_dot_q6_0_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -855,7 +833,6 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
         .from_float_to_mat        = quantize_mat_q8_0,
         .vec_dot                  = ggml_vec_dot_q8_0_q8_0,
-#if GGML_USE_IQK_MULMAT
 #ifdef HAVE_FANCY_SIMD
         // Remember: we cannot add 128 to the Q8 quants and use iblock sum in Q8_1 to subtract as we do on Zen4 for pure AVX2
         //           because there the result of the _mm256_maddubs_epi16() instruction may overflow the int16_t range
@@ -864,9 +841,6 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
 #endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
-#endif
 #if defined (__ARM_FEATURE_MATMUL_INT8)
         .nrows                    = 2,
 #else
@@ -1288,14 +1262,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_iq4_nl,
         .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
         .vec_dot                  = ggml_vec_dot_iq4_nl_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined HAVE_FANCY_SIMD
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -1713,14 +1683,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_iq4_nl_r4,
         .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_r4_ref,
         .vec_dot                  = vec_dot_iq4_nl_r4_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -1747,14 +1713,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q4_0_r8,
         .from_float_ref           = (ggml_from_float_t)quantize_row_q4_0_r8_ref,
         .vec_dot                  = vec_dot_q4_0_r8_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -1768,14 +1730,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q8_0_r8,
         .from_float_ref           = (ggml_from_float_t)quantize_row_q8_0_r8_ref,
         .vec_dot                  = vec_dot_q8_0_r8_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -1789,14 +1747,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q5_0_r4,
         .from_float_ref           = (ggml_from_float_t)quantize_row_q5_0_r4_ref,
         .vec_dot                  = vec_dot_q5_0_r4_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -1810,14 +1764,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float               = quantize_row_q6_0_r4,
         .from_float_ref           = (ggml_from_float_t)quantize_row_q6_0_r4_ref,
         .vec_dot                  = vec_dot_q6_0_r4_q8_0,
-#if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
         .vec_dot_type             = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type             = GGML_TYPE_Q8_0_X4,
-#endif
-#else
-        .vec_dot_type             = GGML_TYPE_Q8_0,
 #endif
         .nrows                    = 1,
         .row_meta_size            = 0,
@@ -14501,10 +14451,6 @@ static void ggml_compute_forward_mul_mat(
     ggml_from_float_t        const from_float           = type_traits[vec_dot_type].from_float;
     int64_t                  const vec_dot_num_rows     = type_traits[type].nrows;
     int64_t                  const matmul_num_cols      = type_traits[type].ncols;
-#if !GGML_USE_IQK_MULMAT
-    ggml_from_float_to_mat_t const from_float_to_mat    = type_traits[vec_dot_type].from_float_to_mat;
-    int64_t                  const blck_size_interleave = type_traits[type].blck_size_interleave;
-#endif
     ggml_gemv_t              const gemv                 = type_traits[type].gemv;
     ggml_gemm_t              const gemm                 = type_traits[type].gemm;
 
@@ -14532,7 +14478,6 @@ static void ggml_compute_forward_mul_mat(
     const int64_t r3 = ne13 / ne03;
 #endif
 
-#if GGML_USE_IQK_MULMAT
     if (dst->type == GGML_TYPE_F32) {
         if (iqk_mul_mat_4d(ne01, ne11, ne00,
                     ne02, ne03, ne12, ne13, nb02, nb03, nb12, nb13, nb2/sizeof(float), nb3/sizeof(float),
@@ -14540,7 +14485,6 @@ static void ggml_compute_forward_mul_mat(
                     src1->type, src1->data, nb11,
                     (float *)dst->data, nb1/sizeof(float), ith, nth)) return;
     }
-#endif
 
 #if GGML_USE_LLAMAFILE
 
@@ -14579,62 +14523,23 @@ UseGgmlGemm1:;
 
         assert(params->wsize >= ne13*nbw3);
         if (src1->type != GGML_TYPE_F32) {
-#if GGML_USE_IQK_MULMAT
             char * work_buffer = wdata + ne13*nbw3 + ith*ne10*sizeof(float);
             GGML_ASSERT(params->wsize >= ne13*nbw3 + nth*ne10*sizeof(float));
             iqk_quantize_any(src1->type, vec_dot_type, ne10, ne11, ne12, ne13, nb10, nb11, nb12, nb13,
                     src1->data, wdata, work_buffer, type_traits[src1->type].to_float, from_float, ith, nth);
-#else
-            GGML_ABORT("fatal error");
-#endif
         }
         else {
-
-//#ifdef GGML_USE_IQK_MULMAT
-//        int ts = type_traits[vec_dot_type].type_size;
-//        int bs = type_traits[vec_dot_type].blck_size;
-//        int64_t blocks_per_row = ne10/bs;
-//        int64_t num_blocks = ne11*ne12*ne13*blocks_per_row;
-//        int gcd = simple_gcd(128, ts); // 128 is to cover cache line sizes for common architectures without getting involved
-//                                       // with trying to get it from ggml
-//        int64_t num_blocks_gcd = (num_blocks + gcd - 1)/gcd;
-//        int64_t block_per_thread = ((num_blocks_gcd + nth - 1)/nth)*gcd;
-//        int64_t first_block = ith*block_per_thread;
-//        int64_t last_block = MIN(num_blocks, first_block + block_per_thread);
-//        while (first_block < last_block) {
-//            int64_t i13 = first_block/(ne11*ne12*blocks_per_row);
-//            int64_t i12 = (first_block - i13*ne11*ne12*blocks_per_row)/(ne11*blocks_per_row);
-//            int64_t i11 = (first_block - (i13*ne12 + i12)*ne11*blocks_per_row)/blocks_per_row;
-//            int64_t i10 = first_block % blocks_per_row;
-//            int64_t blocks_to_do = MIN(blocks_per_row - i10, last_block - first_block);
-//            from_float((float *)((char *)src1->data + i13*nb13 + i12*nb12 + i11*nb11) + i10*bs,
-//                    (void *)(wdata + i13*nbw3 + i12*nbw2 + i11*nbw1 + i10*ts), blocks_to_do*bs);
-//            first_block += blocks_to_do;
-//        }
-//#else
-
-        for (int64_t i13 = 0; i13 < ne13; ++i13) {
-            for (int64_t i12 = 0; i12 < ne12; ++i12) {
-                int64_t i11_processed = 0;
-#if !GGML_USE_IQK_MULMAT
-                if ((ggml_n_dims(src1) == 2) && from_float_to_mat && gemm) {
-                    for (int64_t i11 = ith * 4; i11 < ne11 - ne11 % 4; i11 += nth * 4) {
-                        from_float_to_mat((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
-                                          (void *)               (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                                          4, ne10, blck_size_interleave);
+            for (int64_t i13 = 0; i13 < ne13; ++i13) {
+                for (int64_t i12 = 0; i12 < ne12; ++i12) {
+                    int64_t i11_processed = 0;
+                    for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
+                        from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
+                                (void *)               (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
+                                ne10);
                     }
-                    i11_processed = ne11 - ne11 % 4;
-                }
-#endif
-                for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
-                    from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
-                           (void *)               (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                           ne10);
                 }
             }
         }
-//#endif
-        }
 
         ggml_barrier(params->shared);
 
@@ -14653,7 +14558,6 @@ UseGgmlGemm1:;
 
     const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
 
-#if GGML_USE_IQK_MULMAT
     if (src1->type != vec_dot_type && dst->type == GGML_TYPE_F32) {
         const size_t row_size = ggml_row_size(vec_dot_type, ne10);
         if (iqk_mul_mat_4d(ne01, ne11, ne00,
@@ -14663,7 +14567,6 @@ UseGgmlGemm1:;
                     vec_dot_type, wdata, row_size,
                     (float *)dst->data, nb1/sizeof(float), ith, nth)) return;
     }
-#endif
 
 #if GGML_USE_LLAMAFILE
     if (src1->type != vec_dot_type) {
@@ -14901,7 +14804,6 @@ static void ggml_compute_forward_mul_mat_id(
         const int64_t nr0 = ne01; // src0 rows
         const int64_t nr1 = cne1; // src1 rows
                                   //
-#if GGML_USE_IQK_MULMAT
         if (ne13 == 1 && dst->type == GGML_TYPE_F32) {
            if (!iqk_mul_mat_moe(nr0, nr1, ne00, ne11,
                        src0->type, (const char *)src0_cur, nb01, ///ggml_type_size(src0->type),
@@ -14911,7 +14813,6 @@ static void ggml_compute_forward_mul_mat_id(
                 continue;
         }
 IQK_MulMat_Not_Available:;
-#endif
 
         if (((ggml_n_dims(src0) - 1) == 2) && gemv) {
             int64_t src0_cur_start = (ith * ne01) / nth;
@@ -15041,7 +14942,6 @@ IQK_MulMat_Not_Available:;
 #undef MMID_MATRIX_ROW
 }
 
-#if GGML_USE_IQK_MULMAT
 static void ggml_compute_forward_mul_mat_id_up_gate(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {
@@ -15176,32 +15076,10 @@ static void ggml_compute_forward_mul_mat_id_up_gate(
                             (float *)dst->data, nb1, nb2,
                             matrix_rows + cur_a*ne12, ith, nth)) GGML_ABORT("fatal error");
 
-//        if (nth%2 == 0) {
-//            const char * src0_d = ith%2 == 0 ? src0_1_cur : src0_2_cur;
-//            void *        dst_d = ith%2 == 0 ? dst1->data : dst2->data;
-//            if (!iqk_mul_mat_moe(nr0, nr1, ne00, ne11,
-//                        type, src0_d, nb01,
-//                        vec_dot_type, (const char *)wdata, row_size,
-//                        (float *)dst_d, nb1, nb2,
-//                        matrix_rows + cur_a*ne12, ith/2, nth/2)) GGML_ABORT("fatal error");
-//
-//        } else {
-//            if (!iqk_mul_mat_moe(nr0, nr1, ne00, ne11,
-//                        src0_1->type, (const char *)src0_1_cur, nb01,
-//                        vec_dot_type, (const char *)wdata, row_size,
-//                        (float *)dst1->data, nb1, nb2,
-//                        matrix_rows + cur_a*ne12, ith, nth)) GGML_ABORT("fatal error");
-//            if (!iqk_mul_mat_moe(nr0, nr1, ne00, ne11,
-//                        src0_2->type, (const char *)src0_2_cur, nb01,
-//                        vec_dot_type, (const char *)wdata, row_size,
-//                        (float *)dst2->data, nb1, nb2,
-//                        matrix_rows + cur_a*ne12, ith, nth)) GGML_ABORT("fatal error");
-//        }
     }
 
 #undef MMID_MATRIX_ROW
 }
-#endif
 
 // ggml_compute_forward_out_prod
 
@@ -18251,7 +18129,6 @@ static void ggml_compute_forward_flash_attn_ext_f16(
         scale /= softcap;
     }
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_flash_attn_noalibi(q->type, mask->type, max_bias,
                 q->ne[3], q->ne[2], q->nb[3], q->nb[2],
                 k->ne[3], k->ne[2], k->nb[3], k->nb[2],
@@ -18303,7 +18180,6 @@ static void ggml_compute_forward_flash_attn_ext_f16(
 //IQK_Flash_Attn_NotAvailable:;
 //        printf("iqk_flash was rejected\n");
 //    }
-#endif
 
     const uint32_t n_head      = neq2;
     const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head));
@@ -21930,7 +21806,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
                     const int64_t D  = MAX(Dk, Dv);
 
                     cur = 3*sizeof(float)*D*n_tasks; // 3x head size/thread
-#if GGML_USE_IQK_MULMAT
+#if GGML_IQK_FLASH_ATTENTION
                     size_t qsize = 0;
                     const struct ggml_tensor * q = node->src[0];
                     const struct ggml_tensor * k = node->src[1];
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 43be08857..b0c7c7a67 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -902,6 +902,7 @@ bool iqk_flash_attn_impl(int int_type_k,         // type of k
 #else  // IQK_IMPLEMENT
 
 extern "C" IQK_API bool iqk_mul_mat(int, long, long, long, int, const void *, long, int, const void *, long, float *, long, int, int) {
+    GGML_ABORT("=============================== Unsupported CPU");
     return false;
 }
 
@@ -911,11 +912,13 @@ extern "C" IQK_API bool iqk_mul_mat_4d(long /*Nx*/, long /*Ny*/, long /*ne00*/,
         int /*typeA*/, const void * /*A*/, long /*strideA*/,
         int /*typeB*/, const void * /*B*/, long /*strideB*/,
         float * /*C*/, long /*stride_C*/, int /*ith*/, int /*nth*/) {
+    GGML_ABORT("=============================== Unsupported CPU");
     return false;
 }
 
 extern "C" IQK_API bool iqk_mul_mat_moe(long, long, long, int, int, const void *, long, int, const void *, long, float *, long, long,
         const void *, int, int) {
+    GGML_ABORT("=============================== Unsupported CPU");
     return false;
 }
 
@@ -923,7 +926,8 @@ extern "C" IQK_API bool iqk_moe_fused_up_gate(long /*Nx*/, long /*Ny*/, long /*n
         int /*typeA*/, const void * /*Aup*/, const void * /*Agate*/, long /*strideA*/,
         int /*typeB*/, const void * /*B*/, long /*strideB*/,
         float * /*C*/, long /*nb1*/, long /*nb2*/, const void * /*vrow_mapping*/, int /*ith*/, int /*nth*/) {
+    GGML_ABORT("=============================== Unsupported CPU");
     return false;
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index c1f7a8e4e..2d81c9d51 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -4,9 +4,7 @@
 // SPDX-License-Identifier: MIT
 //
 
-#if GGML_USE_IQK_MULMAT
 #include "iqk_mul_mat.h"
-#endif
 #include "ggml-quants.h"
 #include "ggml-impl.h"
 #define GGML_COMMON_IMPL_C
@@ -356,11 +354,9 @@ void ggml_vec_dot_iq1_bn_q8_K64(int n, float * s, size_t bs, const void * vx, si
 
     static_assert(QK_IQ1BN == 64, "This dot product implementation for iq1_bn requires a block size of 64");
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ1_BN, vx, 0, GGML_TYPE_Q8_K64, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     const block_iq1_bn * x = (const block_iq1_bn *)vx;
 
@@ -407,11 +403,9 @@ void vec_dot_iq2_bn_q8_K64(int n, float * s, size_t bs, const void * vx, size_t
 
     static_assert(QK_IQ1BN == 64, "This dot product implementation for iq2_bn requires a block size of 64");
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_BN, vx, 0, GGML_TYPE_Q8_K64, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     constexpr int Nj = QK_IQ1BN/4;
 
@@ -1187,11 +1181,9 @@ void vec_dot_iq2_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_K, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     GGML_ABORT("not implemented");
 
@@ -1492,11 +1484,9 @@ void vec_dot_iq2_ks_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_KS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     const ggml_half * dptr = (const ggml_half *)vx;
     const float d = GGML_FP16_TO_FP32(*dptr);
@@ -1834,11 +1824,9 @@ void vec_dot_iq3_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_K, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     GGML_ABORT("not implemented");
 }
@@ -1883,11 +1871,9 @@ void vec_dot_iq4_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_K, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     const int nb = n / QK_K;
 
@@ -2183,11 +2169,9 @@ void vec_dot_iq5_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ5_K, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     const int nb = n / QK_K;
 
@@ -2524,11 +2508,9 @@ void vec_dot_iq6_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ6_K, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
     GGML_ABORT("not implemented");
 
@@ -3379,11 +3361,9 @@ void dequantize_row_iq4_ks(const block_iq4_ks * x, float * y, int64_t k) {
 
 void  vec_dot_iq4_ks_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
     constexpr int kBlockSize = 32;
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK_K == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -3624,11 +3604,9 @@ void dequantize_row_iq5_ks(const block_iq5_ks * x, float * y, int64_t k) {
 
 void  vec_dot_iq5_ks_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
     constexpr int kBlockSize = 32;
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ5_KS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK_K == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4079,11 +4057,9 @@ void dequantize_row_iq4_kss(const block_iq4_kss * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq4_kss_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KSS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK_K == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4163,11 +4139,9 @@ void dequantize_row_iq4_nl_r4(const block_iq4_nl_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq4_nl_r4_q8_0(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_NL_R4, vx, 0, GGML_TYPE_Q8_0, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4268,11 +4242,9 @@ void dequantize_row_q4_0_r8(const block_iq4_nl_r8 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q4_0_r8_q8_0(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q4_0_R8, vx, 0, GGML_TYPE_Q8_0, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4368,11 +4340,9 @@ void dequantize_row_q8_0_r8(const block_q8_0_r8 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q8_0_r8_q8_0(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q8_0_R8, vx, 0, GGML_TYPE_Q8_0, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4469,11 +4439,9 @@ void dequantize_row_q5_0_r4(const block_q5_0_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q5_0_r4_q8_0(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q5_0_R4, vx, 0, GGML_TYPE_Q8_0, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4566,11 +4534,9 @@ void dequantize_row_q6_0_r4(const block_q6_0_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q6_0_r4_q8_0(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q6_0_R4, vx, 0, GGML_TYPE_Q8_0, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4658,11 +4624,9 @@ void dequantize_row_iq4_xs_r8(const block_iq4_xs_r8 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq4_xs_r8_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_XS_R8, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4759,11 +4723,9 @@ void dequantize_row_iq4_ks_r4(const block_iq4_ks_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq4_ks_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KS_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -4881,11 +4843,9 @@ void dequantize_row_iq2_bn_r4(const block_iq2_bn * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq2_bn_r4_q8_K64(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_BN_R4, vx, 0, GGML_TYPE_Q8_K64, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5002,11 +4962,9 @@ void dequantize_row_q4_k_r4(const block_q4_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q4_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q4_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5120,11 +5078,9 @@ void dequantize_row_q6_k_r4(const block_q6_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q6_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q6_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5240,11 +5196,9 @@ void dequantize_row_q5_k_r4(const block_q5_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q5_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q5_K_R4, vx, 0, GGML_TYPE_Q8_K32, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5376,11 +5330,9 @@ void dequantize_row_q3_k_r4(const block_q3_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q3_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q3_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5491,11 +5443,9 @@ void dequantize_row_q2_k_r4(const block_q2_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q2_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q2_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5605,11 +5555,9 @@ void dequantize_row_iq4_k_r4(const block_iq4_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq4_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5742,11 +5690,9 @@ void dequantize_row_iq5_k_r4(const block_iq5_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq5_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ5_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5862,11 +5808,9 @@ void dequantize_row_iq5_ks_r4(const block_iq5_ks_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq5_ks_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ5_KS_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -5960,11 +5904,9 @@ void dequantize_row_q8_k_r8(const block_q8_k_r8 * x, float * y, int64_t k) {
 }
 
 void vec_dot_q8_k_r8_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q8_K_R8, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6104,11 +6046,9 @@ void dequantize_row_q8_KV_r8(const void * vx, float * y, int64_t k) {
 }
 
 void vec_dot_q8_KV_r8_q8_KV(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q8_KV_R8, vx, 0, GGML_TYPE_Q8_KV, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6277,11 +6217,9 @@ void dequantize_row_iq3_k_r4(const block_iq3_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq3_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6400,11 +6338,9 @@ void dequantize_row_iq2_k_r4(const block_iq2_k_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq2_k_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_K_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6516,11 +6452,9 @@ void dequantize_row_iq2_xxs_r4(const block_iq2_xxs_r4 * x, float * y, int64_t k)
 }
 
 void vec_dot_iq2_xxs_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_XXS_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6603,11 +6537,9 @@ void dequantize_row_iq2_xs_r4(const block_iq2_xs_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq2_xs_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_XS_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6690,11 +6622,9 @@ void dequantize_row_iq2_s_r4(const block_iq2_s_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq2_s_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_S_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6793,11 +6723,9 @@ void dequantize_row_iq3_xxs_r4(const block_iq3_xxs_r4 * x, float * y, int64_t k)
 }
 
 void vec_dot_iq3_xxs_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_XXS_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -6894,11 +6822,9 @@ void dequantize_row_iq3_s_r4(const block_iq3_s_r4 * x, float * y, int64_t k) {
 }
 
 void vec_dot_iq3_s_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_S_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -7025,11 +6951,9 @@ void dequantize_row_iq1_s_r4(const block_iq1_s_r4  * x, float * y, int64_t n) {
 }
 
 void vec_dot_iq1_s_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ1_S_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -7171,11 +7095,9 @@ void dequantize_row_iq1_m_r4(const block_iq1_m_r4  * x, float * y, int64_t n) {
 }
 
 void vec_dot_iq1_m_r4_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ1_M_R4, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -7211,11 +7133,9 @@ void dequantize_row_q8_KV(const void * x, float * y, int64_t k) {
 }
 
 void vec_dot_q8_KV_q8_KV(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_Q8_KV, vx, 0, GGML_TYPE_Q8_KV, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
     GGML_ASSERT(n%QK4_NL == 0);
     GGML_ASSERT(nrc == 1);
     GGML_UNUSED(bs);
@@ -8274,11 +8194,9 @@ void vec_dot_iq2_kt_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
 }
 
@@ -8536,11 +8454,9 @@ void vec_dot_iq3_kt_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
 }
 
@@ -8787,10 +8703,8 @@ void vec_dot_iq4_kt_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx
     GGML_UNUSED(by);
     GGML_UNUSED(bs);
 
-#if GGML_USE_IQK_MULMAT
     if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
         return;
     }
-#endif
 
 }