
Commit d8861ed

Merge branch 'ggml-org:master' into mradermacher

2 parents f4affdc + 8308f98

40 files changed: +1089 -1048 lines

ci/run.sh

Lines changed: 1 addition & 1 deletion
@@ -779,7 +779,7 @@ function gg_run_rerank_tiny {
     model_f16="${path_models}/ggml-model-f16.gguf"
 
     # for this model, the SEP token is "</s>"
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029
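The test prompt now uses the tab separator (the default of the new cls_sep parameter added below) instead of hard-coding the model's "</s>" SEP token; the expected separator tokens are inserted by the pair-splitting logic added in examples/embedding/embedding.cpp further down.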

common/arg.cpp

Lines changed: 7 additions & 0 deletions
@@ -2706,6 +2706,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.embd_sep = value;
         }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
+    add_opt(common_arg(
+        {"--cls-separator"}, "STRING",
+        "separator of classification sequences (default \\t) for example \"<#seq#>\"",
+        [](common_params & params, const std::string & value) {
+            params.cls_sep = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--host"}, "HOST",
         string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()),
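A hypothetical invocation of the new flag (model path and prompt are invented here; the <#seq#> string is the example separator from the help text above):

    ./bin/llama-embedding -m rerank-model.gguf --pooling rank --embd-normalize -1 \
        --cls-separator "<#seq#>" -p "what is panda?<#seq#>it's a bear"

With the default left unchanged, the same pair is written as "what is panda?\tit's a bear", as in the updated ci/run.sh test above.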

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -358,6 +358,7 @@ struct common_params {
     int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
     std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
     std::string embd_sep = "\n"; // separator of embeddings
+    std::string cls_sep = "\t"; // separator of classification sequences
 
     // server params
     int32_t port = 8080; // server listens on this network port

convert_hf_to_gguf.py

Lines changed: 0 additions & 14 deletions
@@ -2145,7 +2145,6 @@ def __init__(self, *args, **kwargs):
 
     def set_vocab(self):
         self._set_vocab_gpt2()
-        self.gguf_writer.add_add_bos_token(True)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -3918,9 +3917,6 @@ def _xlmroberta_set_vocab(self) -> None:
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
-
 
 @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
 class DistilBertModel(BertModel):
@@ -3962,8 +3958,6 @@ def set_vocab(self):
         bpe_tok_path = self.dir_model / "tokenizer.json"
         if bpe_tok_path.exists():
             self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
 
         # we need this to validate the size of the token_type embeddings
         # though currently we are passing all zeros to the token_type embeddings
@@ -4848,8 +4842,6 @@ def set_vocab(self):
             self.gguf_writer.add_token_type_count(2)
         else:
             raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
 
 
 @ModelBase.register("OpenELMForCausalLM")
@@ -5451,9 +5443,6 @@ def set_vocab(self):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
@@ -5591,9 +5580,6 @@ def set_vocab(self):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
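All six removals drop hard-coded add_add_bos_token()/add_add_eos_token() overrides. Judging by the surrounding context, where several of the deleted calls sit directly after special_vocab.add_to_gguf(self.gguf_writer), these flags are presumably now written from the tokenizer's own metadata rather than being forced by the converter.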

examples/embedding/embedding.cpp

Lines changed: 30 additions & 4 deletions
@@ -133,10 +133,36 @@ int main(int argc, char ** argv) {
     // max batch size
     const uint64_t n_batch = params.n_batch;
 
+    // get added sep and eos token, if any
+    const std::string added_sep_token = llama_vocab_get_add_sep(vocab) ? llama_vocab_get_text(vocab, llama_vocab_sep(vocab)) : "";
+    const std::string added_eos_token = llama_vocab_get_add_eos(vocab) ? llama_vocab_get_text(vocab, llama_vocab_eos(vocab)) : "";
+
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = common_tokenize(ctx, prompt, true, true);
+        std::vector<llama_token> inp;
+
+        // split classification pairs and insert expected separator tokens
+        if (pooling_type == LLAMA_POOLING_TYPE_RANK && prompt.find(params.cls_sep) != std::string::npos) {
+            std::vector<std::string> pairs = split_lines(prompt, params.cls_sep);
+            std::string final_prompt;
+
+            for (size_t i = 0; i < pairs.size(); i++) {
+                final_prompt += pairs[i];
+                if (i != pairs.size() - 1) {
+                    if (!added_eos_token.empty()) {
+                        final_prompt += added_eos_token;
+                    }
+                    if (!added_sep_token.empty()) {
+                        final_prompt += added_sep_token;
+                    }
+                }
+            }
+
+            inp = common_tokenize(ctx, final_prompt, true, true);
+        } else {
+            inp = common_tokenize(ctx, prompt, true, true);
+        }
         if (inp.size() > n_batch) {
             LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);
@@ -145,11 +171,11 @@
         inputs.push_back(inp);
     }
 
-    // check if the last token is SEP
+    // check if the last token is SEP/EOS
     // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
     for (auto & inp : inputs) {
-        if (inp.empty() || inp.back() != llama_vocab_sep(vocab)) {
-            LOG_WRN("%s: last token in the prompt is not SEP\n", __func__);
+        if (inp.empty() || (inp.back() != llama_vocab_sep(vocab) && inp.back() != llama_vocab_eos(vocab))) {
+            LOG_WRN("%s: last token in the prompt is not SEP or EOS\n", __func__);
             LOG_WRN("%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
         }
     }
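The effect of the new branch is easiest to see in isolation. A minimal standalone sketch (the helper name join_rank_pair is hypothetical, and the "</s>" strings stand in for whatever the model's added EOS/SEP tokens actually are):

    #include <cstdio>
    #include <string>
    #include <vector>

    // join classification pairs with EOS + SEP, mirroring the loop above
    static std::string join_rank_pair(const std::vector<std::string> & pairs,
                                      const std::string & eos, const std::string & sep) {
        std::string out;
        for (size_t i = 0; i < pairs.size(); i++) {
            out += pairs[i];
            if (i != pairs.size() - 1) {
                out += eos + sep; // inserted between query and document
            }
        }
        return out;
    }

    int main() {
        std::string joined = join_rank_pair({"what is panda?", "it's a bear"}, "</s>", "</s>");
        std::printf("%s\n", joined.c_str()); // what is panda?</s></s>it's a bear
    }

This reconstructs exactly the "</s></s>"-joined form that ci/run.sh used to hard-code.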

ggml/src/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
         foreach (feat ${ARGN})
             set(GGML_INTERNAL_${feat} ON)
         endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
     endif()
 
     ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
         else()
             message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
         endif()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            ggml_add_cpu_backend_variant(power0)
+            ggml_add_cpu_backend_variant(power7_1 POWER7)
+            ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+            ggml_add_cpu_backend_variant(power8_1 POWER8)
+            ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+            ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+            ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+            ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+        else()
+            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
     else()
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
     endif()
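This block only runs when GGML_CPU_ALL_VARIANTS is enabled. A hypothetical Linux/PowerPC configure (the option is normally paired with GGML_BACKEND_DL so the variants are built as loadable backends) would be:

    cmake -B build -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON

producing all eight variants, from the power0 baseline through power11 with VSX.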

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             else()
                 list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
             endif()
+        elseif(GGML_CPU_ALL_VARIANTS)
+            # Begin with the lowest baseline
+            set(ARCH_DEFINITIONS "")
+
+            # When a feature is selected, bump the MCPU to the first
+            # version that supported it
+            foreach(PVER RANGE 7 11)
+                if(DEFINED GGML_INTERNAL_POWER${PVER})
+                    set(POWERPC_MCPU "power${PVER}")
+                    list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
+                endif()
+            endforeach()
+            if (GGML_INTERNAL_VSX)
+                list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
+                list(APPEND ARCH_FLAGS -mvsx)
+            endif()
+
+            if (DEFINED POWERPC_MCPU)
+                list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
+            endif()
+            ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
         else()
             if (GGML_CPU_POWERPC_CPUTYPE)
                 list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
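A worked example: for the power8_2 variant declared above (features POWER8 and VSX), only GGML_INTERNAL_POWER8 is defined during the 7..11 loop, so POWERPC_MCPU becomes "power8"; the VSX branch then appends -mvsx. The variant is therefore compiled with -mcpu=power8 -mvsx and with GGML_USE_POWER8 and GGML_USE_VSX defined for the feature-scoring code below.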
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp

Lines changed: 82 additions & 0 deletions

@@ -0,0 +1,82 @@
+#include "ggml-backend-impl.h"
+
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
+
+#if defined(__linux__)
+#include <sys/auxv.h>
+#endif
+
+#include <string>
+
+struct powerpc_features {
+    std::string platform = "";
+    int power_version = -1;
+
+    bool has_vsx = false;
+
+    powerpc_features() {
+#if defined(__linux__)
+        unsigned long auxval = getauxval(AT_PLATFORM);
+        if (auxval) {
+            platform = std::string(reinterpret_cast<const char*>(auxval));
+            // TBD: Do systems exist that return this in uppercase?
+            if (platform.substr(0, 5) == "power") {
+                // Extract a numeric suffix, if one exists
+                int vpos = -1;
+                for (int i = platform.length() - 1; i >= 0; i--) {
+                    if (std::isdigit(platform[i])) {
+                        vpos = i;
+                    } else {
+                        break;
+                    }
+                }
+                if (vpos > -1) {
+                    power_version = std::stoi(platform.substr(vpos));
+                }
+            }
+        }
+#endif
+        if (power_version >= 9) {
+            has_vsx = true;
+        }
+    }
+};
+
+static int ggml_backend_cpu_powerpc_score() {
+    int score = 1;
+    powerpc_features pf;
+
+    // Platform scores
+#if defined(GGML_USE_POWER7)
+    if (pf.power_version < 7) { return 0; }
+    score += 1<<1;
+#endif
+#if defined(GGML_USE_POWER8)
+    if (pf.power_version < 8) { return 0; }
+    score += 1<<2;
+#endif
+#if defined(GGML_USE_POWER9)
+    if (pf.power_version < 9) { return 0; }
+    score += 1<<3;
+#endif
+#if defined(GGML_USE_POWER10)
+    if (pf.power_version < 10) { return 0; }
+    score += 1<<4;
+#endif
+#if defined(GGML_USE_POWER11)
+    if (pf.power_version < 11) { return 0; }
+    score += 1<<5;
+#endif
+
+    // Feature scores
+#if defined(GGML_USE_VSX)
+    if (!pf.has_vsx) { return 0; }
+    score += 1<<6;
+#endif
+
+    return score;
+}
+
+GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
+
+#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
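How the scores play out: each variant is compiled with the GGML_USE_* macros chosen by the CMake logic above, and GGML_BACKEND_DL_SCORE_IMPL exports the score so the loader can pick the variant with the highest nonzero value. On a hypothetical POWER10 host with VSX (power_version == 10, has_vsx == true), the power10 variant scores 1 + (1<<4) + (1<<6) = 81, power9 scores 1 + (1<<3) + (1<<6) = 73, and power11 returns 0 because its version check fails, so power10 wins. The bit-shifted weights ensure a more specific variant always outranks a less specific one that also passes its checks.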

ggml/src/ggml-cpu/repack.cpp

Lines changed: 15 additions & 14 deletions
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
     }
 };
 
-// instance for Q4
-static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
-static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
-static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
-static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
-
-// instance for IQ4
-static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
-
 } // namespace ggml::cpu::repack
 
 static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
+
+    // instance for Q4
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
+
+    // instance for IQ4
+    static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
+
     if (cur->type == GGML_TYPE_Q4_0) {
         if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
             if (cur->ne[1] % 8 == 0) {
-                return &ggml::cpu::repack::q4_0_8x8_q8_0;
+                return &q4_0_8x8_q8_0;
             }
         }
         if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::q4_0_4x8_q8_0;
+                return &q4_0_4x8_q8_0;
             }
         }
         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::q4_0_4x4_q8_0;
+                return &q4_0_4x4_q8_0;
             }
         }
     } else if (cur->type == GGML_TYPE_Q4_K) {
         if (ggml_cpu_has_avx2()) {
             if (cur->ne[1] % 8 == 0) {
-                return &ggml::cpu::repack::q4_K_8x8_q8_K;
+                return &q4_K_8x8_q8_K;
             }
         }
     } else if (cur->type == GGML_TYPE_IQ4_NL) {
         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
+                return &iq4_nl_4x4_q8_0;
             }
         }
     }
ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 16 deletions
@@ -19,10 +19,10 @@
 #endif
 #include "ggml-common.h"
 
-#include <cstdio>
 #include <array>
 #include <cassert>
 #include <cfloat>
+#include <cstdio>
 #include <string>
 #include <vector>
 
@@ -767,21 +767,7 @@ struct ggml_backend_cuda_context {
         name(GGML_CUDA_NAME + std::to_string(device)) {
     }
 
-    ~ggml_backend_cuda_context() {
-        if (copy_event != nullptr) {
-            CUDA_CHECK(cudaEventDestroy(copy_event));
-        }
-        for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) {
-            for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) {
-                if (streams[i][j] != nullptr) {
-                    CUDA_CHECK(cudaStreamDestroy(streams[i][j]));
-                }
-            }
-            if (cublas_handles[i] != nullptr) {
-                CUBLAS_CHECK(cublasDestroy(cublas_handles[i]));
-            }
-        }
-    }
+    ~ggml_backend_cuda_context();
 
     cudaStream_t stream(int device, int stream) {
         if (streams[device][stream] == nullptr) {
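The destructor is now only declared in the header; its body (the cudaEventDestroy/cudaStreamDestroy/cublasDestroy teardown deleted above) presumably moves to a single translation unit elsewhere in this commit, so the CUDA/cuBLAS cleanup calls no longer need to be compiled into every file that includes common.cuh.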
