From a1b10181169648c918b00f0ac52ba79326246ef3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sat, 31 May 2025 08:59:25 +0200
Subject: [PATCH 01/11] add multiple classifier outputs and labels support

---
 src/llama-context.cpp |  7 ++++---
 src/llama-model.cpp   | 46 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e153351af3809..6940440ecf380 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -808,16 +808,17 @@ int llama_context::encode(llama_batch & inp_batch) {
                 } break;
             case LLAMA_POOLING_TYPE_RANK:
                 {
-                    // extract the rerank score - a single float per sequence
+                    // extract the rerank score - n_cls_out floats per sequence
                     auto & embd_seq_out = embd_seq;
+                    const uint32_t n_cls_out = hparams.n_cls_out;
 
                     for (uint32_t s = 0; s < ubatch.n_seqs; ++s) {
                         const llama_seq_id seq_id = ubatch.seq_id[s][0];
                         if (embd_seq_out.find(seq_id) != embd_seq_out.end()) {
                             continue;
                         }
-                        embd_seq_out[seq_id].resize(1);
-                        ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (seq_id)*sizeof(float), sizeof(float));
+                        embd_seq_out[seq_id].resize(n_cls_out);
+                        ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_cls_out*seq_id)*sizeof(float), n_cls_out*sizeof(float));
                     }
                 } break;
             case LLAMA_POOLING_TYPE_UNSPECIFIED:
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 3f1f6c9bf3b06..08e178aa55f7c 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -417,18 +417,41 @@ void llama_model::load_arch(llama_model_loader & ml) {
     }
 }
 
+struct LLM_KV_MATCH_WITHOUT_ARCH {
+    const LLM_KV kv_arch = LLM_KV(LLM_ARCH_UNKNOWN);
+    const std::string kv_arch_prefix = llm_arch_name(LLM_ARCH_UNKNOWN);
+
+    bool operator()(const llm_kv & kv, const std::string & kv_name) const
+    {
+        std::string kv_match = kv_arch(kv);
+        auto kv_arch_pos = kv_match.find(kv_arch_prefix);
+
+        return kv_name.find(kv_match.substr(kv_arch_pos == std::string::npos ? 0 : kv_arch_pos + kv_arch_prefix.size())) != std::string::npos;
+    }
+};
+
 void llama_model::load_hparams(llama_model_loader & ml) {
     const gguf_context * ctx = ml.meta.get();
 
     // get metadata as string
     for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
+        const char * name = gguf_get_key(ctx, i);
         gguf_type type = gguf_get_kv_type(ctx, i);
+
         if (type == GGUF_TYPE_ARRAY) {
-            continue;
+            if (LLM_KV_MATCH_WITHOUT_ARCH()(LLM_KV_CLASSIFIER_OUTPUT_LABELS, name)) {
+                const size_t n_items = gguf_get_arr_n(ctx, i);
+
+                for (size_t j = 0; j < n_items; j++) {
+                    const std::string name_i = format("%s.%zu", name, j);
+                    const std::string value = gguf_get_arr_str(ctx, i, j);
+                    gguf_kv.emplace(name_i, value);
+                }
+            }
+        } else {
+            const std::string value = gguf_kv_to_str(ctx, i);
+            gguf_kv.emplace(name, value);
         }
-        const char * name = gguf_get_key(ctx, i);
-        const std::string value = gguf_kv_to_str(ctx, i);
-        gguf_kv.emplace(name, value);
     }
 
     // get general kv
@@ -13593,6 +13616,21 @@ int32_t llama_model_n_head_kv(const llama_model * model) {
     return model->hparams.n_head_kv();
 }
 
+uint32_t llama_model_n_cls_out(const struct llama_model * model) {
+    return model->hparams.n_cls_out;
+}
+
+const char * llama_model_get_classifier_label_by_index(const struct llama_model * model, uint32_t i) {
+    const std::string key = format("%s.%u", LLM_KV(model->arch)(LLM_KV_CLASSIFIER_OUTPUT_LABELS).c_str(), i);
+    const auto & it = model->gguf_kv.find(key);
+
+    if (it != model->gguf_kv.end()) {
+        return it->second.c_str();
+    }
+
+    return nullptr;
+}
+
 // deprecated
 int32_t llama_n_ctx_train(const llama_model * model) {
     return llama_model_n_ctx_train(model);
}
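Note: with this change, RANK pooling writes a contiguous block of n_cls_out floats per sequence into the pooled output tensor, so sequence s owns elements [s*n_cls_out, (s+1)*n_cls_out). A minimal sketch of reading the scores back through the public API (hypothetical caller code, not part of the patch; assumes ctx/model were set up with LLAMA_POOLING_TYPE_RANK, n_seq sequences were just processed, and uses llama_model_n_cls_out() which is made public in patch 02):

```cpp
// Sketch: consume per-sequence classifier scores after llama_encode()/llama_decode().
const uint32_t n_cls_out = llama_model_n_cls_out(model);
for (llama_seq_id s = 0; s < n_seq; ++s) {
    const float * scores = llama_get_embeddings_seq(ctx, s); // n_cls_out floats per sequence
    for (uint32_t k = 0; k < n_cls_out; ++k) {
        printf("seq %d output %u: %8.3f\n", s, k, scores[k]);
    }
}
```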
From 6ef43bab4783c9ec84a6ba78a156f667712ecae1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sat, 31 May 2025 09:01:37 +0200
Subject: [PATCH 02/11] make public

---
 include/llama.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/llama.h b/include/llama.h
index 01762bea2bf96..06ef86542453b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -506,6 +506,12 @@ extern "C" {
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
 
+    // Returns the number of classifier outputs (only valid for classifier models)
+    LLAMA_API uint32_t llama_model_n_cls_out(const struct llama_model * model);
+
+    // Returns label of classifier output by index (<n_cls_out). Returns nullptr if no label provided
+    LLAMA_API const char * llama_model_get_classifier_label_by_index(const struct llama_model * model, uint32_t i);
+

From  Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sat, 31 May 2025 09:04:02 +0200
Subject: [PATCH 03/11] show multiple rankings and associated labels

ggml-ci
---
 examples/embedding/embedding.cpp | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 71f700877a3b9..518289435980a 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -4,6 +4,7 @@
 #include "llama.h"
 
 #include <ctime>
+#include <cstring>
 #include <algorithm>
 
 #if defined(_MSC_VER)
@@ -236,9 +237,24 @@ int main(int argc, char ** argv) {
                 LOG("\n");
             }
         }
     } else if (pooling_type == LLAMA_POOLING_TYPE_RANK) {
+        const uint32_t n_cls_out = llama_model_n_cls_out(model);
+        std::vector<std::string> cls_out_labels;
+
+        for (uint32_t i = 0; i < n_cls_out; i++) {
+            const char * label = llama_model_get_classifier_label_by_index(model, i);
+            const std::string label_i = label == nullptr || strlen(label) == 0 ? std::to_string(i) : label;
+            cls_out_labels.emplace_back(label_i);
+        }
+
         for (int j = 0; j < n_embd_count; j++) {
-            // NOTE: if you change this log - update the tests in ci/run.sh
-            LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]);
+            for (uint32_t i = 0; i < n_cls_out; i++) {
+                // NOTE: if you change this log - update the tests in ci/run.sh
+                if (n_cls_out == 1) {
+                    LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]);
+                } else {
+                    LOG("rerank score %d: %8.3f [%s]\n", j, emb[j * n_embd + i], cls_out_labels[i].c_str());
+                }
+            }
         }
     } else {
         // print the first part of the embeddings or for a single prompt, the full embedding
From 38ece05b9a1631d93eb7afefdb8e7e548591fd41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sun, 1 Jun 2025 22:29:14 +0200
Subject: [PATCH 04/11] move labels to llama_model

---
 examples/embedding/embedding.cpp |  2 +-
 include/llama.h                  |  2 +-
 src/llama-model.cpp              | 27 +++++----------------------
 src/llama-model.h                |  3 +++
 4 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 518289435980a..50bafabc8bd9f 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -241,7 +241,7 @@ int main(int argc, char ** argv) {
         std::vector<std::string> cls_out_labels;
 
         for (uint32_t i = 0; i < n_cls_out; i++) {
-            const char * label = llama_model_get_classifier_label_by_index(model, i);
+            const char * label = llama_model_cls_label(model, i);
             const std::string label_i = label == nullptr || strlen(label) == 0 ? std::to_string(i) : label;
             cls_out_labels.emplace_back(label_i);
         }
diff --git a/include/llama.h b/include/llama.h
index 06ef86542453b..c46e1bb744591 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -510,7 +510,7 @@ extern "C" {
     LLAMA_API uint32_t llama_model_n_cls_out(const struct llama_model * model);
 
     // Returns label of classifier output by index (<n_cls_out). Returns nullptr if no label provided
-    LLAMA_API const char * llama_model_get_classifier_label_by_index(const struct llama_model * model, uint32_t i);
+    LLAMA_API const char * llama_model_cls_label(const struct llama_model * model, uint32_t i);
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 08e178aa55f7c..f620abb3773f9 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -417,39 +417,25 @@ void llama_model::load_arch(llama_model_loader & ml) {
     }
 }
 
-struct LLM_KV_MATCH_WITHOUT_ARCH {
-    const LLM_KV kv_arch = LLM_KV(LLM_ARCH_UNKNOWN);
-    const std::string kv_arch_prefix = llm_arch_name(LLM_ARCH_UNKNOWN);
-
-    bool operator()(const llm_kv & kv, const std::string & kv_name) const
-    {
-        std::string kv_match = kv_arch(kv);
-        auto kv_arch_pos = kv_match.find(kv_arch_prefix);
-
-        return kv_name.find(kv_match.substr(kv_arch_pos == std::string::npos ? 0 : kv_arch_pos + kv_arch_prefix.size())) != std::string::npos;
-    }
-};
-
 void llama_model::load_hparams(llama_model_loader & ml) {
     const gguf_context * ctx = ml.meta.get();
 
     // get metadata as string
     for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
         const char * name = gguf_get_key(ctx, i);
         gguf_type type = gguf_get_kv_type(ctx, i);
 
         if (type == GGUF_TYPE_ARRAY) {
-            if (LLM_KV_MATCH_WITHOUT_ARCH()(LLM_KV_CLASSIFIER_OUTPUT_LABELS, name)) {
+            if (LLM_KV(arch)(LLM_KV_CLASSIFIER_OUTPUT_LABELS) == name) {
                 const size_t n_items = gguf_get_arr_n(ctx, i);
 
                 for (size_t j = 0; j < n_items; j++) {
-                    const std::string name_i = format("%s.%zu", name, j);
                     const std::string value = gguf_get_arr_str(ctx, i, j);
-                    gguf_kv.emplace(name_i, value);
+                    classifier_labels.emplace_back(value);
                 }
             }
         } else {
             const std::string value = gguf_kv_to_str(ctx, i);
             gguf_kv.emplace(name, value);
         }
     }
@@ -13619,12 +13605,9 @@ uint32_t llama_model_n_cls_out(const struct llama_model * model) {
     return model->hparams.n_cls_out;
 }
 
-const char * llama_model_get_classifier_label_by_index(const struct llama_model * model, uint32_t i) {
-    const std::string key = format("%s.%u", LLM_KV(model->arch)(LLM_KV_CLASSIFIER_OUTPUT_LABELS).c_str(), i);
-    const auto & it = model->gguf_kv.find(key);
-
-    if (it != model->gguf_kv.end()) {
-        return it->second.c_str();
+const char * llama_model_cls_label(const struct llama_model * model, uint32_t i) {
+    if (i < model->classifier_labels.size()) {
+        return model->classifier_labels[i].c_str();
     }
 
     return nullptr;
diff --git a/src/llama-model.h b/src/llama-model.h
index cbea2cb331b62..4f0b5c36fb9b9 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -363,6 +363,9 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
+    // for classifier models
+    std::vector<std::string> classifier_labels;
+
     int64_t t_load_us = 0;
     int64_t t_start_us = 0;
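Note: with the labels cached on llama_model, llama_model_cls_label() becomes a constant-time vector lookup instead of a per-call gguf_kv key build. A hedged sketch of the intended caller pattern (mirrors what embedding.cpp does above; model setup omitted, variable names illustrative):

```cpp
// Sketch: map classifier outputs to display labels, falling back to the
// numeric index when no label (or an empty one) is stored in the GGUF.
std::vector<std::string> labels;
const uint32_t n_cls_out = llama_model_n_cls_out(model);
for (uint32_t i = 0; i < n_cls_out; i++) {
    const char * label = llama_model_cls_label(model, i); // nullptr if absent
    labels.emplace_back(label != nullptr && label[0] != '\0' ? std::string(label) : std::to_string(i));
}
```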
From 3a52f4c7155fa72c1641097c4dda9d3c74d99f29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 2 Jun 2025 09:58:05 +0200
Subject: [PATCH 05/11] update n_cls_out for any arch with labels

---
 src/llama-model.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index f620abb3773f9..964dcd24753f5 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -552,6 +552,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     uint32_t n_vocab = 0;
     ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
+    // for classifier models
+    if (!classifier_labels.empty()) {
+        hparams.n_cls_out = classifier_labels.size();
+    }
+
     // arch-specific KVs
     switch (arch) {
         case LLM_ARCH_LLAMA:
@@ -695,7 +700,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
-                ml.get_arr_n(LLM_KV_CLASSIFIER_OUTPUT_LABELS, hparams.n_cls_out, false);
 
                 switch (hparams.n_layer) {
                     case 3:

From 41049e6c539253b0b9d9a2991185fb35e678a0d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 2 Jun 2025 12:17:56 +0200
Subject: [PATCH 06/11] be more specific about behaviour

---
 include/llama.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/llama.h b/include/llama.h
index ba97619c1b882..6f6b19f3dfdc6 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -510,6 +510,7 @@ extern "C" {
     LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
 
     // Returns the number of classifier outputs (only valid for classifier models)
+    // Undefined behavior for non-classifier models
     LLAMA_API uint32_t llama_model_n_cls_out(const struct llama_model * model);
 
     // Returns label of classifier output by index (<n_cls_out). Returns nullptr if no label provided
From  Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 2 Jun 2025 23:04:31 +0200
Subject: [PATCH 07/11] move string array functionality into model-loader

---
 src/llama-model-loader.cpp | 59 +++++++++++++++++++++++++++-----------
 src/llama-model.cpp        | 18 ++++--------
 2 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index ddb1b03675b28..5214727057bd0 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -288,9 +288,10 @@ namespace GGUFMeta {
 
     template<typename T>
     bool llama_model_loader::get_arr(const std::string & key, std::vector<T> & result, bool required) {
-        const int kid = gguf_find_key(meta.get(), key.c_str());
+        const gguf_context * ctx = meta.get();
+        const int kid = gguf_find_key(ctx, key.c_str());
 
-        if (kid < 0 || gguf_get_kv_type(meta.get(), kid) != GGUF_TYPE_ARRAY) {
+        if (kid < 0 || gguf_get_kv_type(ctx, kid) != GGUF_TYPE_ARRAY) {
             if (required) {
                 throw std::runtime_error(format("array key not found in model: %s", key.c_str()));
             }
@@ -298,28 +299,40 @@ namespace GGUFMeta {
         }
 
         struct GGUFMeta::ArrayInfo arr_info =
-            GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
+            GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(ctx, kid);
 
         switch (arr_info.gt) {
             case GGUF_TYPE_UINT32:
-            case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
-                                                (std::is_same<T, uint32_t>::value)); break;
-            case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
+            case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                                (std::is_same<T, uint32_t>::value)); break;
+            case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
+            case GGUF_TYPE_STRING:  GGML_ASSERT((std::is_same<T, std::string>::value)); break;
             default:
-                throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
+                throw std::runtime_error(format("%s is not a string/float32/uint32/int32 array", key.c_str()));
         }
 
-        result.resize(arr_info.length);
-        result.assign((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length);
+        if constexpr (std::is_same<T, std::string>::value) {
+            const size_t n_items = gguf_get_arr_n(ctx, kid);
+            result.clear();
+
+            for (size_t i = 0; i < n_items; i++) {
+                const T value = gguf_get_arr_str(ctx, kid, i);
+                result.emplace_back(value);
+            }
+        } else {
+            result.resize(arr_info.length);
+            result.assign((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length);
+        }
 
         return true;
     }
 
     template<typename T, size_t N_MAX>
     bool llama_model_loader::get_arr(const std::string & key, std::array<T, N_MAX> & result, bool required) {
-        const int kid = gguf_find_key(meta.get(), key.c_str());
+        const gguf_context * ctx = meta.get();
+        const int kid = gguf_find_key(ctx, key.c_str());
 
-        if (kid < 0 || gguf_get_kv_type(meta.get(), kid) != GGUF_TYPE_ARRAY) {
+        if (kid < 0 || gguf_get_kv_type(ctx, kid) != GGUF_TYPE_ARRAY) {
             if (required) {
                 throw std::runtime_error(format("array key not found in model: %s", key.c_str()));
             }
@@ -327,22 +340,32 @@ namespace GGUFMeta {
         }
 
         struct GGUFMeta::ArrayInfo arr_info =
-            GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
+            GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(ctx, kid);
 
         switch (arr_info.gt) {
            case GGUF_TYPE_UINT32:
-            case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
-                                                (std::is_same<T, uint32_t>::value)); break;
-            case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
+            case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                                (std::is_same<T, uint32_t>::value)); break;
+            case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
+            case GGUF_TYPE_STRING:  GGML_ASSERT((std::is_same<T, std::string>::value)); break;
             default:
-                throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
+                throw std::runtime_error(format("%s is not a string/float32/uint32/int32 array", key.c_str()));
         }
 
         if (arr_info.length > N_MAX) {
             throw std::runtime_error(format("array length %u for key %s exceeds max %u", (uint32_t) arr_info.length, key.c_str(), (uint32_t) N_MAX));
         }
 
-        std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());
+        if constexpr (std::is_same<T, std::string>::value) {
+            const size_t n_items = gguf_get_arr_n(meta.get(), kid);
+
+            for (size_t i = 0; i < n_items; i++) {
+                const T value = gguf_get_arr_str(meta.get(), kid, i);
+                result[i] = value;
+            }
+        } else {
+            std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());
+        }
 
         return true;
     }
@@ -352,6 +375,8 @@ namespace GGUFMeta {
         return get_arr(llm_kv(kid), result, required);
     }
 
+    template bool llama_model_loader::get_arr<std::vector<std::string>>(enum llm_kv kid, std::vector<std::string> & result, bool required);
+
     template<typename T>
     bool llama_model_loader::get_key(const std::string & key, T & result, bool required) {
         auto it = kv_overrides.find(key);
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 964dcd24753f5..35a39496bccfb 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -425,22 +425,13 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 
     // get metadata as string
     for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
-        const char * name = gguf_get_key(ctx, i);
         gguf_type type = gguf_get_kv_type(ctx, i);
-
         if (type == GGUF_TYPE_ARRAY) {
-            if (LLM_KV(arch)(LLM_KV_CLASSIFIER_OUTPUT_LABELS) == name) {
-                const size_t n_items = gguf_get_arr_n(ctx, i);
-
-                for (size_t j = 0; j < n_items; j++) {
-                    const std::string value = gguf_get_arr_str(ctx, i, j);
-                    classifier_labels.emplace_back(value);
-                }
-            }
-        } else {
-            const std::string value = gguf_kv_to_str(ctx, i);
-            gguf_kv.emplace(name, value);
+            continue;
         }
+        const char * name = gguf_get_key(ctx, i);
+        const std::string value = gguf_kv_to_str(ctx, i);
+        gguf_kv.emplace(name, value);
     }
 
     // get general kv
@@ -553,6 +544,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
     // for classifier models
+    ml.get_arr(LLM_KV_CLASSIFIER_OUTPUT_LABELS, classifier_labels, false);
     if (!classifier_labels.empty()) {
         hparams.n_cls_out = classifier_labels.size();
     }
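Note: the string case needs its own branch because GGUF stores string arrays as length-prefixed records rather than a flat T[], so elements must be fetched one at a time via gguf_get_arr_str() instead of bulk-copied from arr_info.data. The if constexpr dispatch keeps both strategies in one template without instantiating the invalid bulk copy for std::string. A self-contained sketch of the same pattern (illustrative only, not llama.cpp code; read_array and its raw-buffer layout are assumptions):

```cpp
#include <cstddef>
#include <string>
#include <type_traits>
#include <vector>

// Sketch: one template, two copy strategies, chosen at compile time.
// Numeric types are bulk-assigned from a raw buffer; strings are
// converted element by element, like gguf_get_arr_str() in the patch.
template <typename T>
std::vector<T> read_array(const void * data, size_t n) {
    std::vector<T> out;
    if constexpr (std::is_same<T, std::string>::value) {
        auto strs = static_cast<const char * const *>(data); // array of C strings
        for (size_t i = 0; i < n; i++) {
            out.emplace_back(strs[i]);
        }
    } else {
        out.assign(static_cast<const T *>(data), static_cast<const T *>(data) + n);
    }
    return out;
}
```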
From 9aa5d730e57a63cf1784195ec7c9a297108830ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 2 Jun 2025 23:27:09 +0200
Subject: [PATCH 08/11] forgotten variable replacements

---
 src/llama-model-loader.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 5214727057bd0..bd9e6da8832b7 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -357,10 +357,10 @@ namespace GGUFMeta {
         }
 
         if constexpr (std::is_same<T, std::string>::value) {
-            const size_t n_items = gguf_get_arr_n(meta.get(), kid);
+            const size_t n_items = gguf_get_arr_n(ctx, kid);
 
             for (size_t i = 0; i < n_items; i++) {
-                const T value = gguf_get_arr_str(meta.get(), kid, i);
+                const T value = gguf_get_arr_str(ctx, kid, i);
                 result[i] = value;
             }
         } else {

From 6bba8ed38f09cf16e0539f633b2d74806df22269 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Tue, 3 Jun 2025 09:54:27 +0200
Subject: [PATCH 09/11] improved comment

[no ci]
---
 include/llama.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/llama.h b/include/llama.h
index 6f6b19f3dfdc6..a4263841423da 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -923,7 +923,7 @@ extern "C" {
     // Get the embeddings for a sequence id
     // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
-    // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[i] with the rank(s) of the sequence
+    // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[n_cls_out] with the rank(s) of the sequence
     // otherwise: float[n_embd] (1-dimensional)
     LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);

From 7443156a9141f255c451a313ece957d20ee53478 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 5 Jun 2025 17:16:04 +0200
Subject: [PATCH 10/11] logging and minor changes

---
 examples/embedding/embedding.cpp | 4 ++--
 src/llama-model.cpp              | 9 +++++++++
 src/llama-model.h                | 6 +++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 50bafabc8bd9f..f368716d519fe 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -242,8 +242,8 @@ int main(int argc, char ** argv) {
 
         for (uint32_t i = 0; i < n_cls_out; i++) {
             const char * label = llama_model_cls_label(model, i);
-            const std::string label_i = label == nullptr || strlen(label) == 0 ? std::to_string(i) : label;
-            cls_out_labels.emplace_back(label_i);
+            const std::string label_i(label == nullptr ? "" : label);
+            cls_out_labels.emplace_back(label_i.empty() ? std::to_string(i) : label_i);
         }
 
         for (int j = 0; j < n_embd_count; j++) {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 35a39496bccfb..eb97019a0f928 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -4361,6 +4361,15 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: ssm_d_state      = %u\n", __func__, hparams.ssm_d_state);
         LLAMA_LOG_INFO("%s: ssm_dt_rank      = %u\n", __func__, hparams.ssm_dt_rank);
         LLAMA_LOG_INFO("%s: ssm_dt_b_c_rms   = %d\n", __func__, hparams.ssm_dt_b_c_rms);
+
+        if (!classifier_labels.empty()) {
+            LLAMA_LOG_INFO("%s: n_cls_out        = %u\n", __func__, hparams.n_cls_out);
+
+            size_t i = 0;
+            for (auto label : classifier_labels) {
+                LLAMA_LOG_INFO("%s: cls_label[%2zu]    = %s\n", __func__, i++, label.c_str());
+            }
+        }
     }
 
     LLAMA_LOG_INFO("%s: model type       = %s\n", __func__, type_name().c_str());
diff --git a/src/llama-model.h b/src/llama-model.h
index 4f0b5c36fb9b9..18b714620bbcf 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -329,6 +329,9 @@ struct llama_model {
     llama_hparams hparams = {};
     llama_vocab   vocab;
 
+    // for classifier models
+    std::vector<std::string> classifier_labels;
+
     struct ggml_tensor * tok_embd  = nullptr;
     struct ggml_tensor * type_embd = nullptr;
     struct ggml_tensor * pos_embd  = nullptr;
@@ -363,9 +366,6 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    // for classifier models
-    std::vector<std::string> classifier_labels;
-
     int64_t t_load_us = 0;
     int64_t t_start_us = 0;
From 67c4cd21d5a2de94c8f04b985d7e55c2999a32fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 5 Jun 2025 17:23:05 +0200
Subject: [PATCH 11/11] forgot to remove cstring

---
 examples/embedding/embedding.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index f368716d519fe..8bef7f8f6ba25 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -4,7 +4,6 @@
 #include "llama.h"
 
 #include <ctime>
-#include <cstring>
 #include <algorithm>
 
 #if defined(_MSC_VER)