
Commit 8075582

cont : avoid embeddings_org
ggml-ci
1 parent: aadc68b

File tree

4 files changed: +1 -16 lines changed

    include/llama.h
    src/llama-context.cpp
    src/llama-cparams.h
    tools/server/server.cpp

include/llama.h

Lines changed: 0 additions & 1 deletion

@@ -965,7 +965,6 @@ extern "C" {
     LLAMA_API int32_t llama_n_threads_batch(struct llama_context * ctx);
 
     // Set whether the context outputs embeddings or not
-    // Note: set to true only if the context was created with llama_context_params.embeddings = true
     LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
 
     // Set whether to use causal attention or not
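With the note removed, llama_set_embeddings is no longer documented as limited to contexts created with embeddings enabled. A minimal sketch of a caller toggling the flag at runtime (the model path and surrounding setup are illustrative, not part of this commit):

#include "llama.h"

int main(void) {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);

    llama_context_params cparams = llama_context_default_params();
    cparams.embeddings = false; // created as a generation context

    llama_context * ctx = llama_init_from_model(model, cparams);

    // After this commit the setter is unconditional: there is no requirement
    // that the context was created with llama_context_params.embeddings = true.
    llama_set_embeddings(ctx, true);
    // ... decode a batch, then read vectors via llama_get_embeddings(ctx) ...
    llama_set_embeddings(ctx, false); // back to token generation

    llama_free(ctx);
    llama_model_free(model);
    return 0;
}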

src/llama-context.cpp

Lines changed: 0 additions & 13 deletions

@@ -41,7 +41,6 @@ llama_context::llama_context(
     cparams.yarn_beta_slow = params.yarn_beta_slow;
     cparams.defrag_thold   = params.defrag_thold;
     cparams.embeddings     = params.embeddings;
-    cparams.embeddings_org = params.embeddings;
     cparams.offload_kqv    = params.offload_kqv;
     cparams.flash_attn     = params.flash_attn;
     cparams.no_perf        = params.no_perf;
@@ -82,12 +81,6 @@ llama_context::llama_context(
         }
     }
 
-    if (!cparams.embeddings && cparams.pooling_type != LLAMA_POOLING_TYPE_NONE) {
-        LLAMA_LOG_WARN("%s: pooling_type is set to %d but embeddings is set to false - disabling pooling\n", __func__, cparams.pooling_type);
-
-        cparams.pooling_type = LLAMA_POOLING_TYPE_NONE;
-    }
-
     if (params.attention_type == LLAMA_ATTENTION_TYPE_UNSPECIFIED) {
         cparams.causal_attn = hparams.causal_attn;
     } else {
@@ -630,12 +623,6 @@ void llama_context::set_abort_callback(bool (*abort_callback)(void * data), void * abort_callback_data) {
 }
 
 void llama_context::set_embeddings(bool value) {
-    if (value && !cparams.embeddings_org) {
-        LLAMA_LOG_ERROR("%s: cannot enable embeddings for this context (%s)\n",
-                __func__, "https://github.com/ggml-org/llama.cpp/pull/14208");
-        return;
-    }
-
     LLAMA_LOG_DEBUG("%s: value = %d\n", __func__, value);
 
     cparams.embeddings = value;
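Stitching the surviving context lines of the last hunk together, the setter now reduces to an unconditional assignment:

// set_embeddings after this commit, reassembled from the hunk's context
// lines: the embeddings_org guard is gone, the value is logged and stored.
void llama_context::set_embeddings(bool value) {
    LLAMA_LOG_DEBUG("%s: value = %d\n", __func__, value);

    cparams.embeddings = value;
}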

src/llama-cparams.h

Lines changed: 0 additions & 1 deletion

@@ -27,7 +27,6 @@ struct llama_cparams {
     float defrag_thold;
 
     bool embeddings;
-    bool embeddings_org;
     bool causal_attn;
     bool offload_kqv;
     bool flash_attn;

tools/server/server.cpp

Lines changed: 1 addition & 1 deletion

@@ -3389,7 +3389,7 @@ struct server_context {
 
         // pad the batch so that batch.n_tokens >= n_slots
        // TODO: temporary workaround for https://github.com/ggml-org/llama.cpp/issues/13689
-        if (llama_get_embeddings(ctx)) {
+        if (slot_batched->need_embd()) {
            const int n_slots = slots.size();
 
            if (batch.n_tokens < n_slots) {
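The padding workaround now keys off whether the slots actually batched need embedding output (slot_batched->need_embd()) rather than the context-global llama_get_embeddings(ctx), which after this commit no longer has to mirror how the context was created. A hypothetical sketch of what such a per-slot check could look like (server_slot internals are not shown in this hunk, so the names below are assumptions):

// Hypothetical sketch of a per-slot embedding check; the actual
// server_slot::need_embd() in tools/server/server.cpp may differ.
struct server_slot {
    server_task_type task_type;

    bool need_embd() const {
        // embedding and rerank tasks require embedding output from the model
        return task_type == SERVER_TASK_TYPE_EMBEDDING ||
               task_type == SERVER_TASK_TYPE_RERANK;
    }
};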
