Commit 2f462d3

server : fix incorrect usage of llama_get_embeddings()

ggml-ci

1 parent 6adc3c3 commit 2f462d3

2 files changed: +2 −1 lines changed


include/llama.h

Lines changed: 1 addition & 0 deletions
@@ -965,6 +965,7 @@ extern "C" {
     LLAMA_API int32_t llama_n_threads_batch(struct llama_context * ctx);
 
     // Set whether the context outputs embeddings or not
+    // TODO: rename to avoid confusion with llama_get_embeddings()
     LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
 
     // Set whether to use causal attention or not
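For context, the confusion the new TODO refers to comes from an asymmetric pair of declarations in the public header: llama_set_embeddings() sets the output mode flag, while the similarly named llama_get_embeddings() is not that flag's getter. The signatures below are as they appear in recent versions of llama.h:

LLAMA_API void    llama_set_embeddings(struct llama_context * ctx, bool embeddings); // sets the output *mode*
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);                  // returns the output *data* of the last decode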

tools/server/server.cpp

Lines changed: 1 addition & 1 deletion
@@ -1933,7 +1933,7 @@ struct server_context {
     // also we cannot split if the pooling would require any past tokens
     bool can_split() const {
         return
-            !llama_get_embeddings(ctx) ||
+            !params_base.embedding ||
             (llama_get_memory(ctx) && llama_pooling_type(ctx) == LLAMA_POOLING_TYPE_LAST);
     }
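Why the old check was wrong: llama_get_embeddings() returns a float pointer to the embeddings produced by the last decode, so !llama_get_embeddings(ctx) tested whether an output buffer happened to exist, not whether the server was configured for embeddings; the fix consults the server's own configuration flag, params_base.embedding, instead. A minimal sketch of the corrected logic, assuming the public llama.h API (the embeddings_requested flag is hypothetical, standing in for params_base.embedding):

#include "llama.h"

// Hypothetical application-side flag, mirroring the server's params_base.embedding.
static bool embeddings_requested = false;

void enable_embeddings(struct llama_context * ctx, bool on) {
    embeddings_requested = on;
    llama_set_embeddings(ctx, on); // toggles the output mode on the context
}

bool can_split(struct llama_context * ctx) {
    // Buggy variant: llama_get_embeddings() returns float * (the data of the
    // last decode), so "!llama_get_embeddings(ctx)" tested buffer presence,
    // not the embeddings mode.
    // Fixed variant: consult the configuration flag the application controls.
    return !embeddings_requested ||
           (llama_get_memory(ctx) && llama_pooling_type(ctx) == LLAMA_POOLING_TYPE_LAST);
}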

0 commit comments