Skip to content

Commit c709275

Browse files
committed
examples : use llama_encode() when appropriate
ggml-ci
1 parent c14ee72 commit c709275

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

examples/embedding/embedding.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,14 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & toke
3535

3636
static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
3737
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
38-
const struct llama_model * model = llama_get_model(ctx);
3938

4039
// clear previous kv_cache values (irrelevant for embeddings)
4140
llama_kv_self_clear(ctx);
4241

4342
// run model
4443
LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq);
45-
if (llama_model_has_encoder(model) && !llama_model_has_decoder(model)) {
46-
// encoder-only model
47-
if (llama_encode(ctx, batch) < 0) {
48-
LOG_ERR("%s : failed to encode\n", __func__);
49-
}
50-
} else if (!llama_model_has_encoder(model) && llama_model_has_decoder(model)) {
51-
// decoder-only model
52-
if (llama_decode(ctx, batch) < 0) {
53-
LOG_ERR("%s : failed to decode\n", __func__);
54-
}
44+
if (llama_encode(ctx, batch) < 0) {
45+
LOG_ERR("%s : failed to encode\n", __func__);
5546
}
5647

5748
for (int i = 0; i < batch.n_tokens; i++) {

tools/server/server.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3212,7 +3212,14 @@ struct server_context {
32123212
batch.logits + i,
32133213
};
32143214

3215-
const int ret = llama_decode(ctx, batch_view);
3215+
int ret = 0;
3216+
3217+
if (params_base.embedding || params_base.reranking) {
3218+
ret = llama_encode(ctx, batch_view);
3219+
} else {
3220+
ret = llama_decode(ctx, batch_view);
3221+
}
3222+
32163223
metrics.on_decoded(slots);
32173224

32183225
if (ret != 0) {

0 commit comments

Comments
 (0)