Commit 1ba3df3

rerank : use [SEP] token instead of [BOS]
ggml-ci
1 parent d5ed2b9 commit 1ba3df3

File tree

2 files changed: +4 -3 lines changed

ci/run.sh

Lines changed: 2 additions & 1 deletion

@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {
 
     model_f16="${path_models}/ggml-model-f16.gguf"
 
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s><s>hi\nwhat is panda?</s><s>it's a bear\nwhat is panda?</s><s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    # for this model, the SEP token is "</s>"
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029

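As an aside, the CI command above packs one query/document pair per line of the prompt, joining query and document with two literal separator pieces. Below is a small hypothetical C++ helper, not part of the repo, that rebuilds the same string; the default "</s>" piece is an assumption lifted from the comment added in this commit and is only known to hold for the tiny rerank test model.

// Hypothetical helper (not in llama.cpp): rebuilds the prompt string that
// ci/run.sh passes to llama-embedding for --pooling rank. The "</s>" default
// is an assumption; it is the SEP piece of the tiny rerank test model only.
#include <cstdio>
#include <string>
#include <vector>

static std::string make_rerank_prompt(const std::string & query,
                                      const std::vector<std::string> & docs,
                                      const std::string & sep = "</s>") {
    std::string out;
    for (size_t i = 0; i < docs.size(); ++i) {
        if (i > 0) {
            out += "\n"; // the CI command keeps each query/doc pair on its own line
        }
        out += query + sep + sep + docs[i]; // "what is panda?</s></s>hi", etc.
    }
    return out;
}

int main() {
    const std::string prompt = make_rerank_prompt("what is panda?", { "hi", "it's a bear" });
    std::printf("%s\n", prompt.c_str());
    return 0;
}
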
examples/server/server.cpp

Lines changed: 2 additions & 2 deletions

@@ -2027,15 +2027,15 @@ struct server_context {
                         continue;
                     }
 
-                    // prompt: <s>query</s><s>doc</s>
+                    // prompt: [BOS]query[EOS][SEP]doc[EOS]
                     prompt_tokens.clear();
                     prompt_tokens.push_back(llama_token_bos(model));
                     {
                         const auto part = tokenize(slot.prompt[0], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
                     }
                     prompt_tokens.push_back(llama_token_eos(model));
-                    prompt_tokens.push_back(llama_token_bos(model));
+                    prompt_tokens.push_back(llama_token_sep(model));
                     {
                         const auto part = tokenize(slot.prompt[1], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());

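For context, here is a minimal sketch, not the server code itself, of the token sequence the server now builds for one rerank pair, assuming the layout stated in the comment above ([BOS]query[EOS][SEP]doc[EOS]). Only the llama_token_bos/llama_token_eos/llama_token_sep calls visible in the diff are used; `tokenize` stands in for server.cpp's own helper and is taken as a parameter, and the trailing EOS implied by the comment is appended here even though the corresponding server line sits just past the hunk shown.

// Minimal sketch of the rerank prompt layout after this commit:
//   [BOS] query [EOS] [SEP] doc [EOS]
// `tokenize` mirrors server.cpp's helper (text -> tokens, no special tokens).
#include <functional>
#include <string>
#include <vector>

#include "llama.h"

using tokenize_fn = std::function<std::vector<llama_token>(const std::string & text, bool add_special)>;

static std::vector<llama_token> build_rerank_tokens(const llama_model * model,
                                                    const std::string & query,
                                                    const std::string & doc,
                                                    const tokenize_fn & tokenize) {
    std::vector<llama_token> prompt_tokens;

    prompt_tokens.push_back(llama_token_bos(model));     // [BOS]
    {
        const auto part = tokenize(query, false);        // query tokens
        prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
    }
    prompt_tokens.push_back(llama_token_eos(model));     // [EOS]
    prompt_tokens.push_back(llama_token_sep(model));     // [SEP] (was [BOS] before this commit)
    {
        const auto part = tokenize(doc, false);          // document tokens
        prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
    }
    prompt_tokens.push_back(llama_token_eos(model));     // trailing [EOS] per the comment

    return prompt_tokens;
}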