Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3325,5 +3325,33 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_examples({LLAMA_EXAMPLE_SERVER}));

add_opt(common_arg(
{"--rerank-bge-v2-m3-default"},
string_format("use default bge-reranker-v2-m3 (note: can download weights from the internet)"),
[](common_params & params) {
params.model.hf_repo = "gpustack/bge-reranker-v2-m3-GGUF";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should host this model in ggml-org. I'll make a copy when GGUF My Repo is back online.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> We should host this model in ggml-org. I'll make a copy when GGUF My Repo is back online.

It is now back online.

params.model.hf_file = "bge-reranker-v2-m3-Q8_0.gguf";
params.n_ctx = 512;
params.flash_attn = true;
params.verbose_prompt = false;
params.reranking = true;
params.pooling_type = LLAMA_POOLING_TYPE_CLS;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING"));

// Preset flag: configure reranking with the default jina-reranker-v1-turbo-en model.
//
// When the flag is given, the handler points the model source at the Hugging Face
// repo/file below (the help text warns that weights may be downloaded) and sets the
// context size, flash attention, reranking and CLS pooling parameters in one go.
// The option is registered for LLAMA_EXAMPLE_SERVER only.
//
// Deliberately no set_env() on this preset: "LLAMA_ARG_RERANKING" is already attached
// to the --rerank-bge-v2-m3-default preset, and a single environment variable cannot
// unambiguously select between two different model presets (the name also reads as a
// generic reranking switch, not a model choice). Use the command-line flag instead.
add_opt(common_arg(
    {"--rerank-jina-v1-turbo-en-default"},
    string_format("use default jina-reranker-v1-turbo-en (note: can download weights from the internet)"),
    [](common_params & params) {
        // Model source on Hugging Face.
        params.model.hf_repo = "ggml-org/jina-reranker-v1-turbo-en-GGUF";
        params.model.hf_file = "Jina-Bert-Implementation-38M-F16.gguf";

        // Runtime settings applied together with the model choice.
        params.n_ctx          = 512;
        params.flash_attn     = true;
        params.verbose_prompt = false;
        params.reranking      = true;
        params.pooling_type   = LLAMA_POOLING_TYPE_CLS;
    }
).set_examples({LLAMA_EXAMPLE_SERVER}));

return ctx_arg;
}
Loading