@@ -3325,5 +3325,33 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
33253325 }
33263326 ).set_examples ({LLAMA_EXAMPLE_SERVER}));
33273327
3328+ add_opt (common_arg (
3329+ {" --rerank-bge-v2-m3-default" },
3330+ string_format (" use default bge-reranker-v2-m3 (note: can download weights from the internet)" ),
3331+ [](common_params & params) {
3332+ params.model .hf_repo = " gpustack/bge-reranker-v2-m3-GGUF" ;
3333+ params.model .hf_file = " bge-reranker-v2-m3-Q8_0.gguf" ;
3334+ params.n_ctx = 512 ;
3335+ params.flash_attn = true ;
3336+ params.verbose_prompt = false ;
3337+ params.reranking = true ;
3338+ params.pooling_type = LLAMA_POOLING_TYPE_CLS;
3339+ }
3340+ ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_RERANKING" ));
3341+
3342+ add_opt (common_arg (
3343+ {" --rerank-jina-v1-turbo-en-default" },
3344+ string_format (" use default jina-reranker-v1-turbo-en (note: can download weights from the internet)" ),
3345+ [](common_params & params) {
3346+ params.model .hf_repo = " ggml-org/jina-reranker-v1-turbo-en-GGUF" ;
3347+ params.model .hf_file = " Jina-Bert-Implementation-38M-F16.gguf" ;
3348+ params.n_ctx = 512 ;
3349+ params.flash_attn = true ;
3350+ params.verbose_prompt = false ;
3351+ params.reranking = true ;
3352+ params.pooling_type = LLAMA_POOLING_TYPE_CLS;
3353+ }
3354+ ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_RERANKING" ));
3355+
33283356 return ctx_arg;
33293357}
0 commit comments