@@ -43,6 +43,7 @@ struct Args
     std::string n_gpu_layers;
     std::string cur_vs_name = "default";
     std::string dump_dot;
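+    // separator between the embedding query and the rerank query ("" = disabled; only used without a main model)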
+    std::string emb_rank_query_sep;
     std::map<std::string, std::vector<std::string>> vector_stores;
     int max_length = -1;
     int max_context_length = 512;
@@ -150,6 +151,8 @@ void usage(const std::string &prog)
               << "                            items with a lower score are discarded.\n"
               << "  --rerank_top_n N          number of selected items using reranker model (default: 1)\n"
               << "  +rerank_rewrite           reranker use the rewritten query (default: OFF, i.e. use the original user input)\n"
+              << "  --emb_rank_query_sep      separator for embedding & rerank query (default: \"\", i.e. disabled)\n"
+              << "                            only used without main model\n"
               << "  --hide_reference          do not show references (default: false)\n"
               << "  --rag_template ...        prompt template for RAG (macros: {context}, {question}) (optional).\n"
               << "                            Support some C escape sequences (\\n). Example:\n"
@@ -231,7 +234,7 @@ static size_t parse_args(Args &args, const std::vector<std::string> &argv)
     while (c < argc)
     {
         const char *arg = argv[c].c_str();
-        if ((strcmp(arg, "--help") == 0) || (strcmp(arg, "-h") == 0))
+        if ((strcmp(arg, "--help") == 0) || (strcmp(arg, "-h") == 0) || (strcmp(arg, "-?") == 0))
         {
             args.show_help = true;
         }
@@ -345,6 +348,7 @@ static size_t parse_args(Args &args, const std::vector<std::string> &argv)
         handle_para0("--rag_post_extending",   rag_post_extending,   std::stoi)
         handle_para0("--rag_template",         rag_template,         std::string)
         handle_para0("--rag_context_sep",      rag_context_sep,      std::string)
+        handle_para0("--emb_rank_query_sep",   emb_rank_query_sep,   std::string)
         handle_para0("--init_vs",              vector_store_in,      std::string)
         handle_para0("--merge_vs",             merge_vs,             std::string)
         handle_para0("--layer_spec",           layer_spec,           std::string)
@@ -641,10 +645,28 @@ static void run_qa_ranker(Args &args, chatllm::Pipeline &pipeline, TextStreamer
 
 #define DEF_GenerationConfig(gen_config, args) chatllm::GenerationConfig gen_config(args.max_length, args.max_context_length, args.temp > 0, args.reversed_role, \
     args.top_k, args.top_p, args.temp, args.num_threads, args.sampling, args.presence_penalty, args.tfs_z); \
-    gen_config.set_ai_prefix(args.ai_prefix); gen_config.dump_dot = args.dump_dot;
+    gen_config.set_ai_prefix(args.ai_prefix); gen_config.dump_dot = args.dump_dot; \
+    gen_config.emb_rank_query_sep = args.emb_rank_query_sep;
+
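+// ggml log callback: prefix each message with a one-character severity tag and hand it to the streamer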
+static void _ggml_log_callback(enum ggml_log_level level, const char *text, void *user_data)
+{
+    chatllm::BaseStreamer *streamer = (chatllm::BaseStreamer *)user_data;
+    std::ostringstream oss;
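+    // one tag per ggml_log_level value (presumably NONE, DEBUG, INFO, WARN, ERROR, CONT)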
+    static const char tags[] = {' ', 'D', 'I', 'W', 'E', '.'};
+
+    if ((0 <= level) && (level < sizeof(tags)))
+        oss << tags[level];
+    else
+        oss << '?';
+
+    oss << text;
+    streamer->putln(oss.str(), chatllm::BaseStreamer::LOGGING);
+}
 
 void chat(Args &args, chatllm::Pipeline &pipeline, TextStreamer &streamer)
 {
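+    // route ggml's backend log output through the streamer (shown as LOGGING lines)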
+    ggml_log_set(_ggml_log_callback, &streamer);
+
     if (args.system.size() > 0)
         pipeline.set_system_prompt(args.system);
 
@@ -1092,6 +1114,8 @@ static int start_chat(Chat *chat, Args &args, chatllm::Pipeline &pipeline, chatl
     chat->pipeline = &pipeline;
     chat->streamer = &streamer;
 
+    ggml_log_set(_ggml_log_callback, &streamer);
+
     if (args.system.size() > 0)
         pipeline.set_system_prompt(args.system);
 