@@ -493,7 +493,7 @@ std::string string_from(const struct llama_context * ctx, const std::vector<llam
             first = false;
         }

-        auto detokenized = llama_token_to_piece(ctx, token);
+        auto detokenized = common_token_to_piece(ctx, token);

         detokenized.erase(
             std::remove_if(
@@ -524,7 +524,7 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
             first = false;
         }

-        auto detokenized = llama_token_to_piece(ctx, batch.token[i]);
+        auto detokenized = common_token_to_piece(ctx, batch.token[i]);

         detokenized.erase(
             std::remove_if(
@@ -819,16 +819,16 @@ std::string fs_get_cache_file(const std::string & filename) {
 //
 // Model utils
 //
-struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
-    llama_init_result iparams;
-    auto mparams = llama_model_params_from_gpt_params(params);
+struct common_init_result llama_init_from_gpt_params(gpt_params & params) {
+    common_init_result iparams;
+    auto mparams = common_model_params_from_gpt_params(params);

     llama_model * model = nullptr;

     if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-        model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
     } else if (!params.model_url.empty()) {
-        model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
     } else {
         model = llama_load_model_from_file(params.model.c_str(), mparams);
     }
@@ -863,7 +863,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         }
     }

-    auto cparams = llama_context_params_from_gpt_params(params);
+    auto cparams = common_context_params_from_gpt_params(params);

     llama_context * lctx = llama_new_context_with_model(model, cparams);
     if (lctx == NULL) {
@@ -900,7 +900,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {

     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
-        llama_lora_adapter_container loaded_la;
+        common_lora_adapter_container loaded_la;
         loaded_la.path = la.path;
         loaded_la.scale = la.scale;
         loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
@@ -913,7 +913,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
     }
     if (!params.lora_init_without_apply) {
-        llama_lora_adapters_apply(lctx, iparams.lora_adapters);
+        common_lora_adapters_apply(lctx, iparams.lora_adapters);
     }

     if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
@@ -961,7 +961,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     return iparams;
 }

-void llama_lora_adapters_apply(struct llama_context * ctx, std::vector<llama_lora_adapter_container> & lora_adapters) {
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters) {
     llama_lora_adapter_clear(ctx);
     for (auto & la : lora_adapters) {
         if (la.scale != 0.0f) {
@@ -970,7 +970,7 @@ void llama_lora_adapters_apply(struct llama_context * ctx, std::vector<llama_lor
     }
 }

-struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {
+struct llama_model_params common_model_params_from_gpt_params(const gpt_params & params) {
     auto mparams = llama_model_default_params();

     if (params.n_gpu_layers != -1) {
@@ -1022,7 +1022,7 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
     throw std::runtime_error("Invalid cache type: " + s);
 }

-struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
+struct llama_context_params common_context_params_from_gpt_params(const gpt_params & params) {
     auto cparams = llama_context_default_params();

     cparams.n_ctx = params.n_ctx;
@@ -1430,7 +1430,7 @@ struct llama_model * llama_load_model_from_hf(

 #else

-struct llama_model * llama_load_model_from_url(
+struct llama_model * common_load_model_from_url(
         const char * /*model_url*/,
         const char * /*path_model*/,
         const char * /*hf_token*/,
@@ -1439,7 +1439,7 @@ struct llama_model * llama_load_model_from_url(
     return nullptr;
 }

-struct llama_model * llama_load_model_from_hf(
+struct llama_model * common_load_model_from_hf(
         const char * /*repo*/,
         const char * /*model*/,
         const char * /*path_model*/,
@@ -1455,11 +1455,11 @@ struct llama_model * llama_load_model_from_hf(
 // Batch utils
 //

-void llama_batch_clear(struct llama_batch & batch) {
+void common_batch_clear(struct llama_batch & batch) {
     batch.n_tokens = 0;
 }

-void llama_batch_add(
+void common_batch_add(
         struct llama_batch & batch,
         llama_token id,
         llama_pos pos,
@@ -1482,15 +1482,15 @@ void llama_batch_add(
 // Vocab utils
 //

-std::vector<llama_token> llama_tokenize(
+std::vector<llama_token> common_tokenize(
         const struct llama_context * ctx,
         const std::string & text,
         bool add_special,
         bool parse_special) {
-    return llama_tokenize(llama_get_model(ctx), text, add_special, parse_special);
+    return common_tokenize(llama_get_model(ctx), text, add_special, parse_special);
 }

-std::vector<llama_token> llama_tokenize(
+std::vector<llama_token> common_tokenize(
         const struct llama_model * model,
         const std::string & text,
         bool add_special,
@@ -1509,7 +1509,7 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }

-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
+std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::string piece;
     piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
     const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
@@ -1525,7 +1525,7 @@ std::string llama_token_to_piece(const struct llama_context * ctx, llama_token t
     return piece;
 }

-std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+std::string common_detokenize(llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
     int32_t n_chars = llama_detokenize(llama_get_model(ctx), tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
@@ -1551,9 +1551,9 @@ bool llama_chat_verify_template(const std::string & tmpl) {
     return res >= 0;
 }

-std::string llama_chat_apply_template(const struct llama_model * model,
+std::string common_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
-        const std::vector<llama_chat_msg> & msgs,
+        const std::vector<common_chat_msg> & msgs,
         bool add_ass) {
     int alloc_size = 0;
     bool fallback = false; // indicate if we must fallback to default chatml
@@ -1595,42 +1595,42 @@ std::string llama_chat_apply_template(const struct llama_model * model,
     return formatted_chat;
 }

-std::string llama_chat_format_single(const struct llama_model * model,
+std::string common_chat_format_single(const struct llama_model * model,
         const std::string & tmpl,
-        const std::vector<llama_chat_msg> & past_msg,
-        const llama_chat_msg & new_msg,
+        const std::vector<common_chat_msg> & past_msg,
+        const common_chat_msg & new_msg,
         bool add_ass) {
     std::ostringstream ss;
-    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false);
-    std::vector<llama_chat_msg> chat_new(past_msg);
+    auto fmt_past_msg = past_msg.empty() ? "" : common_chat_apply_template(model, tmpl, past_msg, false);
+    std::vector<common_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
         ss << "\n";
     };
     // format chat with new_msg
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
+    auto fmt_new_msg = common_chat_apply_template(model, tmpl, chat_new, add_ass);
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
 }

-std::string llama_chat_format_example(const struct llama_model * model,
+std::string common_chat_format_example(const struct llama_model * model,
         const std::string & tmpl) {
-    std::vector<llama_chat_msg> msgs = {
+    std::vector<common_chat_msg> msgs = {
         {"system", "You are a helpful assistant"},
         {"user", "Hello"},
         {"assistant", "Hi there"},
         {"user", "How are you?"},
     };
-    return llama_chat_apply_template(model, tmpl, msgs, true);
+    return common_chat_apply_template(model, tmpl, msgs, true);
 }

 //
 // KV cache utils
 //

-void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
+void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
     static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";

     printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
@@ -1653,7 +1653,7 @@ void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
     printf("\n=== Done dumping\n");
 }

-void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
+void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
     static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

     printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
@@ -1705,7 +1705,7 @@ void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_siz
 // Embedding utils
 //

-void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm) {
+void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) {
     double sum = 0.0;

     switch (embd_norm) {
@@ -1739,7 +1739,7 @@ void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm)
     }
 }

-float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
+float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){
     double sum = 0.0;
     double sum1 = 0.0;
     double sum2 = 0.0;
@@ -1765,8 +1765,8 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 // Control vector utils
 //

-static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
-    llama_control_vector_data result = { -1, {} };
+static common_control_vector_data llama_control_vector_load_one(const common_control_vector_load_info & load_info) {
+    common_control_vector_data result = { -1, {} };

     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
@@ -1850,8 +1850,8 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     return result;
 }

-llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
-    llama_control_vector_data result = { -1, {} };
+common_control_vector_data llama_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos) {
+    common_control_vector_data result = { -1, {} };

     for (const auto & info : load_infos) {
         auto cur = llama_control_vector_load_one(info);
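
For reference, the sketch below is not part of this commit; it only illustrates how a caller would use the renamed helpers whose definitions appear in the hunks above. It assumes common/common.h is included, that the project's usual include paths are set, and that a llama_context has already been created elsewhere (for example via llama_init_from_gpt_params); the function dump_tokens and the prompt string are hypothetical.

// Hypothetical caller-side sketch of the renamed common_* helpers (illustration only).
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <string>
#include <vector>

static void dump_tokens(llama_context * ctx, const std::string & prompt) {
    // was llama_tokenize(...) before this change
    std::vector<llama_token> tokens = common_tokenize(ctx, prompt, /*add_special=*/true, /*parse_special=*/false);

    for (const llama_token tok : tokens) {
        // was llama_token_to_piece(...) before this change
        const std::string piece = common_token_to_piece(ctx, tok);
        printf("%6d -> '%s'\n", tok, piece.c_str());
    }

    // was llama_detokenize(...) before this change
    printf("round trip: '%s'\n", common_detokenize(ctx, tokens, /*special=*/false).c_str());
}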