@@ -891,9 +891,8 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
-        LOG_ERR("%s: KV cache shifting is not supported for this model (--no-context-shift to disable)'\n", __func__);
-        llama_free_model(model);
-        return iparams;
+        LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__);
+        params.ctx_shift = false;
     }
 
     if (!params.control_vectors.empty()) {
@@ -924,20 +923,21 @@ struct common_init_result common_init_from_params(common_params & params) {
 
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
-        common_lora_adapter_container loaded_la;
-        loaded_la.path = la.path;
-        loaded_la.scale = la.scale;
-        loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
-        if (loaded_la.adapter == nullptr) {
+        llama_lora_adapter_ptr lora;
+        lora.reset(llama_lora_adapter_init(model, la.path.c_str()));
+        if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
             llama_free_model(model);
             return iparams;
         }
-        iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
+
+        la.ptr = lora.get();
+        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
     }
+
     if (!params.lora_init_without_apply) {
-        common_lora_adapters_apply(lctx, iparams.lora_adapters);
+        common_lora_adapters_apply(lctx, params.lora_adapters);
     }
 
     if (params.sampling.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
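The net effect of this hunk: the adapter objects themselves are now owned by iparams.lora, while params.lora_adapters keeps the per-adapter info (path, scale) plus a non-owning ptr back to the loaded adapter, which is why the apply call switches from iparams to params. A minimal sketch of the calling pattern this enables, assuming common_lora_adapter_info exposes the path/scale/ptr fields used above and that llama_lora_adapter_ptr frees the adapter when iparams is destroyed:

// Sketch, not part of the commit: re-apply already-loaded adapters at a new scale.
// Assumes common_lora_adapter_info has { path, scale, ptr } and that ptr was filled
// in by common_init_from_params() as shown in the hunk above.
#include "common.h"

static void set_lora_scale(common_params & params, llama_context * lctx, float scale) {
    for (auto & la : params.lora_adapters) {
        la.scale = scale;       // adapters stay loaded; only the blend factor changes
    }
    // new signature: takes the info list from params, not the owning list in iparams
    common_lora_adapters_apply(lctx, params.lora_adapters);
}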
@@ -998,17 +998,17 @@ struct common_init_result common_init_from_params(common_params & params) {
         llama_perf_context_reset(lctx);
     }
 
-    iparams.model = model;
-    iparams.context = lctx;
+    iparams.model.reset(model);
+    iparams.context.reset(lctx);
 
     return iparams;
 }
 
-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters) {
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora) {
     llama_lora_adapter_clear(ctx);
-    for (auto & la : lora_adapters) {
+    for (auto & la : lora) {
         if (la.scale != 0.0f) {
-            llama_lora_adapter_set(ctx, la.adapter, la.scale);
+            llama_lora_adapter_set(ctx, la.ptr, la.scale);
         }
     }
 }
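With these lines, common_init_result hands out the model and context through smart pointers instead of raw handles, so callers no longer pair common_init_from_params() with manual llama_free()/llama_free_model() calls. A rough usage sketch under that assumption (the pointer wrappers are assumed to be unique_ptr-style types whose deleters call llama_free_model and llama_free):

// Sketch, not part of the commit: borrow raw handles for the llama.cpp C API and let
// iparams release everything (model, context, loras) when it goes out of scope.
#include "common.h"

static bool run_once(common_params & params) {
    common_init_result iparams = common_init_from_params(params);

    llama_model   * model = iparams.model.get();
    llama_context * lctx  = iparams.context.get();
    if (model == nullptr || lctx == nullptr) {
        return false;   // init failed; with smart pointers there is nothing to free by hand
    }

    // ... run tokenization / decoding with model and lctx here ...

    return true;        // destructors release the context and model automatically
}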
@@ -1365,7 +1365,7 @@ struct llama_model * common_load_model_from_url(
         return NULL;
     }
 
-    auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT_STR);
+    auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
     if (key_n_split >= 0) {
         n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
     }