Commit c038eaa

Merge branch 'concedo_experimental' into croco_nex_0
2 parents: 0f39d8d + dca7ab5


56 files changed: +25468 −25245 lines

.github/workflows/kcpp-build-release-arm64.yaml

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ jobs:
         "

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_linux_arm64_binary
           path: dist/

.github/workflows/kcpp-build-release-linux-cuda12.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ jobs:
         ./koboldcpp.sh dist

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_linux_binary
           path: dist/

.github/workflows/kcpp-build-release-linux.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ jobs:
         ./koboldcpp.sh dist

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_linux_binary
           path: dist/

.github/workflows/kcpp-build-release-osx.yaml

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ jobs:
         dist/koboldcpp-mac-arm64 --model baby_llama.gguf --gpulayers 99 --benchmark --prompt 'Hi, my name is'

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_mac_binary
           path: dist/

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ jobs:
         ./make_pyinstaller_cuda12.bat

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_windows_pyinstallers
           path: dist/

.github/workflows/kcpp-build-release-win-full.yaml

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ jobs:
         ./make_pyinstaller_cuda.bat

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_windows_pyinstallers
           path: dist/

.github/workflows/kcpp-build-release-win-oldcpu-full.yaml

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ jobs:
         ./make_pyinstaller_cuda_oldcpu.bat

       - name: Save artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: kcpp_windows_pyinstallers
           path: dist/

Makefile

Lines changed: 1 addition & 1 deletion
@@ -676,7 +676,7 @@ whispercpp_cublas.o: otherarch/whispercpp/whisper_adapter.cpp
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@

 # idiotic "for easier compilation"
-GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp src/llama-grammar.cpp src/llama-sampling.cpp src/llama-vocab.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cpu.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h
+GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp src/llama-impl.cpp src/llama-chat.cpp src/llama-mmap.cpp src/llama-context.cpp src/llama-adapter.cpp src/llama-arch.cpp src/llama-batch.cpp src/llama-vocab.cpp src/llama-grammar.cpp src/llama-sampling.cpp src/llama-kv-cache.cpp src/llama-model-loader.cpp src/llama-model.cpp src/llama-quant.cpp src/llama-hparams.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cpu.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h
 gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
 gpttype_adapter.o: $(GPTTYPE_ADAPTER)

common/arg.cpp

Lines changed: 2 additions & 2 deletions
@@ -1514,15 +1514,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0 });
+            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(common_arg(
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale) });
+            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
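
Note on the extra nullptr: both initializer lists gain a third member, reflecting a new raw-pointer field on the adapter-info struct that is filled in only once the adapter is actually loaded. A minimal sketch of the shape these initializers imply, with the caveat that the authoritative definition lives in common/common.h and the field names here are inferred from the usage in common/common.cpp below:

#include <string>

struct llama_lora_adapter; // opaque llama.cpp handle

struct common_lora_adapter_info {
    std::string path;                // adapter file path (FNAME)
    float scale;                     // 1.0 for --lora, user value for --lora-scaled
    struct llama_lora_adapter * ptr; // nullptr until llama_lora_adapter_init() runs
};

The argument parser records only path and scale; ptr stays nullptr until common_init_from_params() loads the adapter and points it at the live handle.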

common/common.cpp

Lines changed: 16 additions & 16 deletions
@@ -891,9 +891,8 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
-        LOG_ERR("%s: KV cache shifting is not supported for this model (--no-context-shift to disable)'\n", __func__);
-        llama_free_model(model);
-        return iparams;
+        LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__);
+        params.ctx_shift = false;
     }

     if (!params.control_vectors.empty()) {
@@ -924,20 +923,21 @@ struct common_init_result common_init_from_params(common_params & params) {

     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
-        common_lora_adapter_container loaded_la;
-        loaded_la.path = la.path;
-        loaded_la.scale = la.scale;
-        loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
-        if (loaded_la.adapter == nullptr) {
+        llama_lora_adapter_ptr lora;
+        lora.reset(llama_lora_adapter_init(model, la.path.c_str()));
+        if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
             llama_free_model(model);
             return iparams;
         }
-        iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
+
+        la.ptr = lora.get();
+        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
     }
+
     if (!params.lora_init_without_apply) {
-        common_lora_adapters_apply(lctx, iparams.lora_adapters);
+        common_lora_adapters_apply(lctx, params.lora_adapters);
     }

     if (params.sampling.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
@@ -998,17 +998,17 @@ struct common_init_result common_init_from_params(common_params & params) {
         llama_perf_context_reset(lctx);
     }

-    iparams.model = model;
-    iparams.context = lctx;
+    iparams.model.reset(model);
+    iparams.context.reset(lctx);

     return iparams;
 }

-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters) {
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora) {
     llama_lora_adapter_clear(ctx);
-    for (auto & la : lora_adapters) {
+    for (auto & la : lora) {
         if (la.scale != 0.0f) {
-            llama_lora_adapter_set(ctx, la.adapter, la.scale);
+            llama_lora_adapter_set(ctx, la.ptr, la.scale);
         }
     }
 }
@@ -1365,7 +1365,7 @@ struct llama_model * common_load_model_from_url(
         return NULL;
     }

-    auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT_STR);
+    auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
     if (key_n_split >= 0) {
         n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
     }
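
The reset() and emplace_back(std::move(...)) calls above indicate that common_init_result now owns the model, context, and adapters through unique_ptr aliases with custom deleters rather than raw pointers, so loaded adapters are released together with the result. A hedged sketch of that pattern for the adapter handle only (the deleter and alias names are assumptions modeled on the calls in this diff; llama_lora_adapter_free is the matching free function declared in llama.h):

#include <memory>
#include <vector>

struct llama_lora_adapter;                                 // opaque llama.cpp handle
void llama_lora_adapter_free(struct llama_lora_adapter *); // declared in llama.h

// Assumed deleter: routes unique_ptr destruction through the C API.
struct llama_lora_adapter_deleter {
    void operator()(llama_lora_adapter * a) const { llama_lora_adapter_free(a); }
};

using llama_lora_adapter_ptr = std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter>;

// common_init_result would then hold, among other members:
//     std::vector<llama_lora_adapter_ptr> lora;
// so an early "return iparams;" frees adapters without explicit cleanup calls.

Under this shape, la.ptr = lora.get() keeps a non-owning view in params.lora_adapters for common_lora_adapters_apply() to use, while ownership itself moves into iparams.lora.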
