Skip to content

Commit 4b2ca11

Browse files
committed
more consistency fixes
1 parent 955cf66 commit 4b2ca11

File tree

3 files changed: 5 additions and 1 deletion

3 files changed: 5 additions and 1 deletion

koboldcpp.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1898,7 +1898,7 @@ def embeddings_load_model(model_filename):
18981898
inputs = embeddings_load_model_inputs()
18991899
inputs.model_filename = model_filename.encode("UTF-8")
19001900
inputs.gpulayers = (999 if args.embeddingsgpu else 0)
1901-
inputs.flash_attention = False
1901+
inputs.flash_attention = args.flashattention
19021902
inputs.threads = args.threads
19031903
inputs.use_mmap = args.usemmap
19041904
inputs.embeddingsmaxctx = (args.embeddingsmaxctx if args.embeddingsmaxctx else args.contextsize) # for us to clamp to contextsize if embeddingsmaxctx unspecified

otherarch/embeddings_adapter.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,8 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
120120
model_params.use_mmap = inputs.use_mmap;
121121
model_params.use_mlock = false;
122122
model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
123+
int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
124+
model_params.main_gpu = kcpp_parseinfo_maindevice;
123125
model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
124126

125127
llama_model * embeddingsmodel = llama_model_load_from_file(modelfile.c_str(), model_params);

otherarch/tts_adapter.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -532,6 +532,8 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
532532
tts_model_params.use_mlock = false;
533533
tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
534534
tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
535+
int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
536+
tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
535537
tts_ctx_params.n_ctx = 8192;
536538
tts_ctx_params.offload_kqv = true;
537539
tts_ctx_params.n_batch = 8192;

0 commit comments

Comments
 (0)