3 files changed: +5 -1 lines changed
lines changed Original file line number Diff line number Diff line change @@ -1898,7 +1898,7 @@ def embeddings_load_model(model_filename):
18981898 inputs = embeddings_load_model_inputs ()
18991899 inputs .model_filename = model_filename .encode ("UTF-8" )
19001900 inputs .gpulayers = (999 if args .embeddingsgpu else 0 )
1901- inputs .flash_attention = False
1901+ inputs .flash_attention = args . flashattention
19021902 inputs .threads = args .threads
19031903 inputs .use_mmap = args .usemmap
19041904 inputs .embeddingsmaxctx = (args .embeddingsmaxctx if args .embeddingsmaxctx else args .contextsize ) # for us to clamp to contextsize if embeddingsmaxctx unspecified
Original file line number Diff line number Diff line change @@ -120,6 +120,8 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
120120 model_params.use_mmap = inputs.use_mmap ;
121121 model_params.use_mlock = false ;
122122 model_params.n_gpu_layers = inputs.gpulayers ; // offload if possible
123+ int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu <=0 ?0 :inputs.kcpp_main_gpu ;
124+ model_params.main_gpu = kcpp_parseinfo_maindevice;
123125 model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
124126
125127 llama_model * embeddingsmodel = llama_model_load_from_file (modelfile.c_str (), model_params);
Original file line number Diff line number Diff line change @@ -532,6 +532,8 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
532532 tts_model_params.use_mlock = false ;
533533 tts_model_params.n_gpu_layers = inputs.gpulayers ; // offload if possible
534534 tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
535+ int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu <=0 ?0 :inputs.kcpp_main_gpu ;
536+ tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
535537 tts_ctx_params.n_ctx = 8192 ;
536538 tts_ctx_params.offload_kqv = true ;
537539 tts_ctx_params.n_batch = 8192 ;
You can’t perform that action at this time.
0 commit comments