File tree Expand file tree Collapse file tree 3 files changed +11
-1
lines changed Expand file tree Collapse file tree 3 files changed +11
-1
lines changed Original file line number Diff line number Diff line change @@ -255,7 +255,8 @@ llama_context::llama_context(
255255 model.n_devices () > 1 &&
256256 model.params .n_gpu_layers > (int ) model.hparams .n_layer &&
257257 model.params .split_mode == LLAMA_SPLIT_MODE_LAYER &&
258- cparams.offload_kqv ;
258+ cparams.offload_kqv &&
259+ !model.has_tensor_overrides ();
259260
260261 // pipeline parallelism requires support for async compute and events in all devices
261262 if (pipeline_parallel) {
Original file line number Diff line number Diff line change @@ -379,9 +379,12 @@ struct llama_model::impl {
379379 layer_dev dev_input = {};
380380 layer_dev dev_output = {};
381381 std::vector<layer_dev> dev_layer;
382+
383+ bool has_tensor_overrides;
382384};
383385
// Constructs the model: initializes the `params` member from the argument and
// allocates the private implementation object (`impl`) behind `pimpl`.
384386llama_model::llama_model(const llama_model_params & params) : params(params), pimpl(std::make_unique<impl>()) {
// Record whether tensor buffer-type overrides were supplied. The flag is true only when
// the override array pointer is non-null AND its first entry has a non-null pattern.
// NOTE(review): presumably the array is terminated by an entry whose pattern is null, so a
// null first pattern means "empty list" - confirm against the llama_model_params definition.
387+ pimpl->has_tensor_overrides = params.tensor_buft_overrides && params.tensor_buft_overrides[0].pattern;
385388}
386389
387390llama_model::~llama_model() {}
@@ -4169,6 +4172,10 @@ ggml_backend_buffer_type_t llama_model::select_buft(int il) const {
41694172 });
41704173}
41714174
// Returns the `has_tensor_overrides` flag stored in the private implementation object.
// Read-only accessor: it performs no computation of its own.
4175+ bool llama_model::has_tensor_overrides() const {
4176+ return pimpl->has_tensor_overrides;
4177+ }
4178+
41724179const ggml_tensor * llama_model::get_tensor(const char * name) const {
41734180 auto it = std::find_if(tensors_by_name.begin(), tensors_by_name.end(),
41744181 [name](const std::pair<std::string, ggml_tensor *> & it) {
Original file line number Diff line number Diff line change @@ -382,6 +382,8 @@ struct llama_model {
382382
383383 ggml_backend_buffer_type_t select_buft (int il) const ;
384384
// Reports the stored flag saying whether tensor buffer-type overrides are in use for this model.
385+ bool has_tensor_overrides () const ;
386+
385387 const struct ggml_tensor * get_tensor (const char * name) const ;
386388
387389 // TODO: move this to new llm_arch_model_i interface
You can’t perform that action at this time.
0 commit comments