1 file changed: +3 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -331,7 +331,7 @@ static void llama_params_fit_impl(
331331 uint32_t n_part = 0 ; // number of partial layers, <= n_layer
332332
333333 // for the first partial layer varying parts can overflow, all further layers use LAYER_FRACTION_MOE:
334- layer_fraction_t overflow_type = LAYER_FRACTION_NONE ;
334+ layer_fraction_t overflow_type = LAYER_FRACTION_MOE ;
335335 };
336336
337337 const size_t ntbo = llama_max_tensor_buft_overrides ();
@@ -350,14 +350,15 @@ static void llama_params_fit_impl(
350350 }
351351 }
352352 assert (uint32_t (mparams.n_gpu_layers ) <= hp_ngl);
353+ uint32_t il0 = hp_ngl - mparams.n_gpu_layers ; // start index for tensor buft overrides
354+
353355 if (add_nonrepeating) {
354356 mparams.n_gpu_layers += 1 ;
355357 tensor_split[nd - 1 ] += 1 ;
356358 }
357359 mparams.tensor_split = tensor_split;
358360
359361 size_t itbo = 0 ;
360- uint32_t il0 = 0 ;
361362 for (size_t id = 0 ; id < nd; id++) {
362363 il0 += ngl_per_device[id].n_layer - ngl_per_device[id].n_part ;
363364 for (uint32_t il = il0; il < il0 + ngl_per_device[id].n_part ; il++) {
You can’t perform that action at this time.
0 commit comments