Skip to content

Commit 97820aa

Browse files
fix assignment of 1 dense layer
1 parent 9d0a0bb commit 97820aa

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

src/llama.cpp

Lines changed: 3 additions & 2 deletions
Original line number | Diff line number | Diff line change
@@ -331,7 +331,7 @@ static void llama_params_fit_impl(
         uint32_t n_part = 0; // number of partial layers, <= n_layer

         // for the first partial layer varying parts can overflow, all further layers use LAYER_FRACTION_MOE:
-        layer_fraction_t overflow_type = LAYER_FRACTION_NONE;
+        layer_fraction_t overflow_type = LAYER_FRACTION_MOE;
     };

     const size_t ntbo = llama_max_tensor_buft_overrides();
@@ -350,14 +350,15 @@ static void llama_params_fit_impl(
         }
     }
     assert(uint32_t(mparams.n_gpu_layers) <= hp_ngl);
+    uint32_t il0 = hp_ngl - mparams.n_gpu_layers; // start index for tensor buft overrides
+
     if (add_nonrepeating) {
         mparams.n_gpu_layers += 1;
         tensor_split[nd - 1] += 1;
     }
     mparams.tensor_split = tensor_split;

     size_t itbo = 0;
-    uint32_t il0 = 0;
     for (size_t id = 0; id < nd; id++) {
         il0 += ngl_per_device[id].n_layer - ngl_per_device[id].n_part;
         for (uint32_t il = il0; il < il0 + ngl_per_device[id].n_part; il++) {

0 commit comments

Comments
 (0)