We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7c8fc01, commit 07416e0 (Copy full SHA for 07416e0)
src/llama-model.cpp
@@ -13494,7 +13494,7 @@ struct llm_build_glm4_moe : public llm_graph_context {
13494
13495
// Only process the regular transformer layers (skip the trailing NextN prediction layers)
13496
// The skipped NextN layer tensors are loaded but not processed in the forward pass
13497
- const int n_transformer_layers = n_layer - 1;
+ const int n_transformer_layers = n_layer - hparam.nextn_predict_layers;
13498
for (int il = 0; il < n_transformer_layers; ++il) {
13499
ggml_tensor * inpSA = inpL;
13500
0 commit comments