@@ -38,21 +38,18 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
3838 Qcur = ggml_add (ctx0, Qcur, model.layers [il].bq );
3939 cb (Qcur, " Qcur" , il);
4040 }
41- ;
4241 ggml_tensor * Kcur = build_lora_mm (model.layers [il].wk , cur);
4342 cb (Kcur, " Kcur" , il);
4443 if (model.layers [il].bk ) {
4544 Kcur = ggml_add (ctx0, Kcur, model.layers [il].bk );
4645 cb (Kcur, " Kcur" , il);
4746 }
48- ;
4947 ggml_tensor * Vcur = build_lora_mm (model.layers [il].wv , cur);
5048 cb (Vcur, " Vcur" , il);
5149 if (model.layers [il].bv ) {
5250 Vcur = ggml_add (ctx0, Vcur, model.layers [il].bv );
5351 cb (Vcur, " Vcur" , il);
5452 }
55- ;
5653 Qcur = ggml_reshape_3d (ctx0, Qcur, n_embd_head, n_head, n_tokens);
5754 Kcur = ggml_reshape_3d (ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
5855 Vcur = ggml_reshape_3d (ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
@@ -77,12 +74,10 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
7774 model.layers [il].wo , model.layers [il].bo ,
7875 Qcur, Kcur, Vcur, nullptr , nullptr , nullptr , 1 .0f , il);
7976 }
80- ;
8177 if (il == n_layer - 1 && inp_out_ids) {
8278 cur = ggml_get_rows (ctx0, cur, inp_out_ids);
8379 inpSA = ggml_get_rows (ctx0, inpSA, inp_out_ids);
8480 }
85- ;
8681 cur = build_norm (cur,
8782 model.layers [il].attn_out_norm , NULL ,
8883 LLM_NORM_RMS, il);
@@ -125,7 +120,6 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
125120 } else {
126121 cur = moe_out;
127122 }
128- ;
129123 cur = build_norm (cur,
130124 model.layers [il].ffn_post_norm , NULL ,
131125 LLM_NORM_RMS, il);
@@ -140,7 +134,6 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
140134 // input for next layer
141135 inpL = cur;
142136 }
143- ;
144137 cur = inpL;
145138
146139 cur = build_norm (cur,
@@ -161,7 +154,6 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
161154 cur = ggml_tanh (ctx0, cur);
162155 cur = ggml_scale (ctx0, cur, hparams.f_final_logit_softcapping );
163156 }
164- ;
165157 cb (cur, " result_output" , -1 );
166158 res->t_logits = cur;
167159
0 commit comments