@@ -7763,7 +7763,7 @@ static bool llm_load_tensors(
                     // output
                     model.output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     model.output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
-
+
                     // if output is NULL, init from the input tok embed
                     if (model.output == NULL) {
                         model.output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
@@ -11029,19 +11029,19 @@ struct llm_build_context {
 
     struct ggml_cgraph * build_deci() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
-
+
         // mutable variable, needed during the last layer of the computation to skip unused tokens
         int32_t n_tokens = this->n_tokens;
-
+
         const int64_t n_embd_head = hparams.n_embd_head_v;
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         GGML_ASSERT(n_embd_head == hparams.n_rot);
-
+
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb);
-
+
         // inp_pos - contains the positions
         struct ggml_tensor * inp_pos = build_inp_pos();