@@ -9636,8 +9636,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int n_layer_sparsity = 10; // number of layers using activation sparsity
     const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
 
-    ggml_tensor * one; // containing single element 1.0f
-
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf)
         : llm_graph_context(params),
           model(model),
@@ -9649,14 +9647,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * cur;
         ggml_tensor * inpL;
 
-        // TODO: remove this when ggml_scale_add is implemented
-        one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        {
-            auto inp = std::make_unique<llm_graph_input_one>();
-            inp->one = one;
-            res->add_input(std::move(inp));
-        }
-
         inpL = build_inp_embd(model.tok_embd);
 
         // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
@@ -10046,7 +10036,7 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         cb(innovation, "innovation", il);
 
         ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
-        all_coefs = ggml_add(ctx0, all_coefs, one);
+        all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
         cb(all_coefs, "all_coefs", il);
         all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
         all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
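Note on the change: `ggml_scale_bias(ctx, a, s, b)` computes `a*s + b` element-wise, so calling it with `s = 1.0f` and `b = 1.0f` adds the constant 1.0 to `all_coefs` inside the op itself. The one-element `one` tensor and its `llm_graph_input_one` graph input, which existed only to inject that constant at eval time, are therefore no longer needed, which is what the removals above are about. A minimal sketch of the before/after equivalence, assuming only `ggml.h`; the helper names `add_one_old`/`add_one_new` are made up for illustration:

```cpp
#include "ggml.h"

// before: a 1-element tensor `one` had to be created, registered as a graph
// input, and filled with 1.0f at eval time; ggml_add broadcasts it over coefs
static ggml_tensor * add_one_old(ggml_context * ctx, ggml_tensor * coefs, ggml_tensor * one) {
    return ggml_add(ctx, coefs, one); // coefs + 1.0 (via broadcast of `one`)
}

// after: the constant is folded into the op, so no extra graph input is needed
static ggml_tensor * add_one_new(ggml_context * ctx, ggml_tensor * coefs) {
    return ggml_scale_bias(ctx, coefs, 1.0f, 1.0f); // coefs * 1.0f + 1.0f
}
```

Besides dropping one graph input, this also appears to resolve the old `// TODO: remove this when ggml_scale_add is implemented` comment, with `ggml_scale_bias` filling that role.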