@@ -513,10 +513,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     std::fill(hparams.rope_sections.begin(), hparams.rope_sections.end(), 0);
     std::fill(hparams.swa_layers.begin(), hparams.swa_layers.end(), 0);

-    std::fill(hparams.xielu_alpha_n.begin(), hparams.xielu_alpha_n.end(), 0);
-    std::fill(hparams.xielu_alpha_p.begin(), hparams.xielu_alpha_p.end(), 0);
-    std::fill(hparams.xielu_beta.begin(), hparams.xielu_beta.end(), 0);
-    std::fill(hparams.xielu_eps.begin(), hparams.xielu_eps.end(), 0);
+    std::fill(hparams.xielu_alpha_n.begin(), hparams.xielu_alpha_n.end(), 0.0f);
+    std::fill(hparams.xielu_alpha_p.begin(), hparams.xielu_alpha_p.end(), 0.0f);
+    std::fill(hparams.xielu_beta.begin(), hparams.xielu_beta.end(), 0.0f);
+    std::fill(hparams.xielu_eps.begin(), hparams.xielu_eps.end(), 0.0f);

     ml.get_key_or_arr(LLM_KV_FEED_FORWARD_LENGTH,  hparams.n_ff_arr,   hparams.n_layer, false);
     ml.get_key_or_arr(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head_arr, hparams.n_layer, false);
@@ -2014,10 +2014,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         case LLM_ARCH_APERTUS:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_N,  hparams.xielu_alpha_n, hparams.n_layer);
-                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_P, hparams.xielu_alpha_p, hparams.n_layer);
-                ml.get_key_or_arr(LLM_KV_XIELU_BETA, hparams.xielu_beta, hparams.n_layer);
-                ml.get_key_or_arr(LLM_KV_XIELU_EPS, hparams.xielu_eps, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_N,         hparams.xielu_alpha_n, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_P,         hparams.xielu_alpha_p, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_BETA,            hparams.xielu_beta,    hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_EPS,             hparams.xielu_eps,     hparams.n_layer);

                 switch (hparams.n_layer) {
                     case 32: type = LLM_TYPE_8B; break;
@@ -5858,19 +5858,18 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

                     // output
                     output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), { n_embd }, 0);
-                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), { n_embd, n_vocab }, 0);

                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];

                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd }, 0);

                         if (hparams.rope_scaling_type_train == LLAMA_ROPE_SCALING_TYPE_LONGROPE) {
-                            layer.rope_long  = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG,  "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
-                            layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
-                        }
-                        else {
-                            layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                            layer.rope_long  = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG,  "weight", i), { n_rot/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                            layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), { n_rot/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        } else {
+                            layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), { n_rot/2 }, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
                         }

                         layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q,   "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
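
The substantive part of the first hunk is the change of the fill value from `0` to `0.0f` for the per-layer xiELU hyperparameter arrays; the other hunks only adjust alignment and brace style. Below is a minimal, self-contained sketch of that distinction. The `hparams_sketch` struct and `MAX_LAYERS` constant are hypothetical stand-ins for illustration, not llama.cpp definitions; both calls compile and zero the array, but the second keeps the literal's type consistent with the element type instead of relying on an implicit int-to-float conversion.

```cpp
// Minimal sketch, not llama.cpp code: filling a float array with an int
// literal versus a float literal. hparams_sketch and MAX_LAYERS are
// hypothetical stand-ins for the real hyperparameter storage.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdio>

constexpr std::size_t MAX_LAYERS = 512; // assumed size, for illustration only

struct hparams_sketch {
    std::array<float, MAX_LAYERS> xielu_alpha_n{};
    std::array<float, MAX_LAYERS> xielu_eps{};
};

int main() {
    hparams_sketch hp;

    // Works: std::fill deduces the value type as int, so each assignment
    // performs an implicit int -> float conversion of 0.
    std::fill(hp.xielu_alpha_n.begin(), hp.xielu_alpha_n.end(), 0);

    // Same result with the literal already matching the element type,
    // as in the patch; no conversion is involved.
    std::fill(hp.xielu_eps.begin(), hp.xielu_eps.end(), 0.0f);

    std::printf("alpha_n[0] = %g, eps[0] = %g\n",
                hp.xielu_alpha_n[0], hp.xielu_eps[0]);
    return 0;
}
```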