Commit ab11d94
Migrate xielu params from tensors to hyperparameters
1 parent 74dcf89
File tree: 8 files changed (+77, -45 lines)


convert_hf_to_gguf.py

Lines changed: 26 additions & 2 deletions

@@ -8585,10 +8585,34 @@ class ApertusModel(LlamaModel):
     model_arch = gguf.MODEL_ARCH.APERTUS
     undo_permute = False
 
+    _alpha_n = {}
+    _alpha_p = {}
+    _beta = {}
+    _eps = {}
+
     def modify_tensors(self, data_torch, name, bid):
         # Handle xIELU activation parameters
-        if name.endswith(".act_fn.alpha_n") or name.endswith(".act_fn.alpha_p") or name.endswith(".act_fn.beta") or name.endswith(".act_fn.eps"):
-            return [(self.map_tensor_name(name), data_torch)]
+        n_layers = self.hparams.get("num_hidden_layers")
+        if name.endswith(".act_fn.alpha_n"):
+            self._alpha_n[bid] = data_torch.to("cpu").float().item()
+            if (len(self._alpha_n) == n_layers):
+                self.gguf_writer.add_xielu_alpha_n([self._alpha_n[k] for k in sorted(self._alpha_n)])
+            return []
+        if name.endswith(".act_fn.alpha_p"):
+            self._alpha_p[bid] = data_torch.to("cpu").float().item()
+            if (len(self._alpha_p) == n_layers):
+                self.gguf_writer.add_xielu_alpha_p([self._alpha_p[k] for k in sorted(self._alpha_p)])
+            return []
+        if name.endswith(".act_fn.beta"):
+            self._beta[bid] = data_torch.to("cpu").float().item()
+            if (len(self._beta) == n_layers):
+                self.gguf_writer.add_xielu_beta([self._beta[k] for k in sorted(self._beta)])
+            return []
+        if name.endswith(".act_fn.eps"):
+            self._eps[bid] = data_torch.to("cpu").float().item()
+            if (len(self._eps) == n_layers):
+                self.gguf_writer.add_xielu_eps([self._eps[k] for k in sorted(self._eps)])
+            return []
 
         return super().modify_tensors(data_torch, name, bid)
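In effect, the converter now buffers each per-layer .act_fn scalar by block id and emits one GGUF metadata array per parameter once values for all num_hidden_layers blocks have been seen, instead of writing four tiny tensors per layer. A minimal, self-contained sketch of that collect-then-flush pattern, with toy values and a stub writer standing in for gguf.GGUFWriter (not the real converter code):

# Sketch only: mirrors the collect-then-flush logic above with a stub writer and toy values.
import torch

class TinyWriterStub:
    """Stands in for gguf.GGUFWriter: just records what would be written."""
    def __init__(self):
        self.kv = {}

    def add_xielu_alpha_n(self, values):
        self.kv["xielu.alpha_n"] = list(values)

n_layers = 4              # stands in for hparams["num_hidden_layers"]
writer = TinyWriterStub()
alpha_n = {}              # block id -> scalar value, filled as tensors arrive

# Tensors can arrive in any layer order; flush once every layer has been seen.
for bid in (2, 0, 3, 1):
    data_torch = torch.tensor(0.5 * (bid + 1))   # toy stand-in for a tensor named "...act_fn.alpha_n"
    alpha_n[bid] = data_torch.to("cpu").float().item()
    if len(alpha_n) == n_layers:
        writer.add_xielu_alpha_n([alpha_n[k] for k in sorted(alpha_n)])

print(writer.kv)   # {'xielu.alpha_n': [0.5, 1.0, 1.5, 2.0]}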

gguf-py/gguf/constants.py

Lines changed: 7 additions & 12 deletions

@@ -286,6 +286,13 @@ class Projector:
     class Diffusion:
         SHIFT_LOGITS = "diffusion.shift_logits"
 
+    class xIELU:
+        XIELU_ALPHA_P = "xielu.alpha_p"
+        XIELU_ALPHA_N = "xielu.alpha_n"
+        XIELU_BETA = "xielu.beta"
+        XIELU_EPS = "xielu.eps"
+
+
 #
 # recommended mapping of model tensor names for storage in gguf
 #

@@ -780,20 +787,12 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
     MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
     MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
-    MODEL_TENSOR.FFN_ACT_ALPHA_N: "blk.{bid}.ffn_act_alpha_n",
-    MODEL_TENSOR.FFN_ACT_ALPHA_P: "blk.{bid}.ffn_act_alpha_p",
-    MODEL_TENSOR.FFN_ACT_BETA: "blk.{bid}.ffn_act_beta",
-    MODEL_TENSOR.FFN_ACT_EPS: "blk.{bid}.ffn_act_eps",
     MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
     MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
-    MODEL_TENSOR.FFN_ACT_ALPHA_N: "blk.{bid}.ffn_act_alpha_n",
-    MODEL_TENSOR.FFN_ACT_ALPHA_P: "blk.{bid}.ffn_act_alpha_p",
-    MODEL_TENSOR.FFN_ACT_BETA: "blk.{bid}.ffn_act_beta",
-    MODEL_TENSOR.FFN_ACT_EPS: "blk.{bid}.ffn_act_eps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd", # gemma3n
     MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj", # gemma3n

@@ -2715,10 +2714,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.FFN_ACT_ALPHA_N,
-        MODEL_TENSOR.FFN_ACT_ALPHA_P,
-        MODEL_TENSOR.FFN_ACT_BETA,
-        MODEL_TENSOR.FFN_ACT_EPS,
     ],
     # TODO
 }
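For reference, the new key constants are plain GGUF metadata key strings. A tiny sketch of how they resolve, assuming the xIELU class is nested under Keys in gguf.constants, as the Keys.xIELU references in gguf_writer.py below suggest:

# Assumes a gguf-py checkout that includes this commit, with xIELU nested under Keys.
from gguf.constants import Keys

print(Keys.xIELU.XIELU_ALPHA_N)   # xielu.alpha_n
print(Keys.xIELU.XIELU_EPS)       # xielu.eps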

gguf-py/gguf/gguf_writer.py

Lines changed: 12 additions & 0 deletions

@@ -1051,6 +1051,18 @@ def add_audio_num_mel_bins(self, value: int) -> None:
     def add_audio_stack_factor(self, value: int) -> None:
         self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
 
+    def add_xielu_alpha_p(self, value: Sequence[float]):
+        self.add_array(Keys.xIELU.XIELU_ALPHA_P, value)
+
+    def add_xielu_alpha_n(self, value: Sequence[float]):
+        self.add_array(Keys.xIELU.XIELU_ALPHA_N, value)
+
+    def add_xielu_beta(self, value: Sequence[float]):
+        self.add_array(Keys.xIELU.XIELU_BETA, value)
+
+    def add_xielu_eps(self, value: Sequence[float]):
+        self.add_array(Keys.xIELU.XIELU_EPS, value)
+
     # diffusion models
 
     def add_diffusion_shift_logits(self, value: bool) -> None:
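A hedged usage sketch of the new writer helpers: producing a metadata-only GGUF file that carries the four per-layer arrays. The arch string and all numeric values are placeholders rather than real Apertus parameters, and the snippet assumes a gguf-py checkout that includes this commit:

# Sketch under the assumptions above; values are placeholders, not Apertus weights.
from gguf import GGUFWriter

n_layer = 32
writer = GGUFWriter("apertus-xielu-meta.gguf", "apertus")   # arch string assumed, output path arbitrary
writer.add_xielu_alpha_n([0.8] * n_layer)    # one entry per layer
writer.add_xielu_alpha_p([0.8] * n_layer)
writer.add_xielu_beta([0.5] * n_layer)
writer.add_xielu_eps([1e-6] * n_layer)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()   # no tensors were added; this only finalizes the file
writer.close()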

src/llama-arch.cpp

Lines changed: 5 additions & 8 deletions

@@ -244,6 +244,11 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX, "adapter.lora.prompt_prefix" },
     { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },
 
+    { LLM_KV_XIELU_ALPHA_N, "xielu.alpha_n" },
+    { LLM_KV_XIELU_ALPHA_P, "xielu.alpha_p" },
+    { LLM_KV_XIELU_BETA, "xielu.beta" },
+    { LLM_KV_XIELU_EPS, "xielu.eps" },
+
     // deprecated
     { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },

@@ -2119,10 +2124,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-            { LLM_TENSOR_FFN_ACT_ALPHA_N, "blk.%d.ffn_act_alpha_n" },
-            { LLM_TENSOR_FFN_ACT_ALPHA_P, "blk.%d.ffn_act_alpha_p" },
-            { LLM_TENSOR_FFN_ACT_BETA, "blk.%d.ffn_act_beta" },
-            { LLM_TENSOR_FFN_ACT_EPS, "blk.%d.ffn_act_eps" },
         },
     },
     {

@@ -2308,10 +2309,6 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
-    {LLM_TENSOR_FFN_ACT_ALPHA_N, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
-    {LLM_TENSOR_FFN_ACT_ALPHA_P, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
-    {LLM_TENSOR_FFN_ACT_BETA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
-    {LLM_TENSOR_FFN_ACT_EPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     // altup / laurel (gemma 3n)
     {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},

src/llama-arch.h

Lines changed: 5 additions & 4 deletions

@@ -248,6 +248,11 @@ enum llm_kv {
 
     LLM_KV_SHORTCONV_L_CACHE,
 
+    LLM_KV_XIELU_ALPHA_N,
+    LLM_KV_XIELU_ALPHA_P,
+    LLM_KV_XIELU_BETA,
+    LLM_KV_XIELU_EPS,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,

@@ -294,10 +299,6 @@ enum llm_tensor {
     LLM_TENSOR_FFN_GATE_SHEXP,
     LLM_TENSOR_FFN_UP_SHEXP,
     LLM_TENSOR_FFN_EXP_PROBS_B,
-    LLM_TENSOR_FFN_ACT_ALPHA_N,
-    LLM_TENSOR_FFN_ACT_ALPHA_P,
-    LLM_TENSOR_FFN_ACT_BETA,
-    LLM_TENSOR_FFN_ACT_EPS,
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,

src/llama-hparams.h

Lines changed: 6 additions & 0 deletions

@@ -156,6 +156,12 @@ struct llama_hparams {
     uint32_t laurel_rank = 64;
     uint32_t n_embd_altup = 256;
 
+    // xIELU
+    std::array<float, LLAMA_MAX_LAYERS> xielu_alpha_n;
+    std::array<float, LLAMA_MAX_LAYERS> xielu_alpha_p;
+    std::array<float, LLAMA_MAX_LAYERS> xielu_beta;
+    std::array<float, LLAMA_MAX_LAYERS> xielu_eps;
+
     // needed by encoder-decoder models (e.g. T5, FLAN-T5)
     // ref: https://github.com/ggerganov/llama.cpp/pull/8141
     llama_token dec_start_token_id = LLAMA_TOKEN_NULL;

src/llama-model-loader.cpp

Lines changed: 2 additions & 0 deletions

@@ -465,6 +465,8 @@ namespace GGUFMeta {
     // TODO: this is not very clever - figure out something better
     template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv kid, std::array<int, 4> & result, uint32_t n, bool required);
     template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
+    template bool llama_model_loader::get_key_or_arr<std::array<float, 512>>(enum llm_kv kid, std::array<float, 512> & result, uint32_t n, bool required);
+
 
 llama_model_loader::llama_model_loader(
         const std::string & fname,

src/llama-model.cpp

Lines changed: 14 additions & 19 deletions

@@ -508,9 +508,13 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                           llm_arch_is_recurrent(ml.get_arch()));
 
     std::fill(hparams.rope_sections.begin(), hparams.rope_sections.end(), 0);
-
     std::fill(hparams.swa_layers.begin(), hparams.swa_layers.end(), 0);
 
+    std::fill(hparams.xielu_alpha_n.begin(), hparams.xielu_alpha_n.end(), 0);
+    std::fill(hparams.xielu_alpha_p.begin(), hparams.xielu_alpha_p.end(), 0);
+    std::fill(hparams.xielu_beta.begin(), hparams.xielu_beta.end(), 0);
+    std::fill(hparams.xielu_eps.begin(), hparams.xielu_eps.end(), 0);
+
     ml.get_key_or_arr(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, hparams.n_layer, false);
     ml.get_key_or_arr(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head_arr, hparams.n_layer, false);

@@ -1948,7 +1952,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         case LLM_ARCH_APERTUS:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                hparams.n_ctx_orig_yarn = 8192;
+                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_N, hparams.xielu_alpha_n, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_ALPHA_P, hparams.xielu_alpha_p, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_BETA, hparams.xielu_beta, hparams.n_layer);
+                ml.get_key_or_arr(LLM_KV_XIELU_EPS, hparams.xielu_eps, hparams.n_layer);
+
                 switch (hparams.n_layer) {
                     case 32: type = LLM_TYPE_8B; break;
                     default: type = LLM_TYPE_UNKNOWN;

@@ -5769,12 +5777,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.attn_q_norm_b = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "bias", i), { n_embd_head_k }, TENSOR_NOT_REQUIRED);
                     layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), { n_embd_head_k }, 0);
                     layer.attn_k_norm_b = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "bias", i), { n_embd_head_k }, TENSOR_NOT_REQUIRED);
-
-                    // xIELU parameters for Apertus
-                    layer.ffn_act_alpha_n = create_tensor(tn(LLM_TENSOR_FFN_ACT_ALPHA_N, i), { 1 }, 0);
-                    layer.ffn_act_alpha_p = create_tensor(tn(LLM_TENSOR_FFN_ACT_ALPHA_P, i), { 1 }, 0);
-                    layer.ffn_act_beta = create_tensor(tn(LLM_TENSOR_FFN_ACT_BETA, i), { 1 }, 0);
-                    layer.ffn_act_eps = create_tensor(tn(LLM_TENSOR_FFN_ACT_EPS, i), { 1 }, 0);
                 }
             } break;
         default:

@@ -18727,17 +18729,10 @@ struct llm_build_apertus : public llm_graph_context {
                 ggml_tensor * up = build_lora_mm(model.layers[il].ffn_up, cur);
                 cb(up, "ffn_up", il);
 
-                // xIELU activation
-                // Get the xIELU parameters from the model layers
-                ggml_tensor * alpha_n = model.layers[il].ffn_act_alpha_n;
-                ggml_tensor * alpha_p = model.layers[il].ffn_act_alpha_p;
-                ggml_tensor * beta = model.layers[il].ffn_act_beta;
-                ggml_tensor * eps = model.layers[il].ffn_act_eps;
-
-                float alpha_n_val = get_scalar_f32_val(alpha_n);
-                float alpha_p_val = get_scalar_f32_val(alpha_p);
-                float beta_val = get_scalar_f32_val(beta);
-                float eps_val = get_scalar_f32_val(eps);
+                float alpha_n_val = hparams.xielu_alpha_n[il];
+                float alpha_p_val = hparams.xielu_alpha_p[il];
+                float beta_val = hparams.xielu_beta[il];
+                float eps_val = hparams.xielu_eps[il];
 
                 // Apply xIELU activation
                 ggml_tensor * activated = ggml_xielu(ctx0, up, alpha_n_val, alpha_p_val, beta_val, eps_val);
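Note that load_hparams reads these arrays through ml.get_key_or_arr, which appears to accept either a single scalar (broadcast to every layer) or an array whose length must equal n_layer; the new std::array<float, 512> instantiation in llama-model-loader.cpp covers the float-array case. A rough Python sketch of that loader behavior, offered as an illustration of the semantics rather than the C++ implementation:

# Illustration only: assumed semantics of get_key_or_arr for the xielu.* keys.
def get_key_or_arr(metadata: dict, key: str, n_layer: int) -> list[float]:
    value = metadata[key]
    if isinstance(value, (list, tuple)):
        if len(value) != n_layer:                     # assumed to require an exact length match
            raise ValueError(f"{key}: expected {n_layer} entries, got {len(value)}")
        return [float(v) for v in value]
    return [float(value)] * n_layer                   # scalar case: same value for every layer

meta = {"xielu.alpha_n": [0.8] * 32, "xielu.beta": 0.5}   # toy metadata, not real Apertus values
xielu_alpha_n = get_key_or_arr(meta, "xielu.alpha_n", 32)
xielu_beta = get_key_or_arr(meta, "xielu.beta", 32)
assert len(xielu_alpha_n) == len(xielu_beta) == 32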
