Skip to content

Commit 9a048d8

Browse files
flake8 fixes
1 parent adff470 commit 9a048d8

File tree

2 files changed: +9 additions, -19 deletions

convert_hf_to_gguf.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -686,14 +686,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
686686
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
687687
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
688688
res = "falcon3"
689-
if (
690-
chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86" or
691-
chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896" or
692-
chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b" or
693-
chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"
694-
):
695-
# ref: https://huggingface.co/collections/tiiuae/falcon-h1-6819f2795bc406da60fab8df
696-
res = "falcon_h1"
697689
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
698690
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
699691
res = "bert-bge-large"
@@ -6608,7 +6600,7 @@ def _generate_mup_vector(self, block_id: int) -> torch.Tensor:
66086600

66096601
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
66106602
tensors = list(super().modify_tensors(data_torch, name, bid))
6611-
tensor = tensors[0][1]
6603+
tensor = tensors[0][1]
66126604

66136605
if "down_proj" in name:
66146606
tensor = tensor * self.mlp_multipliers[1]
@@ -6669,9 +6661,7 @@ def set_gguf_parameters(self):
66696661
## Validation ##
66706662
assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
66716663
assert self.d_inner % d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {d_head}"
6672-
self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or
6673-
self.find_hparam(["num_attention_heads"]))
6674-
6664+
self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or self.find_hparam(["num_attention_heads"]))
66756665

66766666
# Add any other Falcon Mamba2 specific configuration
66776667
self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))

gguf-py/gguf/tensor_mapping.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ class TensorNameMap:
293293
MODEL_TENSOR.FFN_POST_NORM: (
294294
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
295295
"model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
296-
"model.layers.{bid}.feed_forward.up_proj",
296+
"model.layers.{bid}.feed_forward.up_proj",
297297
),
298298

299299
MODEL_TENSOR.FFN_GATE_INP: (
@@ -364,7 +364,7 @@ class TensorNameMap:
364364
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
365365
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
366366
"model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
367-
"model.layers.{bid}.feed_forward.down_proj",
367+
"model.layers.{bid}.feed_forward.down_proj",
368368
),
369369

370370
# AWQ-activation gate
@@ -550,13 +550,13 @@ class TensorNameMap:
550550
MODEL_TENSOR.SSM_IN: (
551551
"model.layers.{bid}.in_proj",
552552
"backbone.layers.{bid}.mixer.in_proj",
553-
"model.layers.{bid}.mamba.in_proj",
553+
"model.layers.{bid}.mamba.in_proj",
554554
),
555555

556556
MODEL_TENSOR.SSM_CONV1D: (
557557
"model.layers.{bid}.conv1d",
558558
"backbone.layers.{bid}.mixer.conv1d",
559-
"model.layers.{bid}.mamba.conv1d",
559+
"model.layers.{bid}.mamba.conv1d",
560560
),
561561

562562
MODEL_TENSOR.SSM_X: (
@@ -567,13 +567,13 @@ class TensorNameMap:
567567
MODEL_TENSOR.SSM_DT: (
568568
"model.layers.{bid}.dt_proj",
569569
"backbone.layers.{bid}.mixer.dt_proj",
570-
"model.layers.{bid}.mamba.dt_proj",
570+
"model.layers.{bid}.mamba.dt_proj",
571571
),
572572

573573
MODEL_TENSOR.SSM_A: (
574574
"model.layers.{bid}.A_log",
575575
"backbone.layers.{bid}.mixer.A_log",
576-
"model.layers.{bid}.mamba.A_log",
576+
"model.layers.{bid}.mamba.A_log",
577577
),
578578

579579
MODEL_TENSOR.SSM_D: (
@@ -1177,7 +1177,7 @@ class TensorNameMap:
11771177
MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
11781178
"resampler.attn.out_proj",
11791179
),
1180-
1180+
11811181
MODEL_TENSOR.V_RESMPL_KV: (
11821182
"resampler.kv_proj",
11831183
),

Comments (0)