Skip to content

Commit 571a45d

Browse files
committed
convertible to gguf
1 parent 13c9a33 commit 571a45d

File tree

4 files changed

+59
-3
lines changed

4 files changed

+59
-3
lines changed

convert_hf_to_gguf.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,9 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
419419
@staticmethod
420420
def load_hparams(dir_model: Path):
421421
try:
422-
return AutoConfig.from_pretrained(dir_model).to_dict()
422+
# for security reasons, we don't allow loading remote code by default
423+
# if a model needs remote code, we will fall back to config.json
424+
return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
423425
except Exception as e:
424426
logger.warning(f"Failed to load model config from {dir_model}: {e}")
425427
logger.warning("Trying to load config.json instead")
@@ -1739,7 +1741,8 @@ def prepare_tensors(self):
17391741
"MistralForCausalLM",
17401742
"MixtralForCausalLM",
17411743
"VLlama3ForCausalLM",
1742-
"LlavaForConditionalGeneration")
1744+
"LlavaForConditionalGeneration",
1745+
)
17431746
class LlamaModel(TextModel):
17441747
model_arch = gguf.MODEL_ARCH.LLAMA
17451748
undo_permute = True
@@ -2595,6 +2598,32 @@ def set_gguf_parameters(self):
25952598
self.gguf_writer.add_causal_attention(False)
25962599

25972600

2601+
@ModelBase.register("MiMoForCausalLM")
class MimoModel(Qwen2Model):
    """Xiaomi MiMo model converter.

    MiMo is a Qwen2-style architecture with additional multi-token-prediction
    (MTP) layers.  The MTP layers are appended after the regular decoder
    layers, so the effective block count is extended and the extra tensors are
    remapped onto the trailing block indices.
    """

    model_arch = gguf.MODEL_ARCH.QWEN2

    # number of MTP layers (from hparams "num_nextn_predict_layers")
    n_multi_token_predict: int
    # number of regular decoder layers, i.e. block count before MTP layers were appended
    n_layers_no_mtp: int

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.n_multi_token_predict = self.hparams["num_nextn_predict_layers"]
        self.n_layers_no_mtp = self.block_count
        # MTP layers are stored as extra blocks after the regular decoder layers
        self.block_count = self.block_count + self.n_multi_token_predict
        # rebuild the tensor name map so it also covers the appended MTP blocks
        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        # removed leftover debug print(self.hparams)
        self.gguf_writer.add_n_multi_token_predict(self.hparams["num_nextn_predict_layers"])

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Remap "…mtp_layers.{i}.*" onto the trailing block indices:
        # ".mtp_layers.{i}." -> ".layers.{n_layers_no_mtp + i}."
        if "mtp_layers" in name and bid is not None:
            name = name.replace(".mtp_layers", ".layers")
            for i in range(self.n_multi_token_predict):
                name = name.replace(f"layers.{i}.", f"layers.{self.n_layers_no_mtp + i}.")
        return super().modify_tensors(data_torch, name, bid)
25982627
@ModelBase.register("Qwen2MoeForCausalLM")
25992628
class Qwen2MoeModel(TextModel):
26002629
model_arch = gguf.MODEL_ARCH.QWEN2MOE

gguf-py/gguf/constants.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class LLM:
118118
EMBEDDING_SCALE = "{arch}.embedding_scale"
119119
TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
120120
INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
121+
N_MULTI_TOKEN_PREDICT = "{arch}.n_multi_token_predict"
121122

122123
class Attention:
123124
HEAD_COUNT = "{arch}.attention.head_count"
@@ -373,6 +374,9 @@ class MODEL_TENSOR(IntEnum):
373374
ATTN_Q_NORM = auto()
374375
ATTN_K_NORM = auto()
375376
LAYER_OUT_NORM = auto()
377+
MTP_INP_PROJ = auto()
378+
MTP_TOKEN_NORM = auto() # token_layernorm
379+
MTP_HIDDEN_NORM = auto() # hidden_layernorm
376380
SSM_IN = auto()
377381
SSM_CONV1D = auto()
378382
SSM_X = auto()
@@ -628,6 +632,9 @@ class MODEL_TENSOR(IntEnum):
628632
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
629633
MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
630634
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
635+
MODEL_TENSOR.MTP_INP_PROJ: "blk.{bid}.mtp_inp_proj",
636+
MODEL_TENSOR.MTP_TOKEN_NORM: "blk.{bid}.mtp_token_norm",
637+
MODEL_TENSOR.MTP_HIDDEN_NORM: "blk.{bid}.mtp_hidden_norm",
631638
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
632639
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
633640
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
@@ -1095,6 +1102,10 @@ class MODEL_TENSOR(IntEnum):
10951102
MODEL_TENSOR.FFN_GATE,
10961103
MODEL_TENSOR.FFN_DOWN,
10971104
MODEL_TENSOR.FFN_UP,
1105+
MODEL_TENSOR.MTP_INP_PROJ, # xiaomi mimo
1106+
MODEL_TENSOR.MTP_HIDDEN_NORM, # xiaomi mimo
1107+
MODEL_TENSOR.MTP_TOKEN_NORM, # xiaomi mimo
1108+
MODEL_TENSOR.LAYER_OUT_NORM, # xiaomi mimo
10981109
],
10991110
MODEL_ARCH.QWEN2VL: [
11001111
MODEL_TENSOR.TOKEN_EMBD,

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,9 @@ def add_remove_extra_whitespaces(self, value: bool) -> None:
899899
def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
900900
self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
901901

902+
def add_n_multi_token_predict(self, value: int) -> None:
903+
self.add_uint32(Keys.LLM.N_MULTI_TOKEN_PREDICT.format(arch=self.arch), value)
904+
902905
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
903906
if not isinstance(value, str):
904907
template_default = None

gguf-py/gguf/tensor_mapping.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,20 @@ class TensorNameMap:
457457
"encoder.layers.{bid}.norm2", # nomic-bert
458458
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
459459
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
460-
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
460+
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
461+
"model.layers.{bid}.final_layernorm", # xiaomi mimo
462+
),
463+
464+
MODEL_TENSOR.MTP_INP_PROJ: (
465+
"model.layers.{bid}.input_proj.weight", # xiaomi mimo
466+
),
467+
468+
MODEL_TENSOR.MTP_TOKEN_NORM: (
469+
"model.layers.{bid}.token_layernorm.weight", # xiaomi mimo
470+
),
471+
472+
MODEL_TENSOR.MTP_HIDDEN_NORM: (
473+
"model.layers.{bid}.hidden_layernorm.weight", # xiaomi mimo
461474
),
462475

463476
MODEL_TENSOR.SSM_IN: (

0 commit comments

Comments
 (0)