Commit 656759a

convert ok

1 parent 6f67cf1 commit 656759a
File tree (4 files changed: +31 -2 lines)

convert_hf_to_gguf.py
examples/llava/clip-impl.h
gguf-py/gguf/constants.py
gguf-py/gguf/tensor_mapping.py

convert_hf_to_gguf.py

Lines changed: 15 additions & 2 deletions
@@ -1899,7 +1899,10 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@ModelBase.register("LlavaForConditionalGeneration")
+@ModelBase.register(
+    "LlavaForConditionalGeneration", # pixtral
+    "Mistral3ForConditionalGeneration", # mistral small 3.1
+)
 class LlavaVisionModel(VisionModel):
     img_break_tok_id = -1
 
@@ -1908,10 +1911,20 @@ def __init__(self, *args, **kwargs):
         if self.hparams["model_type"] == "pixtral":
             # layer_norm_eps is not in config.json, it is hard-coded in modeling_pixtral.py
             self.hparams["layer_norm_eps"] = self.hparams.get("layer_norm_eps", 1e-5)
-            self.img_break_tok_id = 12 # see tokenizer_config.json
+            self.img_break_tok_id = self.get_token_id("[IMG_BREAK]")
+            logger.info(f"Image break token id: {self.img_break_tok_id}")
         else:
             raise ValueError(f"Unsupported model type: {self.hparams['model_type']}")
 
+    def get_token_id(self, token: str) -> int:
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+            added_tokens_decoder = json.load(f)['added_tokens_decoder']
+            for id_, token_data in added_tokens_decoder.items():
+                if token_data["content"] == token:
+                    return int(id_)
+        raise ValueError(f"Token '{token}' not found in tokenizer config.")
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
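The key conversion change replaces the hard-coded token id 12 with a lookup in the model's tokenizer_config.json, so the converter keeps working if a checkpoint assigns [IMG_BREAK] a different id. A standalone sketch of the same lookup, runnable outside the converter (the helper signature and the example path are illustrative; the file layout is the one the diff reads):

```python
import json
from pathlib import Path

def get_token_id(model_dir: Path, token: str) -> int:
    # tokenizer_config.json stores special tokens under "added_tokens_decoder",
    # keyed by the token id as a string:
    #   {"added_tokens_decoder": {"12": {"content": "[IMG_BREAK]", ...}, ...}}
    with open(model_dir / "tokenizer_config.json", encoding="utf-8") as f:
        added_tokens_decoder = json.load(f)["added_tokens_decoder"]
    for id_, token_data in added_tokens_decoder.items():
        if token_data["content"] == token:
            return int(id_)
    raise ValueError(f"Token {token!r} not found in tokenizer config.")

# For a pixtral checkpoint this resolves to 12, the value that was hard-coded before:
# img_break_tok_id = get_token_id(Path("models/pixtral"), "[IMG_BREAK]")
```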

examples/llava/clip-impl.h

Lines changed: 2 additions & 0 deletions
@@ -68,9 +68,11 @@
 #define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
 #define TN_MVLM_PROJ_PEG   "mm.model.peg.%d.%s"
 #define TN_IMAGE_NEWLINE   "model.image_newline"
+#define TN_MM_INP_NORM     "mm.input_norm.weight"
 #define TN_MM_INP_PROJ     "mm.input_projection.weight" // gemma3
 #define TN_MM_SOFT_EMB_N   "mm.soft_emb_norm.weight" // gemma3
 #define TN_MM_PROJECTOR    "mm.model.fc.weight" // idefics3
+#define TN_MM_PATCH_MERGER "mm.patch_merger.weight" // mistral small 3.1
 #define TN_TOK_IMG_BREAK   "v.token_embd.img_break" // pixtral
 
 // mimicpmv
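These macros are the C++ side of a contract with gguf-py: the loader looks tensors up by the exact strings the converter wrote. A quick sanity sketch of the correspondence (assuming, as the existing gemma3 entries suggest, that the converter appends a ".weight" suffix to the base names registered in constants.py below):

```python
# The loader matches GGUF tensor names verbatim, so the macro strings in
# clip-impl.h must equal the gguf-py base names plus the ".weight" suffix.
TN_MM_INP_NORM     = "mm.input_norm.weight"    # from clip-impl.h
TN_MM_PATCH_MERGER = "mm.patch_merger.weight"  # from clip-impl.h

assert TN_MM_INP_NORM     == "mm.input_norm"   + ".weight"
assert TN_MM_PATCH_MERGER == "mm.patch_merger" + ".weight"
```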

gguf-py/gguf/constants.py

Lines changed: 6 additions & 0 deletions
@@ -491,6 +491,7 @@ class MODEL_TENSOR(IntEnum):
     V_ENC_FFN_DOWN = auto()
     V_PRE_NORM = auto()
     V_POST_NORM = auto()
+    V_MM_INP_NORM = auto()
     V_MM_INP_PROJ = auto() # gemma3
     V_MM_SOFT_EMB_NORM = auto() # gemma3
     V_RESMPL_POS_EMBD_K = auto() # minicpmv
@@ -505,6 +506,7 @@ class MODEL_TENSOR(IntEnum):
     V_RESMPL_PROJ = auto() # minicpmv
     V_RESMPL_QUERY = auto() # minicpmv
     V_TOK_EMBD_IMG_BREAK = auto() # pixtral
+    V_MM_PATCH_MERGER = auto() # mistral small 3.1
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -747,6 +749,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.V_PRE_NORM: "v.pre_ln",
     MODEL_TENSOR.V_POST_NORM: "v.post_ln",
     MODEL_TENSOR.V_MM_INP_PROJ: "mm.input_projection",
+    MODEL_TENSOR.V_MM_INP_NORM: "mm.input_norm",
     MODEL_TENSOR.V_MM_SOFT_EMB_NORM: "mm.soft_emb_norm",
     MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "resampler.pos_embd_k",
     MODEL_TENSOR.V_RESMPL_ATTN_Q: "resampler.attn.q",
@@ -760,6 +763,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.V_RESMPL_PROJ: "resampler.proj",
     MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
     MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
+    MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -783,6 +787,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.V_PRE_NORM,
         MODEL_TENSOR.V_POST_NORM,
         MODEL_TENSOR.V_MM_INP_PROJ,
+        MODEL_TENSOR.V_MM_INP_NORM,
         MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
         MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
         MODEL_TENSOR.V_RESMPL_ATTN_Q,
@@ -796,6 +801,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.V_RESMPL_PROJ,
         MODEL_TENSOR.V_RESMPL_QUERY,
         MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
+        MODEL_TENSOR.V_MM_PATCH_MERGER,
     ],
     MODEL_ARCH.LLAMA: [
         MODEL_TENSOR.TOKEN_EMBD,
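Registering a new tensor in gguf-py takes three touches, all visible above: an enum member, a base name in TENSOR_NAMES, and membership in the architecture's MODEL_TENSORS list, which gates which tensors that architecture may emit. Once merged, the names are reachable from the installed package; a hedged check (TENSOR_NAMES and MODEL_TENSOR are the package-level constants, and the printed values come straight from this diff):

```python
import gguf

# Look up the base names this commit registers; the converter appends
# ".weight" when writing the actual tensors.
print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MM_INP_NORM])      # mm.input_norm
print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MM_PATCH_MERGER])  # mm.patch_merger
```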

gguf-py/gguf/tensor_mapping.py

Lines changed: 8 additions & 0 deletions
@@ -1001,6 +1001,10 @@ class TensorNameMap:
             "multi_modal_projector.mm_input_projection",
         ),
 
+        MODEL_TENSOR.V_MM_INP_NORM: (
+            "multi_modal_projector.norm",
+        ),
+
         MODEL_TENSOR.V_MM_SOFT_EMB_NORM: (
             "multi_modal_projector.mm_soft_emb_norm",
         ),
@@ -1052,6 +1056,10 @@ class TensorNameMap:
         MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: (
            "v.token_embd.img_break", # for pixtral, this is a generated vector
         ),
+
+        MODEL_TENSOR.V_MM_PATCH_MERGER: (
+            "multi_modal_projector.patch_merger.merging_layer", # mistral small 3.1
+        ),
     }
 
     # architecture-specific block mappings
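TensorNameMap is where conversion actually renames tensors: each MODEL_TENSOR maps to the Hugging Face names it may appear under, and get_name() retries with the configured suffixes stripped. A usage sketch (get_tensor_name_map and get_name exist in gguf-py; the architecture constant and block count here are illustrative assumptions, not values taken from this commit):

```python
import gguf

# Build the HF -> GGUF name map for the vision tower.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, 24)

# A Mistral Small 3.1 projector tensor from the HF checkpoint ...
hf_name = "multi_modal_projector.patch_merger.merging_layer.weight"

# ... resolves to the registered base name with the suffix re-attached:
print(tmap.get_name(hf_name, try_suffixes=(".weight", ".bias")))
# -> mm.patch_merger.weight
```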
