From 6af5debb9ca6d6ef50ee67e1463aa8aba8aba9ea Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 27 May 2025 22:42:30 +0200
Subject: [PATCH 1/2] convert : fix tensor naming conflict for llama 4 vision

---
 convert_hf_to_gguf.py          | 2 ++
 gguf-py/gguf/tensor_mapping.py | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a015ecee08328..1785d471b4fc3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2169,6 +2169,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             # process vision tensors
             if "positional_embedding_vlm" in name and ".weight" not in name:
                 name += ".weight"
+            if "multi_modal_projector.linear_1" in name:
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
             return [(self.map_tensor_name(name), data_torch)]
         return []
 
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 000ffd00615b5..48167dd648c0a 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -902,7 +902,6 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_MMPROJ_FC: (
             "model.connector.modality_projection.proj", # SmolVLM
-            "multi_modal_projector.linear_1", # llama 4
         ),
 
         MODEL_TENSOR.V_MMPROJ_MLP: (

From 6fd193324adb043b07dc6aee4c09232e58a4a14b Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 27 May 2025 22:48:45 +0200
Subject: [PATCH 2/2] add comment

---
 convert_hf_to_gguf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 1785d471b4fc3..7f935d091a70b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2170,6 +2170,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             if "positional_embedding_vlm" in name and ".weight" not in name:
                 name += ".weight"
             if "multi_modal_projector.linear_1" in name:
+                # despite the name with number postfix, this is a single fully connected layer
                 return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
             return [(self.map_tensor_name(name), data_torch)]
         return []