Skip to content

Commit 4d19698

Browse files
authored
convert : force patch_embd weights to F16 or F32 to avoid broken GGUFs (#15367)
* force patch_embd weights to f32 * use MmprojModel base tensor_force_quant instead
1 parent b143fbc commit 4d19698

File tree

1 file changed: +12 additions, −16 deletions

convert_hf_to_gguf.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,12 @@ def _find_param(self, obj: dict[str, Any], keys: Iterable[str], optional: bool =
             return None
         raise KeyError(f"could not find any of: {keys}")
 
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, name, n_dims  # unused
+        if ".patch_embd.weight" in new_name:
+            return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
+        return False
+
 
 @ModelBase.register("GPTNeoXForCausalLM")
 class GPTNeoXModel(TextModel):
@@ -2305,10 +2311,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_use_gelu(True)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".embeddings." in name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -3296,12 +3301,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_attention_layernorm_eps(self.global_config.get("rms_norm_eps", 1e-6))
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
         if ".position_embd." in new_name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -3374,10 +3376,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         yield ("audio_tower.embed_positions.weight", pos_embd)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".conv" in name and ".weight" in name:
             return gguf.GGMLQuantizationType.F16
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name.startswith("thinker."):
@@ -3423,12 +3424,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
 
    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
         if ".position_embd." in new_name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def _mapping_interns1_name(self, name):
         names_map = {
@@ -5062,13 +5060,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         # related to https://github.com/ggml-org/llama.cpp/issues/13025
         if "input_projection" in name:
             return gguf.GGMLQuantizationType.F16
         if ".embeddings." in name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -7727,10 +7724,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".conv" in name and ".weight" in name:
             return gguf.GGMLQuantizationType.F16
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused

0 commit comments

Comments (0)