Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up
@@ -302,7 +302,7 @@ def prepare_tensors(self):
data_qtype: gguf.GGMLQuantizationType | bool = self.tensor_force_quant(name, new_name, bid, n_dims)

# Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
- if n_dims <= 1 or new_name.endswith("_norm.weight"):
+ if n_dims <= 1 or new_name.endswith("_norm.weight") or ".patch_embd.weight" in new_name:
data_qtype = gguf.GGMLQuantizationType.F32

# Conditions should closely match those in llama_model_quantize_internal in llama.cpp
Expand Down