Skip to content

Commit b61de2b

Browse files
authored
convert : allow quantizing lora again (ggml-org#17453)
1 parent b8372ee commit b61de2b

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

convert_hf_to_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def prepare_tensors(self):
                 gguf.MODEL_TENSOR.ALTUP_PREDICT_COEF,
             )
         )
-        or not new_name.endswith(".weight")
+        or new_name[-7:] not in (".weight", ".lora_a", ".lora_b")
     ):
         data_qtype = gguf.GGMLQuantizationType.F32

convert_lora_to_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f32",
         help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(

0 commit comments

Comments
 (0)