diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 0eacd3af283d5..3df9806f1d7ca 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3373,6 +3373,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._found_qkv_bias = False + self._use_qkv_bias_cfg = self.hparams.get("qkv_proj_bias", True) + + def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.hparams["n_layer"]) self.gguf_writer.add_context_length(self.hparams.get("n_positions", self.hparams.get("n_ctx", 0))) @@ -3411,11 +3414,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter def prepare_tensors(self): super().prepare_tensors() - if not self._found_qkv_bias: - if not self.hparams.get("qkv_proj_bias", True): - self.gguf_writer.add_use_qkv_bias(False) - return - self.gguf_writer.add_use_qkv_bias(True) + + if not self._found_qkv_bias and self._use_qkv_bias_cfg: n_embd = self.hparams["n_embd"] for bid in range(self.hparams["n_layer"]): bias = torch.zeros(3 * n_embd, dtype=torch.float32) @@ -3423,6 +3423,11 @@ def prepare_tensors(self): self.gguf_writer.add_tensor(name, bias.numpy()) + use_bias = self._found_qkv_bias or self._use_qkv_bias_cfg + self.gguf_writer.add_use_qkv_bias(use_bias) + + + @ModelBase.register("PhiForCausalLM") class Phi2Model(TextModel): model_arch = gguf.MODEL_ARCH.PHI2 diff --git a/gguf-py/tests/test_codegen_bias.py b/gguf-py/tests/test_codegen_bias.py index 9bb0507429403..4d15b16bba0b5 100644 --- a/gguf-py/tests/test_codegen_bias.py +++ b/gguf-py/tests/test_codegen_bias.py @@ -1,17 +1,19 @@ -import os + import json +import os import tempfile import unittest +from pathlib import Path import pytest + pytest.importorskip("torch") -import torch -import numpy as np -import gguf -from pathlib import Path +import torch # noqa: E402 +import gguf # noqa: E402 + +from convert_hf_to_gguf import CodeGenModel # noqa: E402 -from convert_hf_to_gguf import CodeGenModel class TestCodeGenBias(unittest.TestCase): def _create_model(self, dir_model: str, with_bias: bool): @@ -65,5 +67,7 @@ def test_bias_codegen(self): self.assertTrue(val) self.assertTrue(has_tensor) + + if __name__ == "__main__": unittest.main()