Add support for microsoft/bitnet-b1.58-2B-4T (HF to GGUF).

zoq · zoq · commit cb8128c39183 · 2025-10-16T16:27:27.000-04:00
Signed-off-by: Marcus Edel <marcus.edel@collabora.com>
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -2641,18 +2641,47 @@ def prepare_tensors(self):
         super().prepare_tensors()
 
 
-@ModelBase.register("BitnetForCausalLM")
+@ModelBase.register("BitnetForCausalLM", "BitNetForCausalLM")
 class BitnetModel(TextModel):
     model_arch = gguf.MODEL_ARCH.BITNET
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Scales taken from "*.weight_scale" tensors, stored as float32 and keyed by
+        # the mapped name of the matching "*.weight" tensor. modify_tensors() fills
+        # this dict and pops each entry when the weight itself is converted.
+        self._bitnet_weight_scales: dict[str, torch.Tensor] = {}
+
     def set_vocab(self):
+        # Use the SentencePiece path only when the model directory contains a
+        # tokenizer.model file; otherwise fall back to _set_vocab_gpt2().
-        self._set_vocab_sentencepiece()
+        if (self.dir_model / "tokenizer.model").is_file():
+            self._set_vocab_sentencepiece()
+        else:
+            self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
+        # After the base-class parameters, record linear RoPE scaling with a factor of 1.0.
         super().set_gguf_parameters()
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    @staticmethod
+    def _unpack_bitnet_weights(packed: torch.Tensor) -> torch.Tensor:
+        """Expand 2-bit packed weight codes into float32 values.
+
+        Each uint8 element of ``packed`` holds four 2-bit codes. Code ``i`` of a
+        byte occupies bits ``2*i`` and ``2*i + 1``. Codes 0, 1 and 2 decode to
+        -1.0, 0.0 and 1.0; code 3 decodes to 0.0.
+
+        Args:
+            packed: uint8 tensor with at least one dimension.
+
+        Returns:
+            A float32 tensor on the same device as ``packed`` with four times as
+            many rows: the decoded tensors for code 0, 1, 2 and 3 are concatenated
+            along dim 0, and all other dimensions are unchanged.
+
+        Raises:
+            ValueError: if ``packed`` is not of dtype ``torch.uint8``.
+        """
+        if packed.dtype != torch.uint8:
+            raise ValueError(f"Expected packed BitNet weights to be torch.uint8, got {packed.dtype}")
+
+        values_per_item = 4  # 8 bits per byte / 2 bits per code
+        mapping = torch.tensor([-1.0, 0.0, 1.0, 0.0], dtype=torch.float32, device=packed.device)
+
+        # Indexing the lookup table with the code tensor already yields a tensor
+        # shaped like `packed`, so no reshape is needed before concatenating.
+        unpacked_chunks = [
+            mapping[((packed >> (2 * i)) & 0x03).long()]
+            for i in range(values_per_item)
+        ]
+
+        return torch.cat(unpacked_chunks, dim=0)
+
     def weight_quant(self, weight: Tensor) -> Tensor:
         dtype = weight.dtype
         weight = weight.float()
@@ -2665,8 +2694,36 @@ def weight_quant(self, weight: Tensor) -> Tensor:
         return result.type(dtype)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.endswith(".weight_scale"):
+            weight_name = name[:-13] + ".weight"
+            mapped_weight_name = self.map_tensor_name(weight_name)
+            if isinstance(data_torch, LazyTorchTensor):
+                data_torch = LazyTorchTensor.to_eager(data_torch)
+
+            scale_tensor = data_torch.to(torch.float32)
+            self._bitnet_weight_scales[mapped_weight_name] = scale_tensor
+            return []
+
         new_name = self.map_tensor_name(name)
 
+        ternary_weight = False
+
+        if name.endswith(".weight"):
+            scale_tensor = self._bitnet_weight_scales.pop(new_name, None)
+            if scale_tensor is not None:
+                scale_tensor = scale_tensor.to(torch.float32)
+                if scale_tensor.numel() != 1:
+                    raise ValueError(f"Expected scalar weight_scale for '{name}', got shape {tuple(scale_tensor.shape)}")
+
+                if isinstance(data_torch, LazyTorchTensor):
+                    data_torch = LazyTorchTensor.to_eager(data_torch)
+
+                packed = data_torch.to(torch.uint8)
+                unpacked = self._unpack_bitnet_weights(packed)
+                scale_value = scale_tensor.reshape(-1)[0].item()
+                data_torch = unpacked * scale_value
+                ternary_weight = True
+
         if any(self.match_model_tensor_name(new_name, key, bid) for key in [
             gguf.MODEL_TENSOR.ATTN_Q,
             gguf.MODEL_TENSOR.ATTN_K,
@@ -2675,7 +2732,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             gguf.MODEL_TENSOR.FFN_UP,
             gguf.MODEL_TENSOR.FFN_DOWN,
             gguf.MODEL_TENSOR.FFN_GATE,
-        ]):
+        ]) and not ternary_weight:
             # transform weight into 1/0/-1 (in fp32)
             data_torch = self.weight_quant(data_torch)
 
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
@@ -898,10 +898,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.ATTN_SUB_NORM: (
             "model.layers.{bid}.self_attn.inner_attn_ln",  # bitnet
+            "model.layers.{bid}.self_attn.attn_sub_norm",  # microsoft-bitnet
         ),
 
         MODEL_TENSOR.FFN_SUB_NORM: (
             "model.layers.{bid}.mlp.ffn_layernorm",  # bitnet
+            "model.layers.{bid}.mlp.ffn_sub_norm",   # microsoft-bitnet
         ),
 
         MODEL_TENSOR.DEC_ATTN_NORM: (