
Commit 3a5fc48

Authored by rkazants, with mvafin, github-actions[bot], and nikita-savelyevv
[OpenVINO] Support microsoft bitnet-b1.58-2B-4T (#1518)
* Support bitnet models
* Apply style fixes
* Fix conversion
* Update optimum/exporters/openvino/model_configs.py
* Fix patcher name
* Add test
* Fix style
* Apply style fixes
* Return test after merge
* Apply suggestions from code review
* Move model
* Fix style
* Update optimum/exporters/openvino/model_configs.py
* Apply formatting
* Add tests
* Temporarily added to test on openvino-nightly
* Apply formatting
* Update tests/openvino/test_export.py
* Adjust tests
* Apply formatting
* Update .github/workflows/test_openvino.yml
* Add smart patching for bitnet tests
* Apply suggestions from code review
* Update test_openvino.yml
* Update .github/workflows/test_openvino.yml

Signed-off-by: Kazantsev, Roman <[email protected]>
Co-authored-by: Maxim Vafin <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Nikita Savelyev <[email protected]>
1 parent 21a33fd commit 3a5fc48

File tree: 6 files changed, +62 −0 lines changed

docs/source/openvino/models.mdx

Lines changed: 1 addition & 0 deletions
```diff
@@ -25,6 +25,7 @@ Here is the list of the supported architectures :
 - Beit
 - Bert
 - BioGPT
+- Bitnet
 - BlenderBot
 - BlenderBotSmall
 - Bloom
```
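With Bitnet on the supported-architectures list, the model can be exported through the usual optimum-intel entry points. A minimal usage sketch, assuming optimum-intel with OpenVINO >= 2025.4.0 and transformers >= 4.52.1 (the version gates introduced below); the prompt text is illustrative:

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "microsoft/bitnet-b1.58-2B-4T"  # the checkpoint named in the commit title

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly
model = OVModelForCausalLM.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("What is 1.58-bit quantization?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```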

optimum/exporters/openvino/__main__.py

Lines changed: 21 additions & 0 deletions
```diff
@@ -273,8 +273,11 @@ def main_export(
         supported_quant_methods = ["gptq"]
         if is_openvino_version(">=", "2024.6.0"):
             supported_quant_methods.append("awq")
+        if is_openvino_version(">=", "2025.4.0"):
+            supported_quant_methods.append("bitnet")
         do_quant_patching = quant_method in supported_quant_methods
         do_gptq_patching = quant_method == "gptq"
+        do_bitnet_patching = quant_method == "bitnet"

         model_type = config.model_type
         if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
@@ -365,6 +368,22 @@ class StoreAttr(object):
                 return model

             GPTQQuantizer.post_init_model = post_init_model
+        if do_bitnet_patching:
+            from transformers.integrations.bitnet import AutoBitLinear
+
+            orig_load_hook = AutoBitLinear.load_hook
+
+            # rewrite load hook to save original weight
+            def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
+                if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype:
+                    self.original_weight = state_dict[prefix + "weight"]
+                    w_shape = self.original_weight.shape
+                    state_dict[prefix + "weight"] = torch.empty(
+                        (w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta"
+                    )
+                return state_dict
+
+            AutoBitLinear.load_hook = bitnet_load_hook
     elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"):
         _loading_kwargs = {} if variant is None else {"variant": variant}
         if dtype == "auto" or dtype is None:
@@ -557,6 +576,8 @@ class StoreAttr(object):
         torch.cuda.is_available = orig_cuda_check
         if do_gptq_patching:
             GPTQQuantizer.post_init_model = orig_post_init_model
+        if do_bitnet_patching:
+            AutoBitLinear.load_hook = orig_load_hook


 def maybe_convert_tokenizers(library_name: str, output: Path, model=None, preprocessors=None, task=None):
```
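The change follows a save/patch/restore discipline: keep a reference to the original `AutoBitLinear.load_hook`, install a replacement that stashes the packed checkpoint weights as `original_weight` (the `w_shape[0] * 4` unpacked shape reflects that the checkpoint packs four low-bit values per byte along the first dimension) and substitutes a shape-only meta tensor, then put the original hook back once the export finishes. A minimal, self-contained sketch of that pattern; `FakeLinear` and its hook are hypothetical stand-ins for `AutoBitLinear`, not the transformers API:

```python
# FakeLinear stands in for transformers.integrations.bitnet.AutoBitLinear.
class FakeLinear:
    def load_hook(self, state_dict, prefix, *args, **kwargs):
        return state_dict


orig_load_hook = FakeLinear.load_hook  # 1. save the original


def patched_load_hook(self, state_dict, prefix, *args, **kwargs):
    # 2. intercept the state dict before weights reach the module
    print(f"intercepted prefix {prefix!r}")
    return orig_load_hook(self, state_dict, prefix, *args, **kwargs)


FakeLinear.load_hook = patched_load_hook  # 3. install the patch
try:
    FakeLinear().load_hook({}, "model.layers.0.")
finally:
    FakeLinear.load_hook = orig_load_hook  # 4. always restore
```

The `try`/`finally` plays the same role here as the cleanup block at the end of `main_export`: the original hook is restored even if the export raises.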

optimum/exporters/openvino/model_configs.py

Lines changed: 13 additions & 0 deletions
```diff
@@ -576,6 +576,19 @@ class GptOssOpenVINOConfig(LlamaOpenVINOConfig):
     MIN_TRANSFORMERS_VERSION = "4.55.1"


+@register_in_tasks_manager(
+    "bitnet",
+    *[
+        "text-generation",
+        "text-generation-with-past",
+    ],
+    library_name="transformers",
+)
+class BitnetOpenVINOConfig(LlamaOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = "4.52.1"
+    _MODEL_PATCHER = OVDecoderModelPatcher
+
+
 @register_in_tasks_manager(
     "exaone",
     *[
```
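`register_in_tasks_manager` associates a `(model_type, task)` pair with an export config class at import time; because BitNet shares Llama's decoder layout, the new config simply subclasses `LlamaOnnxConfig`. A minimal sketch of the decorator-registration pattern under hypothetical names (`register`, `_EXPORT_CONFIGS`), not optimum's actual internals:

```python
# Hypothetical registry mapping (model_type, task) -> export config class.
_EXPORT_CONFIGS: dict = {}


def register(model_type: str, *tasks: str):
    def wrapper(config_cls: type) -> type:
        for task in tasks:
            # one entry per (model_type, task) pair, as the decorator above does
            _EXPORT_CONFIGS[(model_type, task)] = config_cls
        return config_cls

    return wrapper


@register("bitnet", "text-generation", "text-generation-with-past")
class BitnetConfigSketch:
    MIN_TRANSFORMERS_VERSION = "4.52.1"


print(_EXPORT_CONFIGS[("bitnet", "text-generation")].__name__)  # BitnetConfigSketch
```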

tests/openvino/test_decoder.py

Lines changed: 15 additions & 0 deletions
```diff
@@ -120,6 +120,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     if is_transformers_version(">=", "4.53.0"):
         SUPPORTED_ARCHITECTURES += ("arcee",)

+    if is_transformers_version(">=", "4.52.1") and is_openvino_version(">=", "2025.4.0"):
+        SUPPORTED_ARCHITECTURES += ("bitnet",)
+
     if is_transformers_version(">=", "4.54.0"):
         # remote code models differs after transformers v4.54
         SUPPORTED_ARCHITECTURES = tuple(set(SUPPORTED_ARCHITECTURES) - {"minicpm", "minicpm3", "arctic", "deepseek"})
@@ -218,11 +221,21 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gpt_oss": 2 if is_openvino_version(">=", "2025.4") else 0,
         "gpt_oss_mxfp4": 2 if is_openvino_version(">=", "2025.4") else 0,
         "zamba2": 1,
+        "bitnet": 6,
     }

+    def mock_torch_compile(self, model_arch):
+        if model_arch == "bitnet":
+            # mock torch.compile to avoid compilation errors in tests
+            original_torch_compile = torch.compile
+            torch.compile = lambda func: func
+            # ensure restoration happens even if test fails
+            self.addCleanup(lambda: setattr(torch, "compile", original_torch_compile))
+
     # TODO: remove gptq/awq from here
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
+        self.mock_torch_compile(model_arch)
         model_id = MODEL_NAMES[model_arch]

         not_stateful = []
@@ -377,6 +390,7 @@ def test_compare_to_transformers(self, model_arch):
     @pytest.mark.run_slow
     @slow
     def test_pipeline(self, model_arch):
+        self.mock_torch_compile(model_arch)
         set_seed(SEED)
         model_kwargs = {}
         model_id = MODEL_NAMES[model_arch]
@@ -562,6 +576,7 @@ def test_default_filling_attention_mask_and_position_ids(self):
     @pytest.mark.run_slow
     @slow
     def test_beam_search(self, model_arch):
+        self.mock_torch_compile(model_arch)
         model_kwargs = {}
         model_id = MODEL_NAMES[model_arch]
         if model_arch in self.REMOTE_CODE_MODELS:
```
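`mock_torch_compile` relies on `unittest.TestCase.addCleanup`, which runs its callbacks even when the test body raises, so the global patch to `torch.compile` cannot leak across tests. A runnable sketch of that restore pattern, assuming `torch` is installed as it is in this suite:

```python
import unittest


class PatchRestoreExample(unittest.TestCase):
    def test_patch_is_restored(self):
        import torch

        original_torch_compile = torch.compile
        torch.compile = lambda func: func  # no-op replacement for the test
        # addCleanup fires even if the assertion below fails,
        # so the no-op cannot leak into other tests
        self.addCleanup(lambda: setattr(torch, "compile", original_torch_compile))
        self.assertIs(torch.compile(len), len)


if __name__ == "__main__":
    unittest.main()
```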

tests/openvino/test_exporters_cli.py

Lines changed: 11 additions & 0 deletions
```diff
@@ -111,6 +111,14 @@ class OVCLIExportTestCase(unittest.TestCase):
                 ("text-generation-with-past", "zamba2"),
             ]
         )
+
+    if is_transformers_version(">=", "4.52.1") and is_openvino_version(">=", "2025.4.0"):
+        SUPPORTED_ARCHITECTURES.extend(
+            [
+                ("text-generation-with-past", "bitnet"),
+            ]
+        )
+
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
         "gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
         "t5": 0 if is_openvino_version("<", "2025.1") else 2,  # 2025.1 brings support for unigram tokenizers
@@ -136,6 +144,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         "falcon-mamba": 2,
         "qwen3": 2,
         "zamba2": 2,
+        "bitnet": 2,
     }

     TOKENIZER_CHAT_TEMPLATE_TESTS_MODELS = {
@@ -966,6 +975,8 @@ def test_exporters_cli_fp16(self, task: str, model_type: str):

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_exporters_cli_int8(self, task: str, model_type: str):
+        if model_type in ["bitnet"]:
+            self.skipTest("CVS-176501 INT8 compression fails for BitNet; need to compress remaining BF16 weights")
         with TemporaryDirectory() as tmpdir:
             add_ops = ""
             if task == "text-to-audio" and model_type == "speecht5":
```
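The `self.skipTest(...)` call marks the parameterized case as skipped rather than passed, keeping the known INT8 limitation (tracked as CVS-176501) visible in test reports. A minimal sketch of the same in-body skip pattern, with a hypothetical hard-coded `model_type` in place of the real parameterization:

```python
import unittest


class SkipExample(unittest.TestCase):
    def test_int8_compression(self):
        model_type = "bitnet"  # hypothetical fixed value; the real test is parameterized
        if model_type in ["bitnet"]:
            self.skipTest("INT8 compression not yet supported for this architecture")
        # the real test body would run the CLI export and check the IR here


if __name__ == "__main__":
    unittest.main()  # reports the test as skipped, not passed
```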

tests/openvino/utils_tests.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -48,6 +48,7 @@
     "baichuan2-13b": "optimum-intel-internal-testing/tiny-random-baichuan2-13b",
     "bigbird_pegasus": "optimum-intel-internal-testing/tiny-random-bigbird_pegasus",
     "biogpt": "optimum-intel-internal-testing/tiny-random-BioGptForCausalLM",
+    "bitnet": "optimum-intel-internal-testing/tiny-random-bitnet",
     "blenderbot-small": "optimum-intel-internal-testing/tiny-random-BlenderbotModel",
     "blenderbot": "optimum-intel-internal-testing/tiny-random-BlenderbotModel",
     "bloom": "optimum-intel-internal-testing/tiny-random-BloomModel",
```
