diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index a1fcf42d49..2304702fa7 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -272,8 +272,11 @@ def main_export( supported_quant_methods = ["gptq"] if is_openvino_version(">=", "2024.6.0"): supported_quant_methods.append("awq") + if is_openvino_version(">=", "2025.4.0"): + supported_quant_methods.append("bitnet") do_quant_patching = quant_method in supported_quant_methods do_gptq_patching = quant_method == "gptq" + do_bitnet_patching = quant_method == "bitnet" model_type = config.model_type if model_type not in TasksManager._SUPPORTED_MODEL_TYPE: @@ -364,6 +367,22 @@ class StoreAttr(object): return model GPTQQuantizer.post_init_model = post_init_model + if do_bitnet_patching: + from transformers.integrations.bitnet import AutoBitLinear + + orig_load_hook = AutoBitLinear.load_hook + + # rewrite load hook to save original weight + def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs): + if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype: + self.original_weight = state_dict[prefix + "weight"] + w_shape = self.original_weight.shape + state_dict[prefix + "weight"] = torch.empty( + (w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta" + ) + return state_dict + + AutoBitLinear.load_hook = bitnet_load_hook elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"): _loading_kwargs = {} if variant is None else {"variant": variant} if dtype == "auto" or dtype is None: @@ -548,6 +567,8 @@ class StoreAttr(object): torch.cuda.is_available = orig_cuda_check if do_gptq_patching: GPTQQuantizer.post_init_model = orig_post_init_model + if do_bitnet_patching: + AutoBitLinear.load_hook = orig_load_hook def maybe_convert_tokenizers(library_name: str, output: Path, model=None, preprocessors=None, task=None): diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index b8ffcdcf26..ba3f545c83 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -578,6 +578,21 @@ class GptOssOpenVINOConfig(LlamaOpenVINOConfig): MIN_TRANSFORMERS_VERSION = "4.55.1" +@register_in_tasks_manager( + "bitnet", + *[ + "text-generation", + "text-generation-with-past", + ], + library_name="transformers", +) +class BitnetOpenVINOConfig(LlamaOnnxConfig): + def patch_model_for_export( + self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None + ) -> "ModelPatcher": + return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs) + + @register_in_tasks_manager( "exaone", *[ diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 660f705457..581fc4cb97 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -25,10 +25,11 @@ from optimum.intel.pipelines import pipeline as optimum_pipeline from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version - if is_transformers_version(">=", "4.55"): from transformers import Mxfp4Config +torch.compile = lambda func: func # Mock torch.compile to avoid compilation errors in tests + class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES = ( @@ -116,6 +117,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.53.0"): SUPPORTED_ARCHITECTURES += ("arcee",) + if is_openvino_version(">=", "2025.4.0"): + SUPPORTED_ARCHITECTURES += ("bitnet",) if is_transformers_version(">=", "4.54.0"): # remote code models differs after transformers v4.54 @@ -214,6 +217,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "arcee": 2, "gpt_oss": 2 if is_openvino_version(">=", "2025.4") else 0, "gpt_oss_mxfp4": 2 if is_openvino_version(">=", "2025.4") else 0, + "bitnet": 6, } # TODO: remove gptq/awq from here diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 603770b53c..59bfae94ec 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -48,6 +48,7 @@ "baichuan2-13b": "optimum-intel-internal-testing/tiny-random-baichuan2-13b", "bigbird_pegasus": "optimum-intel-internal-testing/tiny-random-bigbird_pegasus", "biogpt": "optimum-intel-internal-testing/tiny-random-BioGptForCausalLM", + "bitnet": "optimum-intel-internal-testing/tiny-random-bitnet", "blenderbot-small": "optimum-intel-internal-testing/tiny-random-BlenderbotModel", "blenderbot": "optimum-intel-internal-testing/tiny-random-BlenderbotModel", "bloom": "optimum-intel-internal-testing/tiny-random-BloomModel",