@@ -258,8 +258,11 @@ def main_export(
258258 supported_quant_methods = ["gptq" ]
259259 if is_openvino_version (">=" , "2024.6.0" ):
260260 supported_quant_methods .append ("awq" )
261+ if is_openvino_version (">=" , "2025.3.0" ):
262+ supported_quant_methods .append ("bitnet" )
261263 do_quant_patching = quantization_config and quantization_config ["quant_method" ] in supported_quant_methods
262264 do_gptq_patching = do_quant_patching and quantization_config ["quant_method" ] == "gptq"
265+ do_bitnet_patching = do_quant_patching and quantization_config ["quant_method" ] == "bitnet"
263266 model_type = config .model_type
264267 if model_type not in TasksManager ._SUPPORTED_MODEL_TYPE :
265268 custom_architecture = True
@@ -356,6 +359,21 @@ class StoreAttr(object):
356359 return model
357360
358361 GPTQQuantizer .post_init_model = post_init_model
362+ if do_bitnet_patching :
363+ from transformers .integrations .bitnet import AutoBitLinear , unpack_weights
364+ import functools
365+
366+ orig_load_hook = AutoBitLinear .load_hook
367+
368+ # rewrite load hook to save original weight
@functools.wraps(orig_load_hook)
def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
    """Load-state-dict hook that keeps the checkpoint weight on the module.

    When ``state_dict`` holds an entry under ``prefix + "weight"`` whose dtype
    differs from ``self.weight.dtype``, the checkpoint tensor is stored on the
    module as ``self.original_weight`` and the state-dict entry is replaced
    with the result of ``unpack_weights`` moved to the ``meta`` device.
    In every other case ``state_dict`` is left untouched.

    Args:
        self: module instance the hook is bound to; must expose ``weight``.
        state_dict: mapping of parameter names to tensors being loaded;
            modified in place.
        prefix: name prefix of this module's entries inside ``state_dict``.
        *args: extra positional hook arguments, accepted and ignored.
        **kwargs: extra keyword hook arguments, accepted and ignored.

    Returns:
        The same ``state_dict`` object that was passed in.
    """
    weight_key = prefix + "weight"
    if weight_key not in state_dict:
        return state_dict

    checkpoint_weight = state_dict[weight_key]
    target_dtype = self.weight.dtype
    if checkpoint_weight.dtype != target_dtype:
        # Remember the tensor exactly as it came from the checkpoint
        # (presumably the packed representation -- confirm against unpack_weights).
        self.original_weight = checkpoint_weight
        unpacked = unpack_weights(checkpoint_weight, dtype=target_dtype)
        # Only a meta-device placeholder is put back into the state dict.
        state_dict[weight_key] = unpacked.to(torch.device("meta"))
    return state_dict
375+
376+ AutoBitLinear .load_hook = bitnet_load_hook
359377 elif library_name == "diffusers" and is_openvino_version (">=" , "2024.6" ):
360378 _loading_kwargs = {} if variant is None else {"variant" : variant }
361379 if dtype == "auto" or dtype is None :
@@ -531,6 +549,8 @@ class StoreAttr(object):
531549 torch .cuda .is_available = orig_cuda_check
532550 if do_gptq_patching :
533551 GPTQQuantizer .post_init_model = orig_post_init_model
552+ if do_bitnet_patching :
553+ AutoBitLinear .load_hook = orig_load_hook
534554
535555
536556def maybe_convert_tokenizers (library_name : str , output : Path , model = None , preprocessors = None , task = None ):
0 commit comments