 from copy import deepcopy
 
 from optimum.intel.openvino.quantization import InferRequestWrapper, OVCalibrationDatasetBuilder
-from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
+from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version, is_nncf_version
 from utils_tests import (
     MODEL_NAMES,
     get_num_quantized_nodes,
@@ -158,8 +158,8 @@ class OVQuantizerTest(unittest.TestCase):
         (
             OVModelForCausalLM,
             "llama",
-            OVMixedQuantizationConfig(
-                weight_quantization_config=OVWeightQuantizationConfig(
+            dict(
+                weight_quantization_config=dict(
                     bits=4,
                     dtype="nf4",
                     group_size=16,
@@ -180,6 +180,31 @@ class OVQuantizerTest(unittest.TestCase):
                 "model": {"f8e4m3": 8, "nf4": 2},
             },
         ),
+        (
+            OVModelForCausalLM,
+            "llama",
+            dict(
+                weight_quantization_config=dict(
+                    bits=4,
+                    dtype="cb4",
+                    group_size=16,
+                    ratio=0.5,
+                    ignored_scope={"patterns": [f"{pattern_prefix}.layers.0.self_attn"]},
+                ),
+                full_quantization_config=OVQuantizationConfig(
+                    dtype="f8e4m3", ignored_scope={"patterns": [f"{pattern_prefix}.layers.0.mlp"]}
+                ),
+                ignored_scope={"patterns": [f"{pattern_prefix}.layers.1.self_attn"]},
+                dataset="wikitext2",
+                num_samples=1,
+            ),
+            {
+                "model": 8,
+            },
+            {
+                "model": {"int8": 2, "int4": 2, "f8e4m3": 10},
+            },
+        ),
         (
             OVModelForCausalLM,
             "llama",
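The new "cb4" entry above parametrizes the test with the dict form of a mixed quantization config. For reference, a minimal sketch of the object that dict deserializes into, assuming OVMixedQuantizationConfig accepts these keyword arguments (names taken verbatim from the test entry, not from separate API docs):

    from optimum.intel import (
        OVMixedQuantizationConfig,
        OVQuantizationConfig,
        OVWeightQuantizationConfig,
    )

    # Sketch only: equivalent of the dict-based test entry above.
    # "cb4" is the 4-bit codebook weight dtype exercised by the new case;
    # the remaining ops are fully quantized to f8e4m3 using a small
    # wikitext2 calibration sample.
    config = OVMixedQuantizationConfig(
        weight_quantization_config=OVWeightQuantizationConfig(
            bits=4, dtype="cb4", group_size=16, ratio=0.5
        ),
        full_quantization_config=OVQuantizationConfig(dtype="f8e4m3"),
        dataset="wikitext2",
        num_samples=1,
    )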
@@ -597,6 +622,12 @@ def test_ov_model_static_quantization_with_auto_dataset(
         expected_fake_nodes_per_model,
         expected_num_weight_nodes_per_model,
     ):
+        if (
+            isinstance(quantization_config, dict)
+            and quantization_config.get("weight_quantization_config", {}).get("dtype") == "cb4"
+            and is_nncf_version("<=", "2.17")
+        ):
+            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
         model_id = MODEL_NAMES[model_name]
 
         with TemporaryDirectory() as tmp_dir:
@@ -689,6 +720,13 @@ class OVWeightCompressionTest(unittest.TestCase):
             dict(bits=4, dtype="nf4", group_size=32),
             {"model": {"int8": 4, "nf4": 20}},
         ),
+        (
+            OVModelForCausalLM,
+            "gpt2",
+            False,
+            dict(bits=4, dtype="cb4", group_size=32),
+            {"model": {"int8": 24, "int4": 20, "f8e4m3": 20}},
+        ),
         (
             OVModelForCausalLM,
             "gpt2",
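For context, a minimal end-to-end sketch of applying the weight-only "cb4" config from the new entry via the usual optimum-intel export path; this requires an NNCF release with codebook support (2.18 or newer, per the skip guards in the next hunk):

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    # Sketch: 4-bit codebook weight compression applied at export time,
    # mirroring dict(bits=4, dtype="cb4", group_size=32) from the entry above.
    wq_config = OVWeightQuantizationConfig(bits=4, dtype="cb4", group_size=32)
    model = OVModelForCausalLM.from_pretrained(
        "gpt2", export=True, quantization_config=wq_config
    )
    model.save_pretrained("gpt2-ov-cb4")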
@@ -1345,6 +1383,13 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
     def test_ovmodel_4bit_auto_compression_with_config(
         self, model_cls, model_name, trust_remote_code, quantization_config, expected_num_weight_nodes_per_model
     ):
+        if (
+            isinstance(quantization_config, dict)
+            and quantization_config.get("dtype") == "cb4"
+            and is_nncf_version("<=", "2.17")
+        ):
+            pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
+
         model_id = MODEL_NAMES[model_name]
         with TemporaryDirectory() as tmp_dir:
             quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
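The test round-trips the dict parametrization through OVWeightQuantizationConfig.from_dict after the version guard. A condensed sketch of that guard-then-load pattern, using only names that appear in the diff:

    from optimum.intel import OVWeightQuantizationConfig
    from optimum.intel.utils.import_utils import is_nncf_version

    raw = dict(bits=4, dtype="cb4", group_size=32)
    # Codebook ("cb4") support is only available from NNCF 2.18 onward,
    # hence the pytest.skip on older versions in the tests above.
    if raw.get("dtype") == "cb4" and is_nncf_version("<=", "2.17"):
        raise RuntimeError("cb4 requires NNCF >= 2.18")
    config = OVWeightQuantizationConfig.from_dict(raw)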