2121from utils_tests import (
2222 _ARCHITECTURES_TO_EXPECTED_INT8 ,
2323 MODEL_NAMES ,
24- compare_num_quantized_nodes_per_model ,
24+ check_compression_state_per_model ,
2525 get_num_quantized_nodes ,
2626)
2727
@@ -192,27 +192,27 @@ class OVCLIExportTestCase(unittest.TestCase):
192192 "image-text-to-text" ,
193193 "llava_next" ,
194194 "int4 --group-size 16 --ratio 0.8" ,
195- [{"int8" : 14 , "int4" : 16 }, {"int8" : 9 }, {"int8" : 1 }],
195+ [{"int8" : 14 , "int4" : 16 }, {"int8" : 1 }, {"int8" : 9 }],
196196 ),
197197 (
198198 "image-text-to-text" ,
199199 "llava_next" ,
200200 'int4 --group-size 16 --ratio 0.8 --sensitivity-metric "hessian_input_activation" '
201201 "--dataset contextual --num-samples 1" ,
202- [{"int8" : 6 , "int4" : 24 }, {"int8" : 9 }, {"int8" : 1 }],
202+ [{"int8" : 6 , "int4" : 24 }, {"int8" : 1 }, {"int8" : 9 }],
203203 ),
204204 (
205205 "image-text-to-text" ,
206206 "nanollava" ,
207207 "int4 --group-size 8 --ratio 0.8 --trust-remote-code" ,
208- [{"int8" : 16 , "int4" : 14 }, {"int8" : 15 }, {"int8" : 1 }],
208+ [{"int8" : 16 , "int4" : 14 }, {"int8" : 1 }, {"int8" : 15 }],
209209 ),
210210 (
211211 "image-text-to-text" ,
212212 "nanollava" ,
213213 'int4 --group-size 8 --ratio 0.8 --sensitivity-metric "mean_activation_variance" '
214214 "--dataset contextual --num-samples 1 --trust-remote-code" ,
215- [{"int8" : 16 , "int4" : 14 }, {"int8" : 15 }, {"int8" : 1 }],
215+ [{"int8" : 16 , "int4" : 14 }, {"int8" : 1 }, {"int8" : 15 }],
216216 ),
217217 ]
218218 )
@@ -224,40 +224,40 @@ class OVCLIExportTestCase(unittest.TestCase):
224224 "image-text-to-text" ,
225225 "minicpmv" ,
226226 "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
227- [{"int8" : 10 , "int4" : 20 }, {"int8" : 26 }, {"int8" : 1 }, {"int8" : 6 }],
227+ [{"int8" : 10 , "int4" : 20 }, {"int8" : 1 }, {"int8" : 26 }, {"int8" : 6 }],
228228 ),
229229 (
230230 "image-text-to-text" ,
231231 "minicpmv" ,
232232 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
233233 "--dataset contextual --num-samples 1 --trust-remote-code" ,
234- [{"int8" : 8 , "int4" : 22 }, {"int8" : 26 }, {"int8" : 1 }, {"int8" : 6 }],
234+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 1 }, {"int8" : 26 }, {"int8" : 6 }],
235235 ),
236236 (
237237 "image-text-to-text" ,
238238 "internvl2" ,
239239 "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
240- [{"int8" : 8 , "int4" : 22 }, {"int8" : 11 }, {"int8" : 1 }],
240+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 1 }, {"int8" : 11 }],
241241 ),
242242 (
243243 "image-text-to-text" ,
244244 "internvl2" ,
245245 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
246246 "--dataset contextual --num-samples 1 --trust-remote-code" ,
247- [{"int8" : 8 , "int4" : 22 }, {"int8" : 11 }, {"int8" : 1 }],
247+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 1 }, {"int8" : 11 }],
248248 ),
249249 (
250250 "image-text-to-text" ,
251251 "phi3_v" ,
252252 "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
253- [{"int8" : 8 , "int4" : 10 }, {"int8" : 7 }, {"int8" : 1 }, {"int8" : 2 }],
253+ [{"int8" : 8 , "int4" : 10 }, {"int8" : 1 }, {"int8" : 7 }, {"int8" : 2 }],
254254 ),
255255 (
256256 "image-text-to-text" ,
257257 "phi3_v" ,
258258 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
259259 "--dataset contextual --num-samples 1 --trust-remote-code" ,
260- [{"int8" : 4 , "int4" : 14 }, {"int8" : 7 }, {"int8" : 1 }, {"int8" : 2 }],
260+ [{"int8" : 4 , "int4" : 14 }, {"int8" : 1 }, {"int8" : 7 }, {"int8" : 2 }],
261261 ),
262262 (
263263 "image-text-to-text" ,
@@ -369,14 +369,15 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
369369 model .text_encoder if model_type in ["stable-diffusion" , "sana" ] else model .text_encoder_2
370370 )
371371 elif task .startswith ("image-text-to-text" ):
372- models = [ model .language_model , model . vision_embeddings ]
372+ models = list ( model .submodels . values ())
373373 else :
374374 models = [model ]
375375
376376 expected_int8 = _ARCHITECTURES_TO_EXPECTED_INT8 [model_type ]
377- for i , model in enumerate (models ):
378- _ , num_weight_nodes = get_num_quantized_nodes (model )
379- self .assertEqual (expected_int8 [i ], num_weight_nodes ["int8" ])
377+ expected_int8 = [{"int8" : it } for it in expected_int8 ]
378+ if task .startswith ("text2text-generation" ) and (not task .endswith ("with-past" ) or model .decoder .stateful ):
379+ expected_int8 = expected_int8 [:2 ]
380+ check_compression_state_per_model (self , models , expected_int8 )
380381
381382 @parameterized .expand (SUPPORTED_SD_HYBRID_ARCHITECTURES )
382383 def test_exporters_cli_hybrid_quantization (
@@ -389,11 +390,11 @@ def test_exporters_cli_hybrid_quantization(
389390 check = True ,
390391 )
391392 model = eval (_HEAD_TO_AUTOMODELS [model_type .replace ("-refiner" , "" )]).from_pretrained (tmpdir )
392- num_fake_nodes , num_weight_nodes = get_num_quantized_nodes (
393- model .unet if model .unet is not None else model .transformer
394- )
393+ vision_model = model .unet .model if model .unet is not None else model .transformer .model
394+ num_fake_nodes , num_weight_nodes = get_num_quantized_nodes (vision_model )
395395 self .assertEqual (expected_int8_nodes , num_weight_nodes ["int8" ])
396396 self .assertEqual (expected_fake_nodes , num_fake_nodes )
397+ self .assertFalse (vision_model .has_rt_info (["runtime_options" , "KV_CACHE_PRECISION" ]))
397398
398399 @parameterized .expand (TEST_4BIT_CONFIGURATIONS )
399400 def test_exporters_cli_4bit (
@@ -419,10 +420,9 @@ def test_exporters_cli_4bit(
419420 if task == "text-generation-with-past" :
420421 submodels = [model ]
421422 elif task == "image-text-to-text" :
422- submodels = [model .lm_model , model .vision_embeddings_model , model .text_embeddings_model ]
423- submodels += [getattr (model , part ) for part in model .additional_parts ]
423+ submodels = list (model .submodels .values ())
424424
425- compare_num_quantized_nodes_per_model (self , submodels , expected_num_weight_nodes_per_model )
425+ check_compression_state_per_model (self , submodels , expected_num_weight_nodes_per_model )
426426
427427 self .assertTrue ("--awq" not in option or b"Applying AWQ" in result .stdout )
428428 self .assertTrue ("--scale-estimation" not in option or b"Applying Scale Estimation" in result .stdout )
0 commit comments