44import platform
55import unittest
66
7- import numpy as np
87import pytest
98import torch
109from parameterized import parameterized
11- from transformers import (
12- AutoModelForCausalLM ,
13- AutoTokenizer ,
14- GenerationConfig ,
15- PretrainedConfig ,
16- pipeline ,
17- set_seed ,
18- )
10+ from transformers import AutoModelForCausalLM , AutoTokenizer , GenerationConfig , PretrainedConfig , pipeline , set_seed
1911from transformers .models .auto .configuration_auto import CONFIG_MAPPING_NAMES
2012from transformers .testing_utils import slow
21- from utils_tests import MODEL_NAMES , get_num_sdpa , mock_torch_cuda_is_available , patch_awq_for_inference
13+ from utils_tests import (
14+ F32_CONFIG ,
15+ MODEL_NAMES ,
16+ OPENVINO_DEVICE ,
17+ SEED ,
18+ get_num_sdpa ,
19+ mock_torch_cuda_is_available ,
20+ patch_awq_for_inference ,
21+ )
2222
23- from optimum .exporters .openvino .model_configs import DeepseekOpenVINOConfig ,NemotronOnnxConfig
23+ from optimum .exporters .openvino .model_configs import DeepseekOpenVINOConfig , NemotronOnnxConfig
2424from optimum .exporters .openvino .model_patcher import patch_update_causal_mask
2525from optimum .exporters .tasks import TasksManager
2626from optimum .intel import OVModelForCausalLM , OVModelForSequenceClassification
3232if is_transformers_version (">=" , "4.55" ):
3333 from transformers import Mxfp4Config
3434
35- SEED = 42
36- F32_CONFIG = {"INFERENCE_PRECISION_HINT" : "f32" }
37- TENSOR_ALIAS_TO_TYPE = {"pt" : torch .Tensor , "np" : np .ndarray }
38-
3935
4036class OVModelForCausalLMIntegrationTest (unittest .TestCase ):
4137 SUPPORTED_ARCHITECTURES = (
@@ -287,7 +283,9 @@ def test_compare_to_transformers(self, model_arch):
287283 if model_arch == "gemma2" :
288284 model_kwargs ["attn_implementation" ] = "sdpa"
289285
290- ov_model = OVModelForCausalLM .from_pretrained (model_id , export = True , ov_config = F32_CONFIG , ** model_kwargs )
286+ ov_model = OVModelForCausalLM .from_pretrained (
287+ model_id , export = True , ov_config = F32_CONFIG , device = OPENVINO_DEVICE , ** model_kwargs
288+ )
291289 self .assertIsInstance (ov_model .config , PretrainedConfig )
292290 self .assertTrue (ov_model .use_cache )
293291 tokenizer = AutoTokenizer .from_pretrained (model_id , trust_remote_code = model_arch in self .REMOTE_CODE_MODELS )
@@ -435,7 +433,9 @@ def test_pipeline(self, model_arch):
435433 additional_args ["use_model_defaults" ] = False
436434
437435 set_seed (SEED )
438- model = OVModelForCausalLM .from_pretrained (model_id , use_cache = True , compile = False , ** model_kwargs )
436+ model = OVModelForCausalLM .from_pretrained (
437+ model_id , use_cache = True , compile = False , device = OPENVINO_DEVICE , ** model_kwargs
438+ )
439439 model .eval ()
440440 model .config .encoder_no_repeat_ngram_size = 0
441441 model .to ("cpu" )
@@ -475,7 +475,7 @@ def test_pipeline(self, model_arch):
475475
476476 def test_model_and_decoder_same_device (self ):
477477 model_id = MODEL_NAMES ["gpt2" ]
478- model = OVModelForCausalLM .from_pretrained (model_id , export = True )
478+ model = OVModelForCausalLM .from_pretrained (model_id , export = True , device = OPENVINO_DEVICE )
479479 model .to ("TEST" )
480480 self .assertEqual (model ._device , "TEST" )
481481 # Verify that request is being reset
@@ -488,13 +488,17 @@ def test_compare_with_and_without_past_key_values(self):
488488 tokenizer = AutoTokenizer .from_pretrained (model_id )
489489 tokens = tokenizer ("This is a sample input" , return_tensors = "pt" )
490490
491- model_with_pkv = OVModelForCausalLM .from_pretrained (model_id , export = True , use_cache = True , stateful = False )
491+ model_with_pkv = OVModelForCausalLM .from_pretrained (
492+ model_id , export = True , use_cache = True , stateful = False , device = OPENVINO_DEVICE
493+ )
492494 outputs_model_with_pkv = model_with_pkv .generate (
493495 ** tokens , min_length = self .GENERATION_LENGTH , max_length = self .GENERATION_LENGTH , num_beams = 1
494496 )
495497 del model_with_pkv
496498
497- model_without_pkv = OVModelForCausalLM .from_pretrained (model_id , export = True , use_cache = False )
499+ model_without_pkv = OVModelForCausalLM .from_pretrained (
500+ model_id , export = True , use_cache = False , device = OPENVINO_DEVICE
501+ )
498502 outputs_model_without_pkv = model_without_pkv .generate (
499503 ** tokens , min_length = self .GENERATION_LENGTH , max_length = self .GENERATION_LENGTH , num_beams = 1
500504 )
@@ -504,7 +508,9 @@ def test_compare_with_and_without_past_key_values(self):
504508 self .assertEqual (outputs_model_with_pkv .shape [1 ], self .GENERATION_LENGTH )
505509 self .assertEqual (outputs_model_without_pkv .shape [1 ], self .GENERATION_LENGTH )
506510
507- model_stateful = OVModelForCausalLM .from_pretrained (model_id , export = True , use_cache = True , stateful = True )
511+ model_stateful = OVModelForCausalLM .from_pretrained (
512+ model_id , export = True , use_cache = True , stateful = True , device = OPENVINO_DEVICE
513+ )
508514 outputs_model_stateful = model_stateful .generate (
509515 ** tokens , min_length = self .GENERATION_LENGTH , max_length = self .GENERATION_LENGTH , num_beams = 1
510516 )
@@ -522,7 +528,9 @@ def test_print_model_properties(self):
522528 # test setting OPENVINO_LOG_LEVEL to 3, which calls _print_compiled_model_properties
523529 openvino_log_level = os .environ .get ("OPENVINO_LOG_LEVEL" , None )
524530 os .environ ["OPENVINO_LOG_LEVEL" ] = "3"
525- model = OVModelForSequenceClassification .from_pretrained (MODEL_NAMES ["bert" ], export = True )
531+ model = OVModelForSequenceClassification .from_pretrained (
532+ MODEL_NAMES ["bert" ], export = True , device = OPENVINO_DEVICE
533+ )
526534 if openvino_log_level is not None :
527535 os .environ ["OPENVINO_LOG_LEVEL" ] = openvino_log_level
528536 # test calling function directly
@@ -543,7 +551,9 @@ def test_auto_device_loading(self):
543551
544552 def test_default_filling_attention_mask (self ):
545553 model_id = MODEL_NAMES ["gpt2" ]
546- model_with_cache = OVModelForCausalLM .from_pretrained (model_id , stateful = False , use_cache = True )
554+ model_with_cache = OVModelForCausalLM .from_pretrained (
555+ model_id , stateful = False , use_cache = True , device = OPENVINO_DEVICE
556+ )
547557 tokenizer = AutoTokenizer .from_pretrained (model_id )
548558 tokenizer .pad_token = tokenizer .eos_token
549559 texts = ["this is a simple input" ]
@@ -566,7 +576,9 @@ def test_default_filling_attention_mask(self):
566576
567577 def test_default_filling_attention_mask_and_position_ids (self ):
568578 model_id = MODEL_NAMES ["llama" ]
569- model_with_cache = OVModelForCausalLM .from_pretrained (model_id , stateful = False , use_cache = True )
579+ model_with_cache = OVModelForCausalLM .from_pretrained (
580+ model_id , stateful = False , use_cache = True , device = OPENVINO_DEVICE
581+ )
570582 tokenizer = AutoTokenizer .from_pretrained (model_id )
571583 tokenizer .pad_token = tokenizer .eos_token
572584 texts = ["this is a simple input" ]
@@ -660,11 +672,11 @@ def test_beam_search(self, model_arch):
660672 ]
661673 set_seed (SEED )
662674 ov_model_stateful = OVModelForCausalLM .from_pretrained (
663- model_id , export = True , use_cache = True , stateful = True , ** model_kwargs
675+ model_id , export = True , use_cache = True , stateful = True , device = OPENVINO_DEVICE , ** model_kwargs
664676 )
665677 set_seed (SEED )
666678 ov_model_stateless = OVModelForCausalLM .from_pretrained (
667- model_id , export = True , use_cache = True , stateful = False , ** model_kwargs
679+ model_id , export = True , use_cache = True , stateful = False , device = OPENVINO_DEVICE , ** model_kwargs
668680 )
669681 if "awq" in model_arch or "gptq" in model_arch :
670682 # infer in FP32
@@ -775,7 +787,9 @@ def test_load_with_different_dtype(self):
775787 torch_dtypes .append ("bfloat16" )
776788
777789 for dtype in torch_dtypes :
778- ov_model = OVModelForCausalLM .from_pretrained (model_id = model_id , export = True , torch_dtype = dtype )
790+ ov_model = OVModelForCausalLM .from_pretrained (
791+ model_id = model_id , export = True , torch_dtype = dtype , device = OPENVINO_DEVICE
792+ )
779793 ov_logits = ov_model (** test_input ).logits
780794 self .assertTrue (
781795 torch .allclose (torch .Tensor (ov_logits ), ref_logits , atol = 5e-3 ),
0 commit comments