Skip to content

Commit 9c3e916

Browse files
committed
merge main in branch
2 parents f0f15d3 + ddef518 commit 9c3e916

File tree

9 files changed

+535
-411
lines changed

9 files changed

+535
-411
lines changed

tests/openvino/test_decoder.py

Lines changed: 41 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -4,23 +4,23 @@
44
import platform
55
import unittest
66

7-
import numpy as np
87
import pytest
98
import torch
109
from parameterized import parameterized
11-
from transformers import (
12-
AutoModelForCausalLM,
13-
AutoTokenizer,
14-
GenerationConfig,
15-
PretrainedConfig,
16-
pipeline,
17-
set_seed,
18-
)
10+
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, PretrainedConfig, pipeline, set_seed
1911
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
2012
from transformers.testing_utils import slow
21-
from utils_tests import MODEL_NAMES, get_num_sdpa, mock_torch_cuda_is_available, patch_awq_for_inference
13+
from utils_tests import (
14+
F32_CONFIG,
15+
MODEL_NAMES,
16+
OPENVINO_DEVICE,
17+
SEED,
18+
get_num_sdpa,
19+
mock_torch_cuda_is_available,
20+
patch_awq_for_inference,
21+
)
2222

23-
from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig,NemotronOnnxConfig
23+
from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig, NemotronOnnxConfig
2424
from optimum.exporters.openvino.model_patcher import patch_update_causal_mask
2525
from optimum.exporters.tasks import TasksManager
2626
from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification
@@ -32,10 +32,6 @@
3232
if is_transformers_version(">=", "4.55"):
3333
from transformers import Mxfp4Config
3434

35-
SEED = 42
36-
F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
37-
TENSOR_ALIAS_TO_TYPE = {"pt": torch.Tensor, "np": np.ndarray}
38-
3935

4036
class OVModelForCausalLMIntegrationTest(unittest.TestCase):
4137
SUPPORTED_ARCHITECTURES = (
@@ -287,7 +283,9 @@ def test_compare_to_transformers(self, model_arch):
287283
if model_arch == "gemma2":
288284
model_kwargs["attn_implementation"] = "sdpa"
289285

290-
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
286+
ov_model = OVModelForCausalLM.from_pretrained(
287+
model_id, export=True, ov_config=F32_CONFIG, device=OPENVINO_DEVICE, **model_kwargs
288+
)
291289
self.assertIsInstance(ov_model.config, PretrainedConfig)
292290
self.assertTrue(ov_model.use_cache)
293291
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
@@ -435,7 +433,9 @@ def test_pipeline(self, model_arch):
435433
additional_args["use_model_defaults"] = False
436434

437435
set_seed(SEED)
438-
model = OVModelForCausalLM.from_pretrained(model_id, use_cache=True, compile=False, **model_kwargs)
436+
model = OVModelForCausalLM.from_pretrained(
437+
model_id, use_cache=True, compile=False, device=OPENVINO_DEVICE, **model_kwargs
438+
)
439439
model.eval()
440440
model.config.encoder_no_repeat_ngram_size = 0
441441
model.to("cpu")
@@ -475,7 +475,7 @@ def test_pipeline(self, model_arch):
475475

476476
def test_model_and_decoder_same_device(self):
477477
model_id = MODEL_NAMES["gpt2"]
478-
model = OVModelForCausalLM.from_pretrained(model_id, export=True)
478+
model = OVModelForCausalLM.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
479479
model.to("TEST")
480480
self.assertEqual(model._device, "TEST")
481481
# Verify that request is being reset
@@ -488,13 +488,17 @@ def test_compare_with_and_without_past_key_values(self):
488488
tokenizer = AutoTokenizer.from_pretrained(model_id)
489489
tokens = tokenizer("This is a sample input", return_tensors="pt")
490490

491-
model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=False)
491+
model_with_pkv = OVModelForCausalLM.from_pretrained(
492+
model_id, export=True, use_cache=True, stateful=False, device=OPENVINO_DEVICE
493+
)
492494
outputs_model_with_pkv = model_with_pkv.generate(
493495
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
494496
)
495497
del model_with_pkv
496498

497-
model_without_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=False)
499+
model_without_pkv = OVModelForCausalLM.from_pretrained(
500+
model_id, export=True, use_cache=False, device=OPENVINO_DEVICE
501+
)
498502
outputs_model_without_pkv = model_without_pkv.generate(
499503
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
500504
)
@@ -504,7 +508,9 @@ def test_compare_with_and_without_past_key_values(self):
504508
self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
505509
self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
506510

507-
model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
511+
model_stateful = OVModelForCausalLM.from_pretrained(
512+
model_id, export=True, use_cache=True, stateful=True, device=OPENVINO_DEVICE
513+
)
508514
outputs_model_stateful = model_stateful.generate(
509515
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
510516
)
@@ -522,7 +528,9 @@ def test_print_model_properties(self):
522528
# test setting OPENVINO_LOG_LEVEL to 3, which calls _print_compiled_model_properties
523529
openvino_log_level = os.environ.get("OPENVINO_LOG_LEVEL", None)
524530
os.environ["OPENVINO_LOG_LEVEL"] = "3"
525-
model = OVModelForSequenceClassification.from_pretrained(MODEL_NAMES["bert"], export=True)
531+
model = OVModelForSequenceClassification.from_pretrained(
532+
MODEL_NAMES["bert"], export=True, device=OPENVINO_DEVICE
533+
)
526534
if openvino_log_level is not None:
527535
os.environ["OPENVINO_LOG_LEVEL"] = openvino_log_level
528536
# test calling function directly
@@ -543,7 +551,9 @@ def test_auto_device_loading(self):
543551

544552
def test_default_filling_attention_mask(self):
545553
model_id = MODEL_NAMES["gpt2"]
546-
model_with_cache = OVModelForCausalLM.from_pretrained(model_id, stateful=False, use_cache=True)
554+
model_with_cache = OVModelForCausalLM.from_pretrained(
555+
model_id, stateful=False, use_cache=True, device=OPENVINO_DEVICE
556+
)
547557
tokenizer = AutoTokenizer.from_pretrained(model_id)
548558
tokenizer.pad_token = tokenizer.eos_token
549559
texts = ["this is a simple input"]
@@ -566,7 +576,9 @@ def test_default_filling_attention_mask(self):
566576

567577
def test_default_filling_attention_mask_and_position_ids(self):
568578
model_id = MODEL_NAMES["llama"]
569-
model_with_cache = OVModelForCausalLM.from_pretrained(model_id, stateful=False, use_cache=True)
579+
model_with_cache = OVModelForCausalLM.from_pretrained(
580+
model_id, stateful=False, use_cache=True, device=OPENVINO_DEVICE
581+
)
570582
tokenizer = AutoTokenizer.from_pretrained(model_id)
571583
tokenizer.pad_token = tokenizer.eos_token
572584
texts = ["this is a simple input"]
@@ -660,11 +672,11 @@ def test_beam_search(self, model_arch):
660672
]
661673
set_seed(SEED)
662674
ov_model_stateful = OVModelForCausalLM.from_pretrained(
663-
model_id, export=True, use_cache=True, stateful=True, **model_kwargs
675+
model_id, export=True, use_cache=True, stateful=True, device=OPENVINO_DEVICE, **model_kwargs
664676
)
665677
set_seed(SEED)
666678
ov_model_stateless = OVModelForCausalLM.from_pretrained(
667-
model_id, export=True, use_cache=True, stateful=False, **model_kwargs
679+
model_id, export=True, use_cache=True, stateful=False, device=OPENVINO_DEVICE, **model_kwargs
668680
)
669681
if "awq" in model_arch or "gptq" in model_arch:
670682
# infer in FP32
@@ -775,7 +787,9 @@ def test_load_with_different_dtype(self):
775787
torch_dtypes.append("bfloat16")
776788

777789
for dtype in torch_dtypes:
778-
ov_model = OVModelForCausalLM.from_pretrained(model_id=model_id, export=True, torch_dtype=dtype)
790+
ov_model = OVModelForCausalLM.from_pretrained(
791+
model_id=model_id, export=True, torch_dtype=dtype, device=OPENVINO_DEVICE
792+
)
779793
ov_logits = ov_model(**test_input).logits
780794
self.assertTrue(
781795
torch.allclose(torch.Tensor(ov_logits), ref_logits, atol=5e-3),

0 commit comments

Comments
 (0)