
Commit ddef518

Introduce OPENVINO_TEST_DEVICE (#1479)
* use openvino device
* using optimum-intel-internal-testing
* fix
* add glm4 to remote code
* fix naming of models
* fix missing task
1 parent 631747d commit ddef518
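
This commit routes every model load in the OpenVINO test suite through a shared `OPENVINO_DEVICE` constant imported from `utils_tests`, so the target device can be selected per test run instead of being hard-coded. The diff below does not include `utils_tests.py` itself; as a minimal sketch, assuming `OPENVINO_DEVICE` reads the `OPENVINO_TEST_DEVICE` environment variable named in the commit title and falls back to CPU, it presumably looks something like:

```python
# Hypothetical sketch of the tests/openvino/utils_tests.py additions; the
# actual implementation is in one of the other changed files not shown here.
import os

# Device all tests compile models on; the "CPU" fallback is an assumption.
OPENVINO_DEVICE = os.environ.get("OPENVINO_TEST_DEVICE", "CPU")

# Shared constants that this commit moves out of test_decoder.py
# (their values are taken from the lines deleted in the diff below).
SEED = 42
F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
```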

9 files changed: +525 −400 lines changed

tests/openvino/test_decoder.py

Lines changed: 40 additions & 26 deletions
```diff
@@ -4,20 +4,20 @@
 import platform
 import unittest
 
-import numpy as np
 import pytest
 import torch
 from parameterized import parameterized
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    GenerationConfig,
-    PretrainedConfig,
-    pipeline,
-    set_seed,
-)
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, PretrainedConfig, pipeline, set_seed
 from transformers.testing_utils import slow
-from utils_tests import MODEL_NAMES, get_num_sdpa, mock_torch_cuda_is_available, patch_awq_for_inference
+from utils_tests import (
+    F32_CONFIG,
+    MODEL_NAMES,
+    OPENVINO_DEVICE,
+    SEED,
+    get_num_sdpa,
+    mock_torch_cuda_is_available,
+    patch_awq_for_inference,
+)
 
 from optimum.exporters.openvino.model_patcher import patch_update_causal_mask
 from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification
@@ -29,10 +29,6 @@
 if is_transformers_version(">=", "4.55"):
     from transformers import Mxfp4Config
 
-SEED = 42
-F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
-TENSOR_ALIAS_TO_TYPE = {"pt": torch.Tensor, "np": np.ndarray}
-
 
 class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -240,7 +236,9 @@ def test_compare_to_transformers(self, model_arch):
         if model_arch == "gemma2":
             model_kwargs["attn_implementation"] = "sdpa"
 
-        ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
+        ov_model = OVModelForCausalLM.from_pretrained(
+            model_id, export=True, ov_config=F32_CONFIG, device=OPENVINO_DEVICE, **model_kwargs
+        )
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
@@ -388,7 +386,9 @@ def test_pipeline(self, model_arch):
             additional_args["use_model_defaults"] = False
 
         set_seed(SEED)
-        model = OVModelForCausalLM.from_pretrained(model_id, use_cache=True, compile=False, **model_kwargs)
+        model = OVModelForCausalLM.from_pretrained(
+            model_id, use_cache=True, compile=False, device=OPENVINO_DEVICE, **model_kwargs
+        )
         model.eval()
         model.config.encoder_no_repeat_ngram_size = 0
         model.to("cpu")
@@ -428,7 +428,7 @@ def test_pipeline(self, model_arch):
 
     def test_model_and_decoder_same_device(self):
         model_id = MODEL_NAMES["gpt2"]
-        model = OVModelForCausalLM.from_pretrained(model_id, export=True)
+        model = OVModelForCausalLM.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
         model.to("TEST")
         self.assertEqual(model._device, "TEST")
         # Verify that request is being reset
@@ -441,13 +441,17 @@ def test_compare_with_and_without_past_key_values(self):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokens = tokenizer("This is a sample input", return_tensors="pt")
 
-        model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=False)
+        model_with_pkv = OVModelForCausalLM.from_pretrained(
+            model_id, export=True, use_cache=True, stateful=False, device=OPENVINO_DEVICE
+        )
         outputs_model_with_pkv = model_with_pkv.generate(
             **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
         )
         del model_with_pkv
 
-        model_without_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=False)
+        model_without_pkv = OVModelForCausalLM.from_pretrained(
+            model_id, export=True, use_cache=False, device=OPENVINO_DEVICE
+        )
         outputs_model_without_pkv = model_without_pkv.generate(
             **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
         )
@@ -457,7 +461,9 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
         self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
 
-        model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
+        model_stateful = OVModelForCausalLM.from_pretrained(
+            model_id, export=True, use_cache=True, stateful=True, device=OPENVINO_DEVICE
+        )
         outputs_model_stateful = model_stateful.generate(
             **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
         )
@@ -475,7 +481,9 @@ def test_print_model_properties(self):
         # test setting OPENVINO_LOG_LEVEL to 3, which calls _print_compiled_model_properties
        openvino_log_level = os.environ.get("OPENVINO_LOG_LEVEL", None)
         os.environ["OPENVINO_LOG_LEVEL"] = "3"
-        model = OVModelForSequenceClassification.from_pretrained(MODEL_NAMES["bert"], export=True)
+        model = OVModelForSequenceClassification.from_pretrained(
+            MODEL_NAMES["bert"], export=True, device=OPENVINO_DEVICE
+        )
         if openvino_log_level is not None:
             os.environ["OPENVINO_LOG_LEVEL"] = openvino_log_level
         # test calling function directly
@@ -496,7 +504,9 @@ def test_auto_device_loading(self):
 
     def test_default_filling_attention_mask(self):
         model_id = MODEL_NAMES["gpt2"]
-        model_with_cache = OVModelForCausalLM.from_pretrained(model_id, stateful=False, use_cache=True)
+        model_with_cache = OVModelForCausalLM.from_pretrained(
+            model_id, stateful=False, use_cache=True, device=OPENVINO_DEVICE
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
         texts = ["this is a simple input"]
@@ -519,7 +529,9 @@ def test_default_filling_attention_mask(self):
 
     def test_default_filling_attention_mask_and_position_ids(self):
         model_id = MODEL_NAMES["llama"]
-        model_with_cache = OVModelForCausalLM.from_pretrained(model_id, stateful=False, use_cache=True)
+        model_with_cache = OVModelForCausalLM.from_pretrained(
+            model_id, stateful=False, use_cache=True, device=OPENVINO_DEVICE
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
         texts = ["this is a simple input"]
@@ -613,11 +625,11 @@ def test_beam_search(self, model_arch):
         ]
         set_seed(SEED)
         ov_model_stateful = OVModelForCausalLM.from_pretrained(
-            model_id, export=True, use_cache=True, stateful=True, **model_kwargs
+            model_id, export=True, use_cache=True, stateful=True, device=OPENVINO_DEVICE, **model_kwargs
         )
         set_seed(SEED)
         ov_model_stateless = OVModelForCausalLM.from_pretrained(
-            model_id, export=True, use_cache=True, stateful=False, **model_kwargs
+            model_id, export=True, use_cache=True, stateful=False, device=OPENVINO_DEVICE, **model_kwargs
         )
         if "awq" in model_arch or "gptq" in model_arch:
             # infer in FP32
@@ -728,7 +740,9 @@ def test_load_with_different_dtype(self):
             torch_dtypes.append("bfloat16")
 
         for dtype in torch_dtypes:
-            ov_model = OVModelForCausalLM.from_pretrained(model_id=model_id, export=True, torch_dtype=dtype)
+            ov_model = OVModelForCausalLM.from_pretrained(
+                model_id=model_id, export=True, torch_dtype=dtype, device=OPENVINO_DEVICE
+            )
             ov_logits = ov_model(**test_input).logits
             self.assertTrue(
                 torch.allclose(torch.Tensor(ov_logits), ref_logits, atol=5e-3),
```
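
With the constant wired this way, the same suite can be pointed at another accelerator without touching the tests, e.g. by exporting `OPENVINO_TEST_DEVICE=GPU` before invoking pytest. A minimal illustration of the pattern the diff applies at every call site, assuming the hypothetical `utils_tests` sketch above:

```python
# Sketch of the repeated change: every from_pretrained call now pins
# compilation to the shared test device instead of the library default.
from utils_tests import MODEL_NAMES, OPENVINO_DEVICE

from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    MODEL_NAMES["gpt2"], export=True, device=OPENVINO_DEVICE
)
```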
