Skip to content

Commit 965540f

Browse files
Add IPEX sentence transformers support (#1034)
* add import
* add IPEX sentence transformers support
* style
* fix style
* fix for python < 3.10
* Update tests/ipex/utils_tests.py
* Update tests/ipex/test_modeling.py

---------

Co-authored-by: Ilyas Moutawwakil <[email protected]>
1 parent 5c73548 commit 965540f

File tree

6 files changed

+183
-19
lines changed

6 files changed

+183
-19
lines changed

optimum/intel/__init__.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,14 @@
4141
from .utils import dummy_ipex_objects
4242

4343
_import_structure["utils.dummy_ipex_objects"] = [
44-
name for name in dir(dummy_ipex_objects) if not name.startswith("_")
44+
"IPEXModelForCausalLM",
45+
"IPEXModelForSequenceClassification",
46+
"IPEXModelForMaskedLM",
47+
"IPEXModelForTokenClassification",
48+
"IPEXModelForQuestionAnswering",
49+
"IPEXModelForImageClassification",
50+
"IPEXModelForAudioClassification",
51+
"IPEXModel",
4552
]
4653
else:
4754
_import_structure["ipex"] = [
@@ -55,6 +62,15 @@
5562
"IPEXModel",
5663
]
5764

65+
# Lazily register IPEXSentenceTransformer: the real class is exposed from the
# "ipex" submodule only when both IPEX and sentence-transformers are
# available; otherwise the placeholder in utils.dummy_ipex_objects is used.
try:
    if not (is_ipex_available() and is_sentence_transformers_available()):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # The IPEX block above only creates the "utils.dummy_ipex_objects" entry
    # when IPEX itself is missing. With IPEX installed but sentence-transformers
    # absent, that entry may not exist yet, so create it on demand rather than
    # indexing it directly (which would raise KeyError at import time).
    # NOTE(review): harmless if the key is pre-populated elsewhere in this file.
    _import_structure.setdefault("utils.dummy_ipex_objects", []).append("IPEXSentenceTransformer")
else:
    _import_structure["ipex"].append("IPEXSentenceTransformer")
72+
73+
5874
try:
5975
if not (is_openvino_available() and is_nncf_available()):
6076
raise OptionalDependencyNotAvailable()
@@ -212,15 +228,9 @@
212228
if not (is_openvino_available() and is_sentence_transformers_available()):
213229
raise OptionalDependencyNotAvailable()
214230
except OptionalDependencyNotAvailable:
215-
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = [
216-
"OVSentenceTransformer",
217-
]
231+
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = ["OVSentenceTransformer"]
218232
else:
219-
_import_structure["openvino"].extend(
220-
[
221-
"OVSentenceTransformer",
222-
]
223-
)
233+
_import_structure["openvino"].extend(["OVSentenceTransformer"])
224234

225235

226236
if TYPE_CHECKING:
@@ -241,6 +251,14 @@
241251
IPEXModelForTokenClassification,
242252
)
243253

254+
# Eager counterpart of the lazy IPEXSentenceTransformer registration
# (this hunk appears to sit in the TYPE_CHECKING branch — confirm against the
# full file). Import the real class when both optional dependencies are
# available, and the dummy placeholder otherwise.
if is_ipex_available() and is_sentence_transformers_available():
    from .ipex import IPEXSentenceTransformer
else:
    from .utils.dummy_ipex_objects import IPEXSentenceTransformer
261+
244262
try:
245263
if not (is_openvino_available() and is_nncf_available()):
246264
raise OptionalDependencyNotAvailable()
@@ -372,13 +390,9 @@
372390
if not (is_openvino_available() and is_sentence_transformers_available()):
373391
raise OptionalDependencyNotAvailable()
374392
except OptionalDependencyNotAvailable:
375-
from .utils.dummy_openvino_and_sentence_transformers_objects import (
376-
OVSentenceTransformer,
377-
)
393+
from .utils.dummy_openvino_and_sentence_transformers_objects import OVSentenceTransformer
378394
else:
379-
from .openvino import (
380-
OVSentenceTransformer,
381-
)
395+
from .openvino import OVSentenceTransformer
382396

383397
else:
384398
import sys

optimum/intel/ipex/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from optimum.intel.ipex.modeling_base import (
15+
from ..utils.import_utils import is_sentence_transformers_available
16+
from .modeling_base import (
1617
IPEXModel,
1718
IPEXModelForAudioClassification,
1819
IPEXModelForCausalLM,
@@ -22,3 +23,7 @@
2223
IPEXModelForSequenceClassification,
2324
IPEXModelForTokenClassification,
2425
)
26+
27+
28+
if is_sentence_transformers_available():
29+
from .modeling_sentence_transformers import IPEXSentenceTransformer
optimum/intel/ipex/modeling_sentence_transformers.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright 2024 The HuggingFace Team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from pathlib import Path
17+
from typing import Any, Dict, Optional
18+
19+
import torch
20+
from sentence_transformers import SentenceTransformer
21+
from sentence_transformers.models import Transformer
22+
from sentence_transformers.models.Transformer import _save_pretrained_wrapper
23+
from sentence_transformers.util import import_from_string
24+
from transformers import MT5Config, T5Config
25+
from transformers.dynamic_module_utils import get_class_from_dynamic_module
26+
27+
from .modeling_base import IPEXModel
28+
29+
30+
class IPEXTransformer(Transformer):
    """sentence-transformers ``Transformer`` module whose model is loaded via ``IPEXModel``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Transformer`` and then tag the module's backend as ``"ipex"``."""
        super().__init__(*args, **kwargs)
        self.backend = "ipex"

    def _load_model(self, model_name_or_path, config, cache_dir, backend, **model_args) -> None:
        """Load the underlying model with IPEX.

        ``backend`` is accepted but not used: loading is always delegated to
        ``_load_ipex_model``.
        """
        self._load_ipex_model(model_name_or_path, config, cache_dir, **model_args)

    def _load_ipex_model(self, model_name_or_path, config, cache_dir, **model_args) -> None:
        """Instantiate ``self.auto_model`` through ``IPEXModel.from_pretrained``.

        Args:
            model_name_or_path: Model identifier or local path handed to ``from_pretrained``.
            config: Model configuration; T5 and MT5 configurations are rejected.
            cache_dir: Cache directory handed to ``from_pretrained``.
            **model_args: Extra ``from_pretrained`` keyword arguments. An optional
                ``"export"`` entry is popped and controls whether the model is exported.

        Raises:
            ValueError: If ``config`` is a ``T5Config`` or ``MT5Config``.
        """
        if isinstance(config, (T5Config, MT5Config)):
            raise ValueError("T5 models are not yet supported by the IPEX backend.")

        # An explicit "export" argument wins; when it is absent, export unless
        # the configuration carries a truthy ``torchscript`` attribute.
        requested_export = model_args.pop("export", None)
        should_export = (
            not getattr(config, "torchscript", False) if requested_export is None else requested_export
        )

        # Recorded before loading: whether the identifier names an existing local path.
        found_locally = Path(model_name_or_path).exists()

        self.auto_model = IPEXModel.from_pretrained(
            model_name_or_path,
            config=config,
            cache_dir=cache_dir,
            export=should_export,
            **model_args,
        )

        # Wrap the save_pretrained method to save the model in the "ipex" subfolder
        original_save = self.auto_model._save_pretrained
        self.auto_model._save_pretrained = _save_pretrained_wrapper(original_save, "ipex")

        # Warn the user to save the model if it was exported during this load
        if should_export:
            self._backend_warn_to_save(model_name_or_path, found_locally, "IPEX")
64+
65+
66+
class IPEXSentenceTransformer(SentenceTransformer):
    """``SentenceTransformer`` that resolves the stock Transformer module to ``IPEXTransformer``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``SentenceTransformer`` and then tag the backend as ``"ipex"``."""
        super().__init__(*args, **kwargs)

        self.backend = "ipex"

    def _load_module_class_from_ref(
        self,
        class_ref: str,
        model_name_or_path: str,
        trust_remote_code: bool,
        revision: Optional[str] = None,
        model_kwargs: Optional[Dict[str, Any]] = None,
    ) -> torch.nn.Module:
        """Resolve a dotted module reference to a class.

        Args:
            class_ref: Dotted reference of the module class to load.
            model_name_or_path: Model identifier or path used for dynamic-module lookup.
            trust_remote_code: Whether a dynamic-module lookup may be attempted.
            revision: Revision forwarded to the dynamic-module lookup.
            model_kwargs: Optional keyword arguments; a ``"code_revision"`` entry
                is popped from it when remote code is trusted.

        Returns:
            The resolved class; ``sentence_transformers.models.Transformer`` is
            replaced by ``IPEXTransformer``.
        """
        is_builtin = class_ref.startswith("sentence_transformers.")

        # Only the stock Transformer module is redirected to the IPEX implementation.
        if is_builtin and class_ref == "sentence_transformers.models.Transformer":
            class_ref = "optimum.intel.ipex.modeling_sentence_transformers.IPEXTransformer"

        # Built-in modules, and everything else when remote code is not
        # trusted, are resolved by a plain import.
        if is_builtin or not trust_remote_code:
            return import_from_string(class_ref)

        code_revision = None
        if model_kwargs:
            code_revision = model_kwargs.pop("code_revision", None)

        try:
            return get_class_from_dynamic_module(
                class_ref,
                model_name_or_path,
                revision=revision,
                code_revision=code_revision,
            )
        except OSError:
            # Ignore the error if the file does not exist, and fall back to the default import
            pass

        return import_from_string(class_ref)

optimum/intel/utils/dummy_ipex_objects.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,14 @@ def __init__(self, *args, **kwargs):
101101
@classmethod
102102
def from_pretrained(cls, *args, **kwargs):
103103
requires_backends(cls, ["ipex"])
104+
105+
106+
class IPEXSentenceTransformer(metaclass=DummyObject):
    """Placeholder exported when IPEX and/or sentence-transformers is unavailable.

    Both entry points hand the required backend list to ``requires_backends``
    (presumably raising an informative error — confirm against its definition).
    """

    # Backends the real class depends on.
    _backends = ["ipex", "sentence_transformers"]

    def __init__(self, *args, **kwargs):
        """Check the required backends; all arguments are ignored."""
        requires_backends(self, ["ipex", "sentence_transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Check the required backends; all arguments are ignored."""
        requires_backends(cls, ["ipex", "sentence_transformers"])

tests/ipex/test_modeling.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import tempfile
1818
import time
1919
import unittest
20-
20+
import os
2121
import numpy as np
2222
import requests
2323
import torch
@@ -33,7 +33,6 @@
3333
pipeline,
3434
set_seed,
3535
)
36-
3736
from optimum.intel import (
3837
IPEXModel,
3938
IPEXModelForAudioClassification,
@@ -43,8 +42,13 @@
4342
IPEXModelForQuestionAnswering,
4443
IPEXModelForSequenceClassification,
4544
IPEXModelForTokenClassification,
45+
IPEXSentenceTransformer,
4646
)
47-
from optimum.utils.testing_utils import grid_parameters
47+
from optimum.utils.testing_utils import grid_parameters, require_sentence_transformers
48+
from optimum.intel.utils.import_utils import is_sentence_transformers_available
49+
50+
if is_sentence_transformers_available():
51+
from sentence_transformers import SentenceTransformer
4852
from utils_tests import MODEL_NAMES, IS_XPU_AVAILABLE
4953

5054

@@ -510,3 +514,33 @@ def test_patched_model(self):
510514
transformers_outputs = transformers_model(**inputs)
511515
outputs = ipex_model(**inputs)
512516
self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-4))
517+
518+
519+
class IPEXSTModel(unittest.TestCase):
    """Tests for ``IPEXSentenceTransformer`` against the reference ``SentenceTransformer``."""

    # Keys into MODEL_NAMES for the sentence-transformers checkpoints under test.
    SUPPORTED_ARCHITECTURES = (
        "st-bert",
        "st-mpnet",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @require_sentence_transformers
    def test_compare_to_original_model(self, model_arch):
        """IPEX embeddings must match the reference embeddings within ``atol=1e-4``."""
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ipex_model = IPEXSentenceTransformer(model_id)
        st_model = SentenceTransformer(model_id)
        sentences = ["This is an example sentence", "Each sentence is converted"]
        st_embeddings = st_model.encode(sentences)
        ipex_embeddings = ipex_model.encode(sentences)
        self.assertTrue(np.allclose(ipex_embeddings, st_embeddings, atol=1e-4))

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @require_sentence_transformers
    def test_sentence_transformers_save_and_infer(self, model_arch):
        """A saved model must reload from its ``ipex`` subfolder and still encode."""
        model_id = MODEL_NAMES[model_arch]
        ipex_model = IPEXSentenceTransformer(model_id)
        with tempfile.TemporaryDirectory() as tmpdirname:
            ipex_model.save_pretrained(tmpdirname)
            model = IPEXSentenceTransformer(tmpdirname, model_kwargs={"subfolder": "ipex"})
            sentences = ["This is an example sentence", "Each sentence is converted"]
            embeddings = model.encode(sentences)
            # Check the reloaded model actually produced one embedding per
            # input sentence, so the round trip is verified, not just exercised.
            self.assertEqual(len(embeddings), len(sentences))

tests/ipex/utils_tests.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
"resnet": "hf-internal-testing/tiny-random-resnet",
5454
"roberta": "hf-internal-testing/tiny-random-roberta",
5555
"roformer": "hf-internal-testing/tiny-random-roformer",
56+
"st-bert": "sentence-transformers-testing/stsb-bert-tiny-safetensors",
57+
"st-mpnet": "sentence-transformers/all-mpnet-base-v2",
5658
"squeezebert": "hf-internal-testing/tiny-random-squeezebert",
5759
"t5": "hf-internal-testing/tiny-random-t5",
5860
"unispeech": "hf-internal-testing/tiny-random-unispeech",

0 commit comments

Comments
 (0)