Commit bfd0767

Fix itrex WOQ model loading (#730)
* Fix loading ITREX model
* add test
* fix loading WOQ and quantization config
* add test
* add revision and subfolder parameters when loading inc config
* style
* update test model id
1 parent 7b4e50f
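
What the fix enables, in practice: a weight-only quantized (WOQ) model produced with ITREX can now be loaded, saved and reloaded through `INCModelForCausalLM`, with `revision` and `subfolder` forwarded to every file lookup. A minimal sketch mirroring the new tests (the model id, revision and subfolder below are the ones used in the added test; ITREX must be installed for the WOQ path):

    import tempfile

    from transformers import AutoTokenizer

    from optimum.intel import INCModelForCausalLM

    # revision/subfolder are now forwarded when resolving weights and config files
    model = INCModelForCausalLM.from_pretrained(
        "echarlaix/tiny-random-PhiForCausalLM", revision="itrex", subfolder="itrex"
    )
    tokenizer = AutoTokenizer.from_pretrained("echarlaix/tiny-random-PhiForCausalLM", revision="itrex")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # saving a WOQ model now writes quantize_config.json next to the weights,
        # so the reload below goes through the weight-only quantization path again
        model.save_pretrained(tmp_dir)
        loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir)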

File tree: 5 files changed, +143 -39 lines changed

optimum/intel/neural_compressor/modeling_base.py

Lines changed: 81 additions & 34 deletions
@@ -22,6 +22,7 @@
 import torch
 from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+from huggingface_hub.utils import EntryNotFoundError
 from neural_compressor.utils.pytorch import load
 from transformers import (
     AutoConfig,
@@ -40,14 +41,15 @@
 )
 from transformers.modeling_utils import no_init_weights
 from transformers.models.auto.auto_factory import _get_model_class
+from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
 from transformers.utils.generic import ContextManagers

 from optimum.intel.generation import BaseModelForCausalLM

 from ...modeling_base import OptimizedModel
 from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
 from .configuration import INCConfig
-from .utils import WEIGHTS_NAME
+from .utils import QUANTIZATION_CONFIG_NAME


 logger = logging.getLogger(__name__)
@@ -119,33 +121,70 @@ def _from_pretrained(
             raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
         token = use_auth_token

-        model_name_or_path = kwargs.pop("model_name_or_path", None)
-        if model_name_or_path is not None:
-            logger.warning("`model_name_or_path` is deprecated please use `model_id`")
-            model_id = model_id or model_name_or_path
-
         model_path = Path(model_id)
-
-        if model_path.is_dir():
-            model_cache_path = model_path / file_name
+        is_local = model_path.is_dir()
+        model_cache_path = None
+        inc_config = None
+        msg = None
+        if is_local:
+            if (model_path / subfolder / SAFE_WEIGHTS_NAME).is_file():
+                file_name = SAFE_WEIGHTS_NAME
+            elif not (model_path / subfolder / file_name).is_file():
+                raise EnvironmentError(
+                    f"Error no file named {SAFE_WEIGHTS_NAME} or {file_name} found in directory {model_path / subfolder}"
+                )
+            model_cache_path = model_path / subfolder / file_name
         else:
-            model_cache_path = hf_hub_download(
-                repo_id=model_id,
-                filename=file_name,
-                subfolder=subfolder,
-                token=token,
-                revision=revision,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                local_files_only=local_files_only,
-            )
+            # Try download safetensors if exist
+            try:
+                model_cache_path = hf_hub_download(
+                    repo_id=model_id,
+                    filename=SAFE_WEIGHTS_NAME,
+                    subfolder=subfolder,
+                    token=token,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    local_files_only=local_files_only,
+                )
+            except EntryNotFoundError:
+                pass
+
+            if model_cache_path is None:
+                model_cache_path = hf_hub_download(
+                    repo_id=model_id,
+                    filename=file_name,
+                    subfolder=subfolder,
+                    token=token,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    local_files_only=local_files_only,
+                )

         model_save_dir = Path(model_cache_path).parent
-        inc_config = None
-        msg = None
+
         if is_itrex_available():
-            try:
-                quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
+            quantization_config_path = None
+            if is_local:
+                quantization_config_path = model_path / subfolder / QUANTIZATION_CONFIG_NAME
+            else:
+                try:
+                    quantization_config_path = hf_hub_download(
+                        repo_id=model_id,
+                        filename=QUANTIZATION_CONFIG_NAME,
+                        subfolder=subfolder,
+                        token=token,
+                        revision=revision,
+                        cache_dir=cache_dir,
+                        force_download=force_download,
+                        local_files_only=local_files_only,
+                    )
+                except EntryNotFoundError:
+                    pass
+
+            if quantization_config_path and Path(quantization_config_path).is_file():
+                quantization_config = PretrainedConfig.from_pretrained(quantization_config_path)
                 algorithm = getattr(quantization_config, "quant_method", None)
                 if algorithm in {"rtn", "gptq", "awq", "autoround"}:
                     from intel_extension_for_transformers.transformers.modeling.modeling_auto import (
@@ -154,7 +193,7 @@ def _from_pretrained(

                     _BaseQBitsAutoModelClass.ORIG_MODEL = cls.auto_model_class

-                    return _BaseQBitsAutoModelClass.from_pretrained(
+                    model = _BaseQBitsAutoModelClass.from_pretrained(
                         pretrained_model_name_or_path=model_id,
                         token=token,
                         revision=revision,
@@ -163,12 +202,16 @@ def _from_pretrained(
                         local_files_only=local_files_only,
                         subfolder=subfolder,
                         trust_remote_code=trust_remote_code,
+                        use_neural_speed=False,
                         **kwargs,
                     )
-            except EnvironmentError:
-                msg = "The model is not quantized with weight-only quantization."
+
+                    return cls(
+                        model, config=config, model_save_dir=model_save_dir, q_config=quantization_config, **kwargs
+                    )
+
         try:
-            inc_config = INCConfig.from_pretrained(model_id)
+            inc_config = INCConfig.from_pretrained(model_id, subfolder=subfolder, revision=revision)
             if not is_torch_version("==", inc_config.torch_version):
                 msg = f"Quantized model was obtained with torch version {inc_config.torch_version} but {_torch_version} was found."
                 logger.warning(f"{msg}")
@@ -209,15 +252,19 @@ def _from_pretrained(
         )

     def _save_pretrained(self, save_directory: Union[str, Path]):
-        output_path = os.path.join(save_directory, WEIGHTS_NAME)
-
         if isinstance(self.model, torch.nn.Module):
-            state_dict = self.model.state_dict()
-            if self._q_config:
-                state_dict["best_configure"] = self._q_config
-            torch.save(state_dict, output_path)
+            # For ITREX model
+            if isinstance(self._q_config, PretrainedConfig):
+                self._q_config.to_json_file(os.path.join(save_directory, QUANTIZATION_CONFIG_NAME))
+                self.model.save_pretrained(save_directory)
+            # For INC model the state dictionary needs to be modified to include the quantization parameters
+            else:
+                state_dict = self.model.state_dict()
+                if isinstance(self._q_config, dict):
+                    state_dict["best_configure"] = self._q_config
+                torch.save(state_dict, os.path.join(save_directory, WEIGHTS_NAME))
         else:
-            torch.jit.save(self.model, output_path)
+            torch.jit.save(self.model, os.path.join(save_directory, WEIGHTS_NAME))

         if self.inc_config:
             self.inc_config.save_pretrained(save_directory)
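
The download logic above boils down to a safetensors-first lookup with a fallback: `hf_hub_download` raises `EntryNotFoundError` when the requested file is absent from the repo, which the new code catches before retrying with the legacy weights file. A minimal standalone sketch of the same pattern (the helper name is illustrative, not part of this commit; "model.safetensors" and "pytorch_model.bin" are the values of SAFE_WEIGHTS_NAME and WEIGHTS_NAME in transformers):

    from huggingface_hub import hf_hub_download
    from huggingface_hub.utils import EntryNotFoundError


    def download_weights(repo_id: str, fallback_name: str = "pytorch_model.bin", **kwargs) -> str:
        # Prefer the safetensors weights, then fall back to the legacy torch weights file.
        for filename in ("model.safetensors", fallback_name):
            try:
                return hf_hub_download(repo_id=repo_id, filename=filename, **kwargs)
            except EntryNotFoundError:
                continue
        raise EnvironmentError(f"No weights file found in {repo_id}")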

optimum/intel/neural_compressor/utils.py

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@


 CONFIG_NAME = "best_configure.yaml"
+QUANTIZATION_CONFIG_NAME = "quantize_config.json"

 NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
 NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
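
The new constant names the JSON file that `_save_pretrained` writes for ITREX models and that `_from_pretrained` looks up before falling back to the INC path. A small round-trip sketch, assuming only that `PretrainedConfig` serializes arbitrary extra attributes (the quant fields below are hypothetical; real configs are produced by ITREX quantization):

    import os
    import tempfile

    from transformers import PretrainedConfig

    from optimum.intel.neural_compressor.utils import QUANTIZATION_CONFIG_NAME

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Hypothetical WOQ fields, stored as extra attributes on the config.
        config = PretrainedConfig(quant_method="rtn", bits=4)
        path = os.path.join(tmp_dir, QUANTIZATION_CONFIG_NAME)
        config.to_json_file(path)
        # PretrainedConfig.from_pretrained accepts a direct path to a JSON file,
        # which is how the loading code above reads the config back.
        restored = PretrainedConfig.from_pretrained(path)
        assert getattr(restored, "quant_method", None) == "rtn"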

tests/neural_compressor/test_modeling.py

Lines changed: 60 additions & 3 deletions
@@ -16,10 +16,12 @@
 import os
 import tempfile
 import unittest
+from pathlib import Path

 import torch
 from parameterized import parameterized
 from transformers import AutoTokenizer, pipeline, set_seed
+from transformers.utils import SAFE_WEIGHTS_NAME

 from optimum.exporters import TasksManager
 from optimum.intel import (  # noqa
@@ -37,7 +39,8 @@
     INCStableDiffusionPipeline,
     INCTrainer,
 )
-from optimum.intel.neural_compressor.utils import _HEAD_TO_AUTOMODELS, WEIGHTS_NAME
+from optimum.intel.neural_compressor.utils import _HEAD_TO_AUTOMODELS, QUANTIZATION_CONFIG_NAME, WEIGHTS_NAME
+from optimum.intel.utils.import_utils import is_itrex_available


 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -52,7 +55,7 @@


 MODEL_NAMES_TO_TASK = (
-    ("hf-internal-testing/tiny-random-gpt2", "text-generation"),
+    ("hf-internal-testing/tiny-random-GPT2LMHeadModel", "text-generation"),
     ("hf-internal-testing/tiny-random-BertForMaskedLM", "fill-mask"),
     ("hf-internal-testing/tiny-random-DistilBertForSequenceClassification", "text-classification"),
     ("hf-internal-testing/tiny-random-DebertaV2Model", "feature-extraction"),
@@ -86,7 +89,7 @@ def test_compare_to_transformers(self, model_id, task):
         outputs = inc_model(**model_inputs)
         with tempfile.TemporaryDirectory() as tmpdirname:
             inc_model.save_pretrained(tmpdirname)
-            loaded_model = model_class.from_pretrained(tmpdirname, file_name=WEIGHTS_NAME)
+            loaded_model = model_class.from_pretrained(tmpdirname)
             outputs_loaded = loaded_model(**model_inputs)

         if task == "feature-extraction":
@@ -143,3 +146,57 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertEqual(outputs_with_pkv.shape[1], self.GENERATION_LENGTH)
         self.assertEqual(outputs_without_pkv.shape[1], self.GENERATION_LENGTH)
         self.assertTrue(torch.equal(outputs_with_pkv, outputs_without_pkv))
+
+    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
+    def test_saving_loading_woq_itrex_model(self):
+        model_name = "echarlaix/tiny-random-PhiForCausalLM"
+        subfolder = "itrex"
+        model = INCModelForCausalLM.from_pretrained(model_name, revision="itrex", subfolder=subfolder)
+        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="itrex")
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        tokens = tokenizer("This is a sample output", return_tensors="pt")
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model_save_dir = Path(tmp_dir) / subfolder
+            model.save_pretrained(model_save_dir)
+            folder_contents = os.listdir(model_save_dir)
+            self.assertIn(SAFE_WEIGHTS_NAME, folder_contents)
+            self.assertIn(QUANTIZATION_CONFIG_NAME, folder_contents)
+            loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir, subfolder=subfolder)

+        with torch.no_grad():
+            outputs = model(**tokens)
+            loaded_outputs = loaded_model(**tokens)
+
+        self.assertTrue("logits" in loaded_outputs)
+        self.assertIsInstance(loaded_outputs.logits, torch.Tensor)
+        self.assertTrue("past_key_values" in loaded_outputs)
+        self.assertIsInstance(loaded_outputs.past_key_values, tuple)
+        self.assertTrue(torch.allclose(outputs.logits, loaded_outputs.logits, atol=1e-5))
+
+    def test_saving_loading_inc_model(self):
+        model_name = "echarlaix/tiny-random-PhiForCausalLM"
+        subfolder = "inc"
+        model = INCModelForCausalLM.from_pretrained(model_name, revision="inc", subfolder=subfolder)
+        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="inc")
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        tokens = tokenizer("This is a sample output", return_tensors="pt")
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model_save_dir = Path(tmp_dir) / subfolder
+            model.save_pretrained(model_save_dir)
+            folder_contents = os.listdir(model_save_dir)
+            self.assertIn(WEIGHTS_NAME, folder_contents)
+            self.assertIn("inc_config.json", folder_contents)
+            loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir, subfolder=subfolder)
+            self.assertIsInstance(loaded_model.inc_config, INCConfig)
+
+        with torch.no_grad():
+            outputs = model(**tokens)
+            loaded_outputs = loaded_model(**tokens)
+
+        self.assertTrue("logits" in loaded_outputs)
+        self.assertIsInstance(loaded_outputs.logits, torch.Tensor)
+        self.assertTrue("past_key_values" in loaded_outputs)
+        self.assertIsInstance(loaded_outputs.past_key_values, tuple)
+        self.assertTrue(torch.allclose(outputs.logits, loaded_outputs.logits, atol=1e-5))

tests/neural_compressor/test_optimization.py

Lines changed: 0 additions & 1 deletion
@@ -47,7 +47,6 @@
 from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
 from optimum.intel.utils.import_utils import is_torch_version, is_itrex_available

-
 from optimum.intel import (
     INCConfig,
     INCModelForCausalLM,

tests/neural_compressor/utils_tests.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@
     "electra": "hf-internal-testing/tiny-random-electra",
     "flaubert": "hf-internal-testing/tiny-random-flaubert",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
-    "gpt2": "hf-internal-testing/tiny-random-gpt2",
+    "gpt2": "hf-internal-testing/tiny-random-GPT2LMHeadModel",
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
