
Commit a0c34a3

enable "use_bettertransformer" and "torch_compile" in the DeepSpeed initializer and enable them in the Hugging Face pipeline approach (#51)
1 parent b0d9660 commit a0c34a3

File tree

2 files changed: +9 -1 lines changed


llmserve/backend/llm/initializers/hf_transformers/deepspeed.py

Lines changed: 6 additions & 0 deletions
@@ -176,6 +176,12 @@ def load_model(self, model_id: str) -> "PreTrainedModel":
         return model
 
     def postprocess_model(self, model: "PreTrainedModel") -> "PreTrainedModel":
+        if self.use_bettertransformer:
+            from optimum.bettertransformer import BetterTransformer
+
+            logger.info("Transforming the model with BetterTransformer...")
+            model = BetterTransformer.transform(model)
+
         if self.use_meta_tensor:
             ds_kwargs = dict(base_dir=self._repo_root, checkpoint=self._checkpoints_json)
         else:
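The hunk above covers only the "use_bettertransformer" branch; the commit title also mentions "torch_compile". A minimal sketch of what a matching branch inside postprocess_model could look like, assuming a boolean self.torch_compile attribute (hypothetical name; the real initializer may differ) and PyTorch >= 2.0, where torch.compile is available:

        if self.torch_compile:  # hypothetical flag; not shown in this hunk
            import torch

            logger.info("Compiling the model with torch.compile...")
            # torch.compile (PyTorch >= 2.0) returns an optimized wrapper around the model
            model = torch.compile(model)

Keeping both transformations inside postprocess_model means the DeepSpeed path and the pipeline path (below) pick them up through the same call.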

llmserve/backend/llm/pipelines/default_transformers_pipeline.py

Lines changed: 3 additions & 1 deletion
@@ -101,12 +101,14 @@ def from_initializer(
         logger.info(
             f"DefaultTransformersPipeline default_kwargs {default_kwargs}")
         logger.info(f"DefaultTransformersPipeline model_kwargs {extral_kwargs}")
-
+
         transformers_pipe = pipeline(
             **default_kwargs,
             **extral_kwargs,
         )
 
+        # use initializer to handle "use_bettertransformer" and "torch_compile"
+        transformers_pipe.model = initializer.postprocess_model(transformers_pipe.model)
         pipe = cls(
             model=transformers_pipe.model,
             tokenizer=transformers_pipe.tokenizer,
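For reference, the same post-processing can be reproduced outside llmserve with a plain Hugging Face pipeline. A self-contained sketch, assuming optimum and PyTorch >= 2.0 are installed (the model name is only an example):

from transformers import pipeline
from optimum.bettertransformer import BetterTransformer
import torch

# Build a standard pipeline, then post-process its model, mirroring what
# DefaultTransformersPipeline.from_initializer now does via postprocess_model.
pipe = pipeline("text-generation", model="gpt2")     # example model
pipe.model = BetterTransformer.transform(pipe.model) # fused-attention fast path
pipe.model = torch.compile(pipe.model)               # optional PyTorch 2.x compilation
print(pipe("Hello, world", max_new_tokens=8)[0]["generated_text"])

Because the pipeline object keeps a reference to its model, reassigning pipe.model is enough; no other pipeline state needs to change.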
