Commit 1756a6d

wip stable diffusion for neuron
Signed-off-by: Raphael Glon <[email protected]>
1 parent cfe316e commit 1756a6d

2 files changed: +82 −13 lines changed

src/huggingface_inference_toolkit/diffusers_utils.py

Lines changed: 61 additions & 9 deletions
@@ -1,7 +1,9 @@
 import importlib.util
 import logging
+import os
 
 from transformers.utils.import_utils import is_torch_bf16_gpu_available
+from optimum import neuron
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
@@ -49,14 +51,64 @@ def __call__(
         out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs)
         return out.images[0]
 
-
-DIFFUSERS_TASKS = {
-    "text-to-image": IEAutoPipelineForText2Image,
-}
-
-
-def get_diffusers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
+#
+# DIFFUSERS_TASKS = {
+#     "text-to-image": [NeuronStableDiffusionXLPipeline],
+# }
+
+
+def load_optimum_diffusion_pipeline(task, model_dir):
+
+    # Step 1: load the repository config and look for _class_name
+    try:
+        config = StableDiffusionPipeline.load_config(pretrained_model_name_or_path=model_dir)
+    except OSError as e:
+        logger.error("Unable to load config file for repository %s", model_dir)
+        logger.exception(e)
+        raise
+
+    pipeline_class_name = config["_class_name"]
+
+    logger.debug("Repository pipeline class name %s", pipeline_class_name)
+    if "Diffusion" in pipeline_class_name and "XL" in pipeline_class_name:
+        if task == "image-to-image":
+            pipeline_class = neuron.NeuronStableDiffusionXLImg2ImgPipeline
+        else:
+            pipeline_class = neuron.NeuronStableDiffusionXLPipeline
+    else:
+        if task == "image-to-image":
+            pipeline_class = neuron.NeuronStableDiffusionImg2ImgPipeline
+        else:
+            pipeline_class = neuron.NeuronStableDiffusionPipeline
+
+    logger.debug("Pipeline class %s", pipeline_class.__name__)
+
+    # If the checkpoint is already a Neuron model, no additional kwargs are needed
+    if "Neuron" in pipeline_class_name:
+        kwargs = {}
+    else:
+        # The model will be compiled and exported on the fly, since cached models cause a
+        # performance drop for diffusion models, unless specified otherwise through an explicit env variable
+
+        # Image shapes need to be frozen at loading/compilation time
+        compiler_args = {
+            "auto_cast": "matmul",
+            "auto_cast_type": "bf16",
+            "inline_weights_to_neff": os.environ.get("INLINE_WEIGHTS_TO_NEFF",
                                                      "true").lower() in ["true", "yes", "1"],
+            "data_parallel_mode": os.environ.get("DATA_PARALLEL_MODE", "unet")
+        }
+        input_shapes = {"batch_size": 1,
+                        "height": int(os.environ.get("IMAGE_HEIGHT", 512)),
+                        "width": int(os.environ.get("IMAGE_WIDTH", 512))}
+        kwargs = {**compiler_args, **input_shapes, "export": True}
+
+    # In the second case, exporting can take a huge amount of time, which makes endpoints not a
+    # well-suited solution, at least as long as the cache is not really an option for diffusion
+    return pipeline_class.from_pretrained(model_dir, **kwargs)
+
+
+def get_diffusers_pipeline(task=None, model_dir=None, **kwargs):
     """Get a pipeline for Diffusers models."""
-    device = "cuda" if device == 0 else "cpu"
-    pipeline = DIFFUSERS_TASKS[task](model_dir=model_dir, device=device)
+    pipeline = load_optimum_diffusion_pipeline(task=task, model_dir=model_dir)
     return pipeline
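
As a usage reference, here is a minimal sketch of how the new loader could be exercised, assuming the module path from this diff; the env values, the model directory, and the prompt are illustrative, not part of the commit:

import os

# Illustrative values; only the variable names come from the diff above.
# Image shapes are frozen at compile time, so they must be set before loading.
os.environ["IMAGE_HEIGHT"] = "768"
os.environ["IMAGE_WIDTH"] = "768"
os.environ["DATA_PARALLEL_MODE"] = "unet"

from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline

# Hypothetical model directory; for a non-Neuron checkpoint this triggers
# on-the-fly compilation and export, which can take a long time.
pipeline = get_diffusers_pipeline(task="text-to-image", model_dir="/opt/ml/model")
image = pipeline("a photo of an astronaut riding a horse").images[0]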

src/huggingface_inference_toolkit/utils.py

Lines changed: 21 additions & 4 deletions
@@ -33,7 +33,7 @@
 
 
 def is_optimum_available():
-    return False
+    return True
     # TODO: change when supported
     # return _optimum_available
 
@@ -229,7 +229,7 @@ def get_pipeline(
     create pipeline class for a specific task based on local saved model
     """
     device = get_device()
-    logger.info(f"Using device { 'GPU' if device == 0 else 'CPU'}")
+    logger.info(f"Using device { 'GPU' if device == 0 else 'CPU/TPU/Neuron...'}")
 
     if task is None:
         raise EnvironmentError(
@@ -265,11 +265,10 @@ def get_pipeline(
             device=device,
             **kwargs
         )
-    elif is_diffusers_available() and task == "text-to-image":
+    elif is_diffusers_available() and task in ["text-to-image", "image-to-image"]:
         hf_pipeline = get_diffusers_pipeline(
             task=task,
             model_dir=model_dir,
-            device=device,
             **kwargs
         )
     else:
@@ -308,3 +307,21 @@ def convert_params_to_int_or_bool(params):
         if v == "true":
             params[k] = True
     return params
+
+
+# def local_model_card(model_dir: str) -> Optional[ModelCard]:
+#
+#     logger.debug("Rebuilding offline model info for repo %s", model_dir)
+#
+#     # Let's rebuild some partial model info from what we see in the cache; the info
+#     # extracted should be enough for most use cases
+#
+#     card_path = Path(model_dir) / "README.md"
+#     if not card_path.exists():
+#         logger.debug("Unable to build model info for directory %s", model_dir)
+#         return None
+#
+#     logger.debug("Loading model card from model readme %s", card_path)
+#     model_card = ModelCard.load(card_path)
+#     logger.info("Local repo %s, model card data %s", model_dir, model_card.data.to_dict())
+#     return model_card
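
For reference, the commented-out helper above would look roughly like this as a runnable sketch, assuming the Path, Optional, and huggingface_hub.ModelCard imports it implicitly relies on:

import logging
from pathlib import Path
from typing import Optional

from huggingface_hub import ModelCard

logger = logging.getLogger(__name__)


def local_model_card(model_dir: str) -> Optional[ModelCard]:
    # Rebuild partial model info from what is in the local cache;
    # should be enough for most use cases
    card_path = Path(model_dir) / "README.md"
    if not card_path.exists():
        logger.debug("Unable to build model info for directory %s", model_dir)
        return None
    logger.debug("Loading model card from model readme %s", card_path)
    model_card = ModelCard.load(card_path)
    logger.info("Local repo %s, model card data %s", model_dir, model_card.data.to_dict())
    return model_card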
