 import importlib.util
 import logging
+import os

+from diffusers import StableDiffusionPipeline  # used below to read the repository config
 from transformers.utils.import_utils import is_torch_bf16_gpu_available
+from optimum import neuron

 logger = logging.getLogger(__name__)
 logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
@@ -49,14 +51,64 @@ def __call__(
         out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs)
         return out.images[0]

-
-DIFFUSERS_TASKS = {
-    "text-to-image": IEAutoPipelineForText2Image,
-}
-
-
-def get_diffusers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
+
+def load_optimum_diffusion_pipeline(task, model_dir):
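+    """Select and load a Neuron diffusion pipeline for `task` from `model_dir`.
+
+    The pipeline class is inferred from the `_class_name` entry of the repository
+    config: an already Neuron-compiled checkpoint is loaded as-is, anything else
+    is exported (compiled) on the fly.
+    """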
+    # Step 1: load the repository config and look up its `_class_name`
+    try:
+        config = StableDiffusionPipeline.load_config(pretrained_model_name_or_path=model_dir)
+    except OSError:
+        logger.exception("Unable to load config file for repository %s", model_dir)
+        raise
+
+    pipeline_class_name = config["_class_name"]
+
+    logger.debug("Repository pipeline class name %s", pipeline_class_name)
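+    # Step 2: pick the Neuron pipeline class matching the checkpoint family
+    # (SDXL vs. plain Stable Diffusion) and the requested task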
+    if "Diffusion" in pipeline_class_name and "XL" in pipeline_class_name:
+        if task == "image-to-image":
+            pipeline_class = neuron.NeuronStableDiffusionXLImg2ImgPipeline
+        else:
+            pipeline_class = neuron.NeuronStableDiffusionXLPipeline
+    else:
+        if task == "image-to-image":
+            pipeline_class = neuron.NeuronStableDiffusionImg2ImgPipeline
+        else:
+            pipeline_class = neuron.NeuronStableDiffusionPipeline
+
+    logger.debug("Pipeline class %s", pipeline_class.__name__)
+
+    # Step 3: build the loading kwargs. An already Neuron-compiled checkpoint
+    # needs none.
+    if "Neuron" in pipeline_class_name:
+        kwargs = {}
+    else:
+        # The model is compiled and exported on the fly: cached artifacts cause a
+        # performance drop for diffusion models, so weights are inlined into the
+        # NEFF unless an explicit env variable opts out
+        compiler_args = {
+            "auto_cast": "matmul",
+            "auto_cast_type": "bf16",
+            "inline_weights_to_neff": os.environ.get("INLINE_WEIGHTS_TO_NEFF", "true").lower()
+            not in ["false", "no", "0"],
+            "data_parallel_mode": os.environ.get("DATA_PARALLEL_MODE", "unet"),
+        }
+        # Image shapes have to be frozen at load/compilation time
+        input_shapes = {
+            "batch_size": 1,
+            "height": int(os.environ.get("IMAGE_HEIGHT", 512)),
+            "width": int(os.environ.get("IMAGE_WIDTH", 512)),
+        }
+        kwargs = {**compiler_args, **input_shapes, "export": True}
+
+    # In the export case, compilation can take a long time, which makes endpoints
+    # a poor fit as long as the compilation cache is not a real option for diffusion
+    return pipeline_class.from_pretrained(model_dir, **kwargs)
+
+
+def get_diffusers_pipeline(task=None, model_dir=None, **kwargs):
     """Get a pipeline for Diffusers models."""
-    device = "cuda" if device == 0 else "cpu"
-    pipeline = DIFFUSERS_TASKS[task](model_dir=model_dir, device=device)
+    pipeline = load_optimum_diffusion_pipeline(task=task, model_dir=model_dir)
     return pipeline