Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Copyright (c) 2023 Apple Inc.
Copyright (c) 2024 MediaTek Inc.
Copyright 2023 NXP
Copyright (c) 2025 Samsung Electronics Co. LTD
Copyright (c) Intel Corporation

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Expand Down
9 changes: 9 additions & 0 deletions examples/models/stable_diffusion/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) Intel Corporation
#
# Licensed under the BSD License (the "License"); you may not use this file
# except in compliance with the License. See the license file found in the
# LICENSE file in the root directory of this source tree.

from .model import LCMModelLoader, TextEncoderWrapper, UNetWrapper, VAEDecoder

__all__ = ["LCMModelLoader", "TextEncoderWrapper", "UNetWrapper", "VAEDecoder"]
189 changes: 189 additions & 0 deletions examples/models/stable_diffusion/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Copyright (c) Intel Corporation
#
# Licensed under the BSD License (the "License"); you may not use this file
# except in compliance with the License. See the license file found in the
# LICENSE file in the root directory of this source tree.

"""
Stable Diffusion / LCM model definitions.

This module provides reusable model wrappers that can be used with any backend
(OpenVINO, XNNPACK, etc.) for exporting Latent Consistency Models.
"""

import logging
from typing import Any, Optional

import torch

try:
from diffusers import DiffusionPipeline
except ImportError:
raise ImportError(
"Please install diffusers and transformers: pip install diffusers transformers"
)

logger = logging.getLogger(__name__)


class TextEncoderWrapper(torch.nn.Module):
"""Wrapper for CLIP text encoder that extracts last_hidden_state"""

def __init__(self, text_encoder):
super().__init__()
self.text_encoder = text_encoder

def forward(self, input_ids):
# Call text encoder and extract last_hidden_state
output = self.text_encoder(input_ids, return_dict=True)
return output.last_hidden_state


class UNetWrapper(torch.nn.Module):
"""Wrapper for UNet that extracts sample tensor from output"""

def __init__(self, unet):
super().__init__()
self.unet = unet

def forward(self, latents, timestep, encoder_hidden_states):
# Call UNet and extract sample from the output
output = self.unet(latents, timestep, encoder_hidden_states, return_dict=True)
return output.sample


class VAEDecoder(torch.nn.Module):
"""Wrapper for VAE decoder with scaling and normalization"""

def __init__(self, vae):
super().__init__()
self.vae = vae

def forward(self, latents):
# Scale latents
latents = latents / self.vae.config.scaling_factor
# Decode
image = self.vae.decode(latents).sample
# Scale to [0, 1]
image = (image / 2 + 0.5).clamp(0, 1)
return image


class LCMModelLoader:
"""
Backend-agnostic loader for Latent Consistency Model components.

This class handles loading the LCM pipeline from HuggingFace and extracting
individual components (text_encoder, unet, vae) as PyTorch modules ready
for export to any backend.
"""

def __init__(
self,
model_id: str = "SimianLuo/LCM_Dreamshaper_v7",
dtype: torch.dtype = torch.float16,
):
"""
Initialize the LCM model loader.

Args:
model_id: HuggingFace model ID for the LCM model
dtype: Target dtype for the models (fp16 or fp32)
"""
self.model_id = model_id
self.dtype = dtype
self.pipeline: Optional[DiffusionPipeline] = None
self.text_encoder: Any = None
self.unet: Any = None
self.vae: Any = None
self.tokenizer: Any = None

def load_models(self) -> bool:
"""
Load the LCM pipeline and extract components.

Returns:
True if successful, False otherwise
"""
try:
logger.info(f"Loading LCM pipeline: {self.model_id} (dtype: {self.dtype})")
self.pipeline = DiffusionPipeline.from_pretrained(
self.model_id, use_safetensors=True
)

# Extract individual components and convert to desired dtype
self.text_encoder = self.pipeline.text_encoder.to(dtype=self.dtype)
self.unet = self.pipeline.unet.to(dtype=self.dtype)
self.vae = self.pipeline.vae.to(dtype=self.dtype)
self.tokenizer = self.pipeline.tokenizer

# Set models to evaluation mode
self.text_encoder.eval()
self.unet.eval()
self.vae.eval()

logger.info("Successfully loaded all LCM model components")
return True

except Exception as e:
logger.error(f"Failed to load models: {e}")
import traceback

traceback.print_exc()
return False

def get_text_encoder_wrapper(self) -> TextEncoderWrapper:
"""Get wrapped text encoder ready for export"""
if self.text_encoder is None:
raise ValueError("Models not loaded. Call load_models() first.")
return TextEncoderWrapper(self.text_encoder)

def get_unet_wrapper(self) -> UNetWrapper:
"""Get wrapped UNet ready for export"""
if self.unet is None:
raise ValueError("Models not loaded. Call load_models() first.")
return UNetWrapper(self.unet)

def get_vae_decoder(self) -> VAEDecoder:
"""Get wrapped VAE decoder ready for export"""
if self.vae is None:
raise ValueError("Models not loaded. Call load_models() first.")
return VAEDecoder(self.vae)

def get_dummy_inputs(self):
"""
Get dummy inputs for each model component.

Returns:
Dictionary with dummy inputs for text_encoder, unet, and vae_decoder
"""
if self.unet is None:
raise ValueError("Models not loaded. Call load_models() first.")

# Text encoder dummy input
text_encoder_input = torch.ones(1, 77, dtype=torch.long)

# UNet dummy inputs
batch_size = 1
latent_channels = 4
latent_height = 64
latent_width = 64
text_embed_dim = self.unet.config.cross_attention_dim
text_seq_len = 77

unet_inputs = (
torch.randn(
batch_size, latent_channels, latent_height, latent_width, dtype=self.dtype
),
torch.tensor([981]), # Random timestep
torch.randn(batch_size, text_seq_len, text_embed_dim, dtype=self.dtype),
)

# VAE decoder dummy input
vae_input = torch.randn(1, 4, 64, 64, dtype=self.dtype)

return {
"text_encoder": (text_encoder_input,),
"unet": unet_inputs,
"vae_decoder": (vae_input,),
}
48 changes: 48 additions & 0 deletions examples/openvino/stable_diffusion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Stable Diffusion LCM with OpenVINO Backend

This example demonstrates how to run Latent Consistency Models (LCM) for fast text-to-image generation on Intel hardware using ExecuTorch with the OpenVINO backend.

## Overview

Latent Consistency Models (LCMs) are optimized diffusion models that generate high-quality images in just 4-8 steps, compared to 25-50 steps required by traditional Stable Diffusion models.

## Environment Setup
Follow the [instructions](../../../backends/openvino/README.md) of **Prerequisites** and **Setup** in `backends/openvino/README.md` to set up the OpenVINO backend.

### Install dependencies
```bash
pip install -r requirements.txt
```

## Export the Model

Export the LCM model:

```bash
python export_lcm.py \
--model_id SimianLuo/LCM_Dreamshaper_v7 \
--output_dir ./lcm_models \
--device CPU \
--dtype fp16
```
This will create three files in `./lcm_models/`:
- `text_encoder.pte`
- `unet.pte`
- `vae_decoder.pte`

### Generate Images

Run inference with the exported model:

```bash
python openvino_lcm.py \
--models_dir ./lcm_models \
--prompt "a beautiful sunset over mountains" \
--steps 4 \
--dtype fp16
```
## Supported Models

This implementation supports LCM-based Stable Diffusion models:
- **SimianLuo/LCM_Dreamshaper_v7**
- **latent-consistency/lcm-sdxl**
Loading
Loading