Merged
Commits (38)
- bbfc5f4  Basic implementation of request scheduling (FredyRivera-dev, Sep 6, 2025)
- a308e3e  Basic editing in SD and Flux Pipelines (FredyRivera-dev, Sep 7, 2025)
- 4799b8e  Small Fix (FredyRivera-dev, Sep 7, 2025)
- eda5847  Fix (FredyRivera-dev, Sep 7, 2025)
- 6b5e6be  Update for more pipelines (FredyRivera-dev, Sep 7, 2025)
- df2933f  Add examples/server-async (FredyRivera-dev, Sep 7, 2025)
- 5c7c7c6  Add examples/server-async (FredyRivera-dev, Sep 7, 2025)
- e3cd368  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- 09bf796  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- bd3e48a  Updated RequestScopedPipeline to handle a single tokenizer lock to av… (FredyRivera-dev, Sep 10, 2025)
- 534710c  Fix (FredyRivera-dev, Sep 10, 2025)
- 4d7c64f  Fix _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- 18db9e6  Fix _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- 8f0efb1  Delete _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- b479039  Fix tokenizer (FredyRivera-dev, Sep 10, 2025)
- e676b34  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- 0beab1c  Update examples/server-async (FredyRivera-dev, Sep 11, 2025)
- 840f0e4  Fix server-async (FredyRivera-dev, Sep 11, 2025)
- bb41c2b  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 12, 2025)
- 8a238c3  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 12, 2025)
- ed617fe  Optimizations in examples/server-async (FredyRivera-dev, Sep 13, 2025)
- b052d27  We keep the implementation simple in examples/server-async (FredyRivera-dev, Sep 14, 2025)
- 0f63f4d  Update examples/server-async/README.md (FredyRivera-dev, Sep 14, 2025)
- a9666b1  Update examples/server-async/README.md for changes to tokenizer locks… (FredyRivera-dev, Sep 14, 2025)
- 06bb136  The changes to the diffusers core have been undone and all logic is b… (FredyRivera-dev, Sep 15, 2025)
- a519915  Update examples/server-async/utils/* (FredyRivera-dev, Sep 15, 2025)
- 7cfee77  Fix BaseAsyncScheduler (FredyRivera-dev, Sep 15, 2025)
- e574f07  Rollback in the core of the diffusers (FredyRivera-dev, Sep 15, 2025)
- 05d7936  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 15, 2025)
- 1049663  Update examples/server-async/README.md (FredyRivera-dev, Sep 15, 2025)
- 5316620  Complete rollback of diffusers core files (FredyRivera-dev, Sep 15, 2025)
- 0ecdfc3  Simple implementation of an asynchronous server compatible with SD3-3… (FredyRivera-dev, Sep 17, 2025)
- ac5c9e6  Update examples/server-async/README.md (FredyRivera-dev, Sep 17, 2025)
- 72e0215  Fixed import errors in 'examples/server-async/serverasync.py' (FredyRivera-dev, Sep 17, 2025)
- edd550b  Flux Pipeline Discard (FredyRivera-dev, Sep 17, 2025)
- 6b69367  Update examples/server-async/README.md (FredyRivera-dev, Sep 17, 2025)
- 5598557  Merge branch 'main' into main (sayakpaul, Sep 18, 2025)
- 7c4f883  Apply style fixes (github-actions[bot], Sep 18, 2025)
99 changes: 99 additions & 0 deletions examples/server-async/DiffusersServer/Pipelines.py
@@ -0,0 +1,99 @@
# Pipelines.py
from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
import torch
import os
import logging
from pydantic import BaseModel

logger = logging.getLogger(__name__)

class TextToImageInput(BaseModel):
    model: str
    prompt: str
    size: str | None = None
    n: int | None = None

class TextToImagePipelineSD3:
    def __init__(self, model_path: str | None = None):
        # Model resolution order: explicit argument, then the MODEL_PATH
        # env var, then a per-device default inside start().
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: StableDiffusion3Pipeline | None = None
        self.device: str | None = None

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "stabilityai/stable-diffusion-3.5-large"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "stabilityai/stable-diffusion-3.5-medium"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")

class TextToImagePipelineFlux:
    def __init__(self, model_path: str | None = None, low_vram: bool = False):
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: FluxPipeline | None = None
        self.device: str | None = None
        self.low_vram = low_vram

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = FluxPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
            if self.low_vram:
                # Offload idle submodules to CPU between forward passes,
                # trading speed for a lower VRAM footprint.
                self.pipeline.enable_model_cpu_offload()
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = FluxPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")

class TextToImagePipelineSD:
    def __init__(self, model_path: str | None = None):
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: StableDiffusionPipeline | None = None
        self.device: str | None = None

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = StableDiffusionPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = StableDiffusionPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")
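
Each wrapper defers the expensive from_pretrained call until start(), so a server can construct it cheaply and load weights once at startup. A minimal usage sketch (hypothetical driver code, not part of this PR; the prompt and step count are illustrative):

# Construct lazily, load once, then reuse the underlying pipeline.
from DiffusersServer.Pipelines import TextToImagePipelineSD3

wrapper = TextToImagePipelineSD3()  # falls back to the MODEL_PATH env var
wrapper.start()                     # selects CUDA or MPS and loads the weights

image = wrapper.pipeline(
    prompt="A watercolor fox in a forest",
    num_inference_steps=28,
).images[0]
image.save("fox.png")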
2 changes: 2 additions & 0 deletions examples/server-async/DiffusersServer/__init__.py
@@ -0,0 +1,2 @@
from .Pipelines import TextToImagePipelineSD3
from .create_server import create_inference_server_Async as DiffusersServerApp
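
For callers, the package root is the intended import surface; a two-line sketch (assuming the example directory is on the import path):

# DiffusersServerApp is the alias for create_inference_server_Async defined above.
from DiffusersServer import DiffusersServerApp, TextToImagePipelineSD3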
45 changes: 45 additions & 0 deletions examples/server-async/DiffusersServer/create_server.py
@@ -0,0 +1,45 @@
# create_server.py

import asyncio
from typing import Any, Dict, Optional, Type

import torch

from .Pipelines import *
from .serverasync import *
from .uvicorn_diffu import *

def create_inference_server_Async(
    model: str,
    type_model: str = 't2im',
    host: str = '0.0.0.0',
    port: int = 8500,
    threads: int = 5,
    enable_memory_monitor: bool = True,
    custom_model: bool = False,
    custom_pipeline: Optional[Type] = None,
    constructor_pipeline: Optional[Type] = None,
    components: Optional[Dict[str, Any]] = None,
    api_name: Optional[str] = 'custom_api',
    torch_dtype=torch.bfloat16,
):
    config = ServerConfigModels(
        model=model,
        type_models=type_model,
        custom_model=custom_model,
        custom_pipeline=custom_pipeline,
        constructor_pipeline=constructor_pipeline,
        components=components,
        api_name=api_name,
        torch_dtype=torch_dtype,
        host=host,
        port=port,
    )

    app = create_app_fastapi(config)

    # Blocks until the Uvicorn server shuts down, then returns the app.
    asyncio.run(run_uvicorn_server(
        app,
        host=host,
        port=port,
        workers=threads,
        enable_memory_monitor=enable_memory_monitor,
    ))

    return app
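
Note that create_inference_server_Async is a blocking entry point: asyncio.run(...) only returns once Uvicorn stops, so the returned app is mainly useful for tests or post-shutdown inspection. A minimal launch script might look like this (hypothetical file name; the model id and port are illustrative values matching the defaults above):

# launch.py (hypothetical example, not part of this PR)
from DiffusersServer.create_server import create_inference_server_Async

if __name__ == "__main__":
    # Blocks here until the server is stopped (e.g. Ctrl+C).
    create_inference_server_Async(
        model="stabilityai/stable-diffusion-3.5-large",
        type_model="t2im",
        host="0.0.0.0",
        port=8500,
        threads=5,
        enable_memory_monitor=True,
    )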