Merged
Commits (38)
- bbfc5f4  Basic implementation of request scheduling (FredyRivera-dev, Sep 6, 2025)
- a308e3e  Basic editing in SD and Flux Pipelines (FredyRivera-dev, Sep 7, 2025)
- 4799b8e  Small Fix (FredyRivera-dev, Sep 7, 2025)
- eda5847  Fix (FredyRivera-dev, Sep 7, 2025)
- 6b5e6be  Update for more pipelines (FredyRivera-dev, Sep 7, 2025)
- df2933f  Add examples/server-async (FredyRivera-dev, Sep 7, 2025)
- 5c7c7c6  Add examples/server-async (FredyRivera-dev, Sep 7, 2025)
- e3cd368  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- 09bf796  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- bd3e48a  Updated RequestScopedPipeline to handle a single tokenizer lock to av… (FredyRivera-dev, Sep 10, 2025)
- 534710c  Fix (FredyRivera-dev, Sep 10, 2025)
- 4d7c64f  Fix _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- 18db9e6  Fix _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- 8f0efb1  Delete _TokenizerLockWrapper (FredyRivera-dev, Sep 10, 2025)
- b479039  Fix tokenizer (FredyRivera-dev, Sep 10, 2025)
- e676b34  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 10, 2025)
- 0beab1c  Update examples/server-async (FredyRivera-dev, Sep 11, 2025)
- 840f0e4  Fix server-async (FredyRivera-dev, Sep 11, 2025)
- bb41c2b  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 12, 2025)
- 8a238c3  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 12, 2025)
- ed617fe  Optimizations in examples/server-async (FredyRivera-dev, Sep 13, 2025)
- b052d27  We keep the implementation simple in examples/server-async (FredyRivera-dev, Sep 14, 2025)
- 0f63f4d  Update examples/server-async/README.md (FredyRivera-dev, Sep 14, 2025)
- a9666b1  Update examples/server-async/README.md for changes to tokenizer locks… (FredyRivera-dev, Sep 14, 2025)
- 06bb136  The changes to the diffusers core have been undone and all logic is b… (FredyRivera-dev, Sep 15, 2025)
- a519915  Update examples/server-async/utils/* (FredyRivera-dev, Sep 15, 2025)
- 7cfee77  Fix BaseAsyncScheduler (FredyRivera-dev, Sep 15, 2025)
- e574f07  Rollback in the core of the diffusers (FredyRivera-dev, Sep 15, 2025)
- 05d7936  Merge branch 'huggingface:main' into main (FredyRivera-dev, Sep 15, 2025)
- 1049663  Update examples/server-async/README.md (FredyRivera-dev, Sep 15, 2025)
- 5316620  Complete rollback of diffusers core files (FredyRivera-dev, Sep 15, 2025)
- 0ecdfc3  Simple implementation of an asynchronous server compatible with SD3-3… (FredyRivera-dev, Sep 17, 2025)
- ac5c9e6  Update examples/server-async/README.md (FredyRivera-dev, Sep 17, 2025)
- 72e0215  Fixed import errors in 'examples/server-async/serverasync.py' (FredyRivera-dev, Sep 17, 2025)
- edd550b  Flux Pipeline Discard (FredyRivera-dev, Sep 17, 2025)
- 6b69367  Update examples/server-async/README.md (FredyRivera-dev, Sep 17, 2025)
- 5598557  Merge branch 'main' into main (sayakpaul, Sep 18, 2025)
- 7c4f883  Apply style fixes (github-actions[bot], Sep 18, 2025)
99 changes: 99 additions & 0 deletions examples/server-async/DiffusersServer/Pipelines.py
@@ -0,0 +1,99 @@
# Pipelines.py
from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
import torch
import os
import logging
from pydantic import BaseModel

logger = logging.getLogger(__name__)

class TextToImageInput(BaseModel):
    model: str
    prompt: str
    size: str | None = None
    n: int | None = None

class TextToImagePipelineSD3:
    def __init__(self, model_path: str | None = None):
        # Model resolution order: explicit argument, then the MODEL_PATH
        # env var, then a per-device default inside start().
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: StableDiffusion3Pipeline | None = None
        self.device: str | None = None

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "stabilityai/stable-diffusion-3.5-large"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "stabilityai/stable-diffusion-3.5-medium"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")

class TextToImagePipelineFlux:
    def __init__(self, model_path: str | None = None, low_vram: bool = False):
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: FluxPipeline | None = None
        self.device: str | None = None
        self.low_vram = low_vram

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = FluxPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
            if self.low_vram:
                # Offload idle submodules to CPU between forward passes,
                # trading speed for a lower VRAM footprint.
                self.pipeline.enable_model_cpu_offload()
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = FluxPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")

class TextToImagePipelineSD:
    def __init__(self, model_path: str | None = None):
        self.model_path = model_path or os.getenv("MODEL_PATH")
        self.pipeline: StableDiffusionPipeline | None = None
        self.device: str | None = None

    def start(self):
        if torch.cuda.is_available():
            model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
            logger.info("Loading CUDA")
            self.device = "cuda"
            self.pipeline = StableDiffusionPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        elif torch.backends.mps.is_available():
            model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
            logger.info("Loading MPS for Mac M Series")
            self.device = "mps"
            self.pipeline = StableDiffusionPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
            ).to(device=self.device)
        else:
            raise Exception("No CUDA or MPS device available")
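
Each wrapper defers the expensive from_pretrained call until start(), so a server can construct it cheaply and load weights once at startup. A minimal usage sketch (hypothetical driver code, not part of this PR; the prompt and step count are illustrative):

# Construct lazily, load once, then reuse the underlying pipeline.
from DiffusersServer.Pipelines import TextToImagePipelineSD3

wrapper = TextToImagePipelineSD3()  # falls back to the MODEL_PATH env var
wrapper.start()                     # selects CUDA or MPS and loads the weights

image = wrapper.pipeline(
    prompt="A watercolor fox in a forest",
    num_inference_steps=28,
).images[0]
image.save("fox.png")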
2 changes: 2 additions & 0 deletions examples/server-async/DiffusersServer/__init__.py
@@ -0,0 +1,2 @@
from .Pipelines import TextToImagePipelineSD3
from .create_server import create_inference_server_Async as DiffusersServerApp
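
For callers, the package root is the intended import surface; a two-line sketch (assuming the example directory is on the import path):

# DiffusersServerApp is the alias for create_inference_server_Async defined above.
from DiffusersServer import DiffusersServerApp, TextToImagePipelineSD3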
45 changes: 45 additions & 0 deletions examples/server-async/DiffusersServer/create_server.py
@@ -0,0 +1,45 @@
# create_server.py

import asyncio
from typing import Any, Dict, Optional, Type

import torch

from .Pipelines import *
from .serverasync import *
from .uvicorn_diffu import *

def create_inference_server_Async(
    model: str,
    type_model: str = 't2im',
    host: str = '0.0.0.0',
    port: int = 8500,
    threads: int = 5,
    enable_memory_monitor: bool = True,
    custom_model: bool = False,
    custom_pipeline: Optional[Type] = None,
    constructor_pipeline: Optional[Type] = None,
    components: Optional[Dict[str, Any]] = None,
    api_name: Optional[str] = 'custom_api',
    torch_dtype=torch.bfloat16,
):
    config = ServerConfigModels(
        model=model,
        type_models=type_model,
        custom_model=custom_model,
        custom_pipeline=custom_pipeline,
        constructor_pipeline=constructor_pipeline,
        components=components,
        api_name=api_name,
        torch_dtype=torch_dtype,
        host=host,
        port=port,
    )

    app = create_app_fastapi(config)

    # Blocks until the Uvicorn server shuts down, then returns the app.
    asyncio.run(run_uvicorn_server(
        app,
        host=host,
        port=port,
        workers=threads,
        enable_memory_monitor=enable_memory_monitor,
    ))

    return app
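
Note that create_inference_server_Async is a blocking entry point: asyncio.run(...) only returns once Uvicorn stops, so the returned app is mainly useful for tests or post-shutdown inspection. A minimal launch script might look like this (hypothetical file name; the model id and port are illustrative values matching the defaults above):

# launch.py (hypothetical example, not part of this PR)
from DiffusersServer.create_server import create_inference_server_Async

if __name__ == "__main__":
    # Blocks here until the server is stopped (e.g. Ctrl+C).
    create_inference_server_Async(
        model="stabilityai/stable-diffusion-3.5-large",
        type_model="t2im",
        host="0.0.0.0",
        port=8500,
        threads=5,
        enable_memory_monitor=True,
    )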