
Commit b052d27

We keep the implementation simple in examples/server-async
1 parent ed617fe commit b052d27

5 files changed, +58 -370 lines changed
Pipelines.py: 9 additions & 137 deletions

@@ -1,13 +1,11 @@
 # Pipelines.py
-
 from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
 from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
 import torch
 import os
 import logging
 from pydantic import BaseModel
-import gc
 
 logger = logging.getLogger(__name__)
 
@@ -22,155 +20,36 @@ def __init__(self, model_path: str | None = None):
         self.model_path = model_path or os.getenv("MODEL_PATH")
         self.pipeline: StableDiffusion3Pipeline | None = None
         self.device: str | None = None
-
+
     def start(self):
-        torch.set_float32_matmul_precision("high")
-
-        if hasattr(torch._inductor, 'config'):
-            if hasattr(torch._inductor.config, 'conv_1x1_as_mm'):
-                torch._inductor.config.conv_1x1_as_mm = True
-            if hasattr(torch._inductor.config, 'coordinate_descent_tuning'):
-                torch._inductor.config.coordinate_descent_tuning = True
-            if hasattr(torch._inductor.config, 'epilogue_fusion'):
-                torch._inductor.config.epilogue_fusion = False
-            if hasattr(torch._inductor.config, 'coordinate_descent_check_all_directions'):
-                torch._inductor.config.coordinate_descent_check_all_directions = True
-
-        if torch.cuda.is_available():
-            torch.backends.cudnn.benchmark = True
-            torch.backends.cuda.matmul.allow_tf32 = True
-            torch.backends.cudnn.deterministic = False
-            torch.backends.cudnn.allow_tf32 = True
-
         if torch.cuda.is_available():
             model_path = self.model_path or "stabilityai/stable-diffusion-3.5-large"
-            logger.info(f"Loading CUDA with model: {model_path}")
+            logger.info("Loading CUDA")
             self.device = "cuda"
-
-            torch.cuda.empty_cache()
-            gc.collect()
-
             self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                 model_path,
                 torch_dtype=torch.float16,
-                use_safetensors=True,
-                variant="fp16" if "fp16" in model_path else None,
-                low_cpu_mem_usage=True,
-            )
-
-            self.pipeline = self.pipeline.to(device=self.device)
-
-            if hasattr(self.pipeline, 'enable_vae_slicing'):
-                self.pipeline.enable_vae_slicing()
-                logger.info("VAE slicing enabled - will reduce memory spikes during decoding")
-
-            if hasattr(self.pipeline, 'enable_vae_tiling'):
-                self.pipeline.enable_vae_tiling()
-                logger.info("VAE tiling enabled - will allow processing larger images")
-
-            if hasattr(self.pipeline, 'transformer') and self.pipeline.transformer is not None:
-                self.pipeline.transformer = self.pipeline.transformer.to(
-                    memory_format=torch.channels_last
-                )
-                logger.info("Transformer optimized with channels_last format")
-
-            if hasattr(self.pipeline, 'vae') and self.pipeline.vae is not None:
-                self.pipeline.vae = self.pipeline.vae.to(
-                    memory_format=torch.channels_last
-                )
-
-                if hasattr(self.pipeline.vae, 'enable_slicing'):
-                    self.pipeline.vae.enable_slicing()
-                    logger.info("VAE slicing activated directly in the VAE")
-
-                if hasattr(self.pipeline.vae, 'enable_tiling'):
-                    self.pipeline.vae.enable_tiling()
-                    logger.info("VAE tiling activated directly on the VAE")
-
-                logger.info("VAE optimized with channels_last format")
-
-            try:
-                self.pipeline.enable_xformers_memory_efficient_attention()
-                logger.info("XFormers memory efficient attention enabled")
-            except Exception as e:
-                logger.info(f"XFormers not available: {e}")
-
-            logger.info("Skipping torch.compile - running without compile optimizations by design")
-
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
-            logger.info("CUDA pipeline fully optimized and ready")
-
+            ).to(device=self.device)
         elif torch.backends.mps.is_available():
             model_path = self.model_path or "stabilityai/stable-diffusion-3.5-medium"
-            logger.info(f"Loading MPS for Mac M Series with model: {model_path}")
+            logger.info("Loading MPS for Mac M Series")
             self.device = "mps"
-
             self.pipeline = StableDiffusion3Pipeline.from_pretrained(
                 model_path,
                 torch_dtype=torch.bfloat16,
-                use_safetensors=True,
-                low_cpu_mem_usage=True,
             ).to(device=self.device)
-
-            if hasattr(self.pipeline, 'enable_vae_slicing'):
-                self.pipeline.enable_vae_slicing()
-                logger.info("VAE slicing enabled in MPS")
-
-            if hasattr(self.pipeline, 'transformer') and self.pipeline.transformer is not None:
-                self.pipeline.transformer = self.pipeline.transformer.to(
-                    memory_format=torch.channels_last
-                )
-
-            if hasattr(self.pipeline, 'vae') and self.pipeline.vae is not None:
-                self.pipeline.vae = self.pipeline.vae.to(
-                    memory_format=torch.channels_last
-                )
-
-            logger.info("MPS pipeline optimized and ready")
-
         else:
             raise Exception("No CUDA or MPS device available")
-
-
-        self._warmup()
-
-        logger.info("Pipeline initialization completed successfully")
-
-    def _warmup(self):
-        if self.pipeline:
-            logger.info("Running warmup inference...")
-            with torch.no_grad():
-                _ = self.pipeline(
-                    prompt="warmup",
-                    num_inference_steps=1,
-                    height=512,
-                    width=512,
-                    guidance_scale=1.0,
-                )
-
-            if self.device == "cuda":
-                torch.cuda.synchronize()
-                torch.cuda.empty_cache()
-
-            gc.collect()
-            logger.info("Warmup completed with memory cleanup")
 
 class TextToImagePipelineFlux:
     def __init__(self, model_path: str | None = None, low_vram: bool = False):
-        """
-        Initialize the class with the model path.
-        If none is provided, it is read from the environment variable.
-        """
         self.model_path = model_path or os.getenv("MODEL_PATH")
-        self.pipeline: FluxPipeline = None
-        self.device: str = None
+        self.pipeline: FluxPipeline | None = None
+        self.device: str | None = None
         self.low_vram = low_vram
 
     def start(self):
         if torch.cuda.is_available():
-            # If model_path was not set, assign the default value for CUDA.
            model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
            logger.info("Loading CUDA")
            self.device = "cuda"
@@ -183,7 +62,6 @@ def start(self):
             else:
                 pass
         elif torch.backends.mps.is_available():
-            # If model_path was not set, assign the default value for MPS.
             model_path = self.model_path or "black-forest-labs/FLUX.1-schnell"
             logger.info("Loading MPS for Mac M Series")
             self.device = "mps"
@@ -196,17 +74,12 @@ def start(self):
 
 class TextToImagePipelineSD:
     def __init__(self, model_path: str | None = None):
-        """
-        Initialize the class with the model path.
-        If none is provided, it is read from the environment variable.
-        """
         self.model_path = model_path or os.getenv("MODEL_PATH")
-        self.pipeline: StableDiffusionPipeline = None
-        self.device: str = None
+        self.pipeline: StableDiffusionPipeline | None = None
+        self.device: str | None = None
 
     def start(self):
         if torch.cuda.is_available():
-            # If model_path was not set, assign the default value for CUDA.
             model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
             logger.info("Loading CUDA")
             self.device = "cuda"
@@ -215,7 +88,6 @@ def start(self):
                 torch_dtype=torch.float16,
             ).to(device=self.device)
         elif torch.backends.mps.is_available():
-            # If model_path was not set, assign the default value for MPS.
             model_path = self.model_path or "sd-legacy/stable-diffusion-v1-5"
             logger.info("Loading MPS for Mac M Series")
             self.device = "mps"
@@ -224,4 +96,4 @@ def start(self):
                 torch_dtype=torch.float16,
             ).to(device=self.device)
         else:
-            raise Exception("No CUDA or MPS device available")
+            raise Exception("No CUDA or MPS device available")
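
For context, a minimal sketch of how the simplified SD3 class can be driven after this commit. The import path assumes the package directory is named DiffusersServer, and the prompt and generation parameters are illustrative, not part of the commit:

# Sketch only: the package name and the generation parameters below are assumptions.
from DiffusersServer.Pipelines import TextToImagePipelineSD3

pipe = TextToImagePipelineSD3()  # falls back to the MODEL_PATH env var, then the per-device default model
pipe.start()                     # selects CUDA or MPS and loads the model, or raises

# After start(), pipe.pipeline is a plain diffusers StableDiffusion3Pipeline
result = pipe.pipeline(
    prompt="a photo of an astronaut riding a horse",
    num_inference_steps=28,
    guidance_scale=7.0,
)
result.images[0].save("output.png")

With the warmup pass, gc/empty_cache calls, and hasattr-guarded VAE/attention tweaks gone, all generation behavior now lives in the underlying diffusers pipeline call.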
Lines changed: 0 additions & 1 deletion

@@ -1,3 +1,2 @@
 from .Pipelines import TextToImagePipelineSD3
-from .superpipeline import SuperPipelinesT2Img
 from .create_server import create_inference_server_Async as DiffusersServerApp
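
With the superpipeline re-export removed, these are the names left on the package surface. A sketch of the downstream imports that still work, again assuming the package directory is named DiffusersServer:

# Assumption: the package directory is DiffusersServer.
from DiffusersServer import TextToImagePipelineSD3, DiffusersServerApp

# DiffusersServerApp is an alias for create_inference_server_Async;
# TextToImagePipelineSD3 is the simplified SD3 wrapper shown above.
# Code that imported SuperPipelinesT2Img from this package will now fail at import time.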
