Skip to content

Commit 39005cf

Browse files
authored
Merge pull request #62 from jkoelker/jk/refactor
feat(device): centralize device management with DevicePolicy class
2 parents 950d854 + faaaf2c commit 39005cf

File tree

13 files changed

+584
-262
lines changed

13 files changed

+584
-262
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,6 @@ Thumbs.db
9292

9393
# uv
9494
uv.lock
95+
96+
.beads
97+
.gitattributes

AGENTS.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ Discord bot for image generation with Huggingface Diffusers.
66

77
```bash
88
uv pip install -e ".[dev]" # Install dev dependencies
9-
uv run --extra dev pytest -v # Run all tests
10-
uv run --extra dev pytest tests/test_config.py -v # Single file
9+
uv run --extra dev pytest # Run all tests
10+
uv run --extra dev pytest tests/test_config.py # Single file
1111
ruff check src/ --fix # Lint + auto-fix
1212
ruff format src/ # Format
1313
```
@@ -137,3 +137,20 @@ select = ["E", "W", "F", "I", "B", "C4", "UP"]
137137
[tool.ruff.lint.isort]
138138
known-first-party = ["oneiro"]
139139
```
140+
141+
## Git Workflow
142+
143+
- **Commits allowed** on atomic work units (single logical change)
144+
- **Never push** - leave pushing to the user
145+
- **Never `git add .`** - only stage specific files needed for the commit
146+
147+
```bash
148+
# CORRECT: Stage specific files
149+
git add src/oneiro/config.py tests/test_config.py
150+
git commit -m "Add config hot reload support"
151+
152+
# WRONG: Never do this
153+
git add .
154+
git add -A
155+
git push
156+
```

src/oneiro/device.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Device management for pipeline placement."""
2+
3+
from dataclasses import dataclass
4+
from enum import Enum
5+
6+
import torch
7+
8+
9+
class OffloadMode(str, Enum):
    """CPU offload behavior for CUDA pipelines."""

    AUTO = "auto"  # Offload whenever CUDA is available (default)
    ALWAYS = "always"  # Offload is mandatory (error on non-CUDA devices)
    NEVER = "never"  # Skip offload entirely; place via .to(device)


@dataclass(frozen=True)
class DevicePolicy:
    """Immutable device configuration for pipeline placement.

    Attributes:
        device: Target device ("cuda", "mps", "cpu")
        dtype: Torch dtype for model weights
        offload: CPU offload behavior for large models
    """

    device: str
    dtype: torch.dtype
    offload: OffloadMode = OffloadMode.AUTO

    @classmethod
    def auto_detect(cls, cpu_offload: bool = True) -> "DevicePolicy":
        """Create policy with auto-detected device and dtype.

        Args:
            cpu_offload: Enable CPU offloading when available (default: True)

        Returns:
            DevicePolicy configured for the best available device
        """
        if torch.cuda.is_available():
            # Prefer bfloat16 where the GPU supports it, float16 otherwise.
            picked = "cuda"
            chosen = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            # Apple silicon: MPS works best with float32.
            picked, chosen = "mps", torch.float32
        else:
            picked, chosen = "cpu", torch.float32

        mode = OffloadMode.AUTO if cpu_offload else OffloadMode.NEVER
        return cls(device=picked, dtype=chosen, offload=mode)

    def apply_to_pipeline(self, pipe) -> None:
        """Apply device policy to a diffusers pipeline.

        Args:
            pipe: A diffusers pipeline instance

        Raises:
            ValueError: If offload=ALWAYS but device is not CUDA
        """
        # Offload when required, or when AUTO resolves on a CUDA device.
        # (== rather than `is`: the str-Enum also matches raw string values.)
        wants_offload = self.offload == OffloadMode.ALWAYS or (
            self.offload == OffloadMode.AUTO and self.device == "cuda"
        )
        if not wants_offload:
            # Explicit placement; a CPU pipeline stays where it is.
            if self.device != "cpu":
                pipe.to(self.device)
            return

        if self.device != "cuda":
            raise ValueError(
                f"CPU offload requires CUDA device, got '{self.device}'. "
                f"Set cpu_offload=false in config or use a CUDA-enabled system."
            )
        pipe.enable_model_cpu_offload()

    @staticmethod
    def clear_cache() -> None:
        """Clear device memory cache if applicable."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            torch.cuda.empty_cache()
            return
        # Only touch torch.mps when the MPS backend is actually available.
        on_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        if on_mps and hasattr(torch.mps, "empty_cache"):
            torch.mps.empty_cache()

src/oneiro/pipelines/base.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import torch
1212
from PIL import Image
1313

14+
from oneiro.device import DevicePolicy
15+
1416

1517
@dataclass
1618
class GenerationResult:
@@ -29,10 +31,9 @@ class GenerationResult:
2931
class BasePipeline(ABC):
3032
"""Base class for all pipeline types."""
3133

32-
def __init__(self):
34+
def __init__(self) -> None:
3335
self.pipe: Any = None
34-
self._device = "cuda" if torch.cuda.is_available() else "cpu"
35-
self._dtype = torch.bfloat16 if self._device == "cuda" else torch.float32
36+
self.policy: DevicePolicy = DevicePolicy.auto_detect()
3637

3738
@abstractmethod
3839
def load(self, model_config: dict[str, Any], full_config: dict[str, Any] | None = None) -> None:
@@ -68,11 +69,8 @@ def unload(self) -> None:
6869
del self.pipe
6970
self.pipe = None
7071

71-
# Aggressive cleanup
7272
gc.collect()
73-
if torch.cuda.is_available():
74-
torch.cuda.empty_cache()
75-
torch.cuda.synchronize()
73+
DevicePolicy.clear_cache()
7674

7775
def _prepare_seed(self, seed: int) -> tuple[int, torch.Generator]:
7876
"""Prepare seed and generator for generation."""

src/oneiro/pipelines/civitai_checkpoint.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
from pathlib import Path
1111
from typing import TYPE_CHECKING, Any
1212

13-
import torch
14-
13+
from oneiro.device import DevicePolicy, OffloadMode
1514
from oneiro.pipelines.base import BasePipeline, GenerationResult
1615
from oneiro.pipelines.embedding import EmbeddingLoaderMixin, parse_embeddings_from_config
1716
from oneiro.pipelines.long_prompt import (
@@ -619,25 +618,26 @@ def _load_from_path(self, checkpoint_path: Path, model_config: dict[str, Any]) -
619618
print(f" Base model: {base_model or 'unknown'}")
620619
print(f" Pipeline: {self._pipeline_config.pipeline_class}")
621620

621+
cpu_offload = model_config.get("cpu_offload", True)
622+
self.policy = DevicePolicy.auto_detect(cpu_offload=cpu_offload)
623+
622624
# Get the pipeline class
623625
pipeline_class = get_diffusers_pipeline_class(self._pipeline_config.pipeline_class)
624626

625627
# Load from single file
626628
self.pipe = pipeline_class.from_single_file(
627629
str(checkpoint_path),
628-
torch_dtype=self._dtype,
630+
torch_dtype=self.policy.dtype,
629631
)
630632

631633
scheduler_override = model_config.get("scheduler")
632634
self.configure_scheduler(scheduler_override)
633635

634-
# Apply optimizations
635-
cpu_offload = model_config.get("cpu_offload", True)
636-
self._cpu_offload = cpu_offload and self._device == "cuda"
637-
if self._cpu_offload:
638-
self.pipe.enable_model_cpu_offload()
639-
elif self._device == "cuda":
640-
self.pipe.to("cuda")
636+
self.policy.apply_to_pipeline(self.pipe)
637+
# Track whether offload was applied (for dynamic LoRA handling)
638+
self._cpu_offload = (
639+
self.policy.offload != OffloadMode.NEVER and self.policy.device == "cuda"
640+
)
641641

642642
# Enable memory optimizations for VAE if available
643643
if hasattr(self.pipe, "vae"):
@@ -859,8 +859,7 @@ def _run_generation(
859859

860860
result = self.pipe(**gen_kwargs)
861861

862-
if torch.cuda.is_available():
863-
torch.cuda.empty_cache()
862+
DevicePolicy.clear_cache()
864863

865864
output_image = result.images[0]
866865
return GenerationResult(
@@ -887,7 +886,7 @@ def _load_dynamic_loras(self, loras: list[LoraConfig]) -> None:
887886
# Only move pipeline to device manually when CPU offload is not enabled.
888887
# With CPU offload, diffusers manages device placement automatically.
889888
if not self._cpu_offload:
890-
self.pipe.to(self._device)
889+
self.pipe.to(self.policy.device)
891890

892891
loaded_names: list[str] = []
893892
loaded_weights: list[float] = []
@@ -911,7 +910,7 @@ def _restore_static_loras(self) -> None:
911910
return
912911

913912
if not self._cpu_offload:
914-
self.pipe.to(self._device)
913+
self.pipe.to(self.policy.device)
915914
self.load_loras_sync(self._static_lora_configs)
916915
print(f"Restored {len(self._static_lora_configs)} static LoRA(s)")
917916

src/oneiro/pipelines/flux1.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
from typing import Any
44

5-
import torch
6-
5+
from oneiro.device import DevicePolicy
76
from oneiro.pipelines.base import BasePipeline, GenerationResult
87

98

@@ -49,14 +48,15 @@ def load(self, model_config: dict[str, Any]) -> None:
4948
# Configure CPU threading for text encoder
5049
self._configure_cpu_threads(cpu_utilization)
5150

51+
self.policy = DevicePolicy.auto_detect(cpu_offload=cpu_offload)
52+
5253
print(" Creating pipeline...")
5354
self.pipe = FluxPipeline.from_pretrained(
5455
repo,
55-
torch_dtype=self._dtype,
56+
torch_dtype=self.policy.dtype,
5657
)
5758

58-
if cpu_offload:
59-
self.pipe.enable_model_cpu_offload()
59+
self.policy.apply_to_pipeline(self.pipe)
6060

6161
# Memory optimization for large T5 encoder and high-res VAE decoding
6262
self.pipe.vae.enable_tiling()
@@ -130,8 +130,7 @@ def generate(
130130
max_sequence_length=512,
131131
)
132132

133-
if torch.cuda.is_available():
134-
torch.cuda.empty_cache()
133+
DevicePolicy.clear_cache()
135134

136135
output_image = result.images[0]
137136
return GenerationResult(

src/oneiro/pipelines/flux2.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
from typing import Any
44

5-
import torch
6-
5+
from oneiro.device import DevicePolicy
76
from oneiro.pipelines.base import BasePipeline, GenerationResult
87
from oneiro.pipelines.embedding import EmbeddingLoaderMixin, parse_embeddings_from_config
98
from oneiro.pipelines.lora import LoraLoaderMixin, parse_loras_from_model_config
@@ -31,20 +30,22 @@ def load(self, model_config: dict[str, Any], full_config: dict[str, Any] | None
3130
# Configure CPU threading for text encoder
3231
self._configure_cpu_threads(cpu_utilization)
3332

33+
self.policy = DevicePolicy.auto_detect(cpu_offload=cpu_offload)
34+
3435
# Load transformer and text encoder on CPU first
3536
print(" Loading transformer on CPU...")
3637
transformer = Flux2Transformer2DModel.from_pretrained(
3738
repo,
3839
subfolder="transformer",
39-
torch_dtype=self._dtype,
40+
torch_dtype=self.policy.dtype,
4041
device_map="cpu",
4142
)
4243

4344
print(" Loading text encoder on CPU...")
4445
text_encoder = Mistral3ForConditionalGeneration.from_pretrained(
4546
repo,
4647
subfolder="text_encoder",
47-
torch_dtype=self._dtype,
48+
torch_dtype=self.policy.dtype,
4849
device_map="cpu",
4950
)
5051

@@ -53,11 +54,10 @@ def load(self, model_config: dict[str, Any], full_config: dict[str, Any] | None
5354
repo,
5455
transformer=transformer,
5556
text_encoder=text_encoder,
56-
torch_dtype=self._dtype,
57+
torch_dtype=self.policy.dtype,
5758
)
5859

59-
if cpu_offload:
60-
self.pipe.enable_model_cpu_offload()
60+
self.policy.apply_to_pipeline(self.pipe)
6161

6262
loras = parse_loras_from_model_config(model_config)
6363
if loras:
@@ -115,8 +115,7 @@ def generate(
115115
generator=generator,
116116
)
117117

118-
if torch.cuda.is_available():
119-
torch.cuda.empty_cache()
118+
DevicePolicy.clear_cache()
120119

121120
output_image = result.images[0]
122121
return GenerationResult(

0 commit comments

Comments
 (0)