Skip to content

Commit 912ab4a

Browse files
authored
Merge pull request numz#298 from AInVFX/main
v2.5.10: Fix determinism, BlockSwap caching, and model path resolution
2 parents 9f78b30 + 65dd29a commit 912ab4a

File tree

8 files changed

+142
-52
lines changed

8 files changed

+142
-52
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ We're actively working on improvements and new features. To stay informed:
3636

3737
## 🚀 Updates
3838

39+
**2025.11.13 - Version 2.5.10**
40+
41+
- **🎯 Fix: Deterministic generation** - Identical images with the same seed now produce identical results across different sessions and batch positions
42+
- **🔧 Fix: Model caching with BlockSwap** - Resolved issue where cached DiT models wouldn't properly reload when VAE caching state changed
43+
- **💾 Fix: Runner caching optimization** - Runner templates now correctly cache whenever both DiT and VAE are cached, regardless of caching order
44+
- **📁 Fix: Case-insensitive model paths** - Extra model paths in YAML config now work regardless of case (seedvr2, SEEDVR2, SeedVR2, etc.)
45+
- **🐛 Fix: High resolution tile debug crash** - Fixed "'NoneType' object has no attribute 'log'" error when using maximum resolution with VAE tiling
46+
- **📊 Fix: Temporal overlap logging** - Corrected frame count reporting when temporal overlap is automatically adjusted
47+
- **🔍 Feature: Enhanced model path debugging** - Added detailed logging to help troubleshoot model loading issues (visible in debug mode)
48+
3949
**2025.11.12 - Version 2.5.9**
4050

4151
- **🐛 Fix: Tile debug visualization crash** - Fixed OpenCV error when using VAE tile debug mode on certain systems.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "seedvr2_videoupscaler"
33
description = "SeedVR2 official ComfyUI integration: ByteDance-Seed's one-step diffusion-based video/image upscaling with memory-efficient inference"
4-
version = "2.5.9"
4+
version = "2.5.10"
55
authors = [
66
{name = "numz"},
77
{name = "adrientoupet"}

src/core/generation_phases.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ def encode_all_batches(
268268
if step <= 0:
269269
step = batch_size
270270
temporal_overlap = 0
271+
debug.log(f"temporal_overlap >= batch_size, resetting to 0", level="WARNING", category="setup", force=True)
272+
273+
# Store actual temporal overlap used (may differ from parameter if reset)
274+
ctx['actual_temporal_overlap'] = temporal_overlap
271275

272276
# Calculate number of batches
273277
num_encode_batches = 0
@@ -306,6 +310,14 @@ def encode_all_batches(
306310
runner.vae, ctx['cache_context']['vae_model'], debug
307311
)
308312
ctx['cache_context']['vae_newly_cached'] = True
313+
314+
# If both models now cached, cache runner template
315+
dit_is_cached = ctx['cache_context']['cached_dit'] or ctx['cache_context']['dit_newly_cached']
316+
if dit_is_cached:
317+
ctx['cache_context']['global_cache'].set_runner(
318+
ctx['cache_context']['dit_id'], ctx['cache_context']['vae_id'],
319+
runner, debug
320+
)
309321

310322
# Set deterministic seed for VAE encoding (separate from diffusion noise)
311323
# Uses seed + 1,000,000 to avoid collision with upscaling batch seeds
@@ -620,6 +632,7 @@ def upscale_all_batches(
620632
runner.dit, ctx['cache_context']['dit_model'], debug
621633
)
622634
ctx['cache_context']['dit_newly_cached'] = True
635+
623636
# If both models now cached, cache runner template
624637
vae_is_cached = ctx['cache_context']['cached_vae'] or ctx['cache_context']['vae_newly_cached']
625638
if vae_is_cached:
@@ -628,11 +641,6 @@ def upscale_all_batches(
628641
runner, debug
629642
)
630643

631-
# Set base seed for DiT noise generation
632-
# Ensures deterministic noise across all batches in this upscaling phase
633-
set_seed(seed)
634-
debug.log(f"Using seed: {seed}", category="dit")
635-
636644
# Move DiT to GPU for upscaling (no-op if already there)
637645
manage_model_device(model=runner.dit, target_device=ctx['dit_device'],
638646
model_name="DiT", debug=debug, runner=runner)
@@ -646,6 +654,11 @@ def upscale_all_batches(
646654
check_interrupt(ctx)
647655

648656
debug.log(f"Upscaling batch {upscale_idx+1}/{num_valid_latents}", category="generation", force=True)
657+
# Reset seed for each batch to ensure identical RNG state
658+
# This ensures identical inputs produce identical outputs regardless of batch position
659+
set_seed(seed)
660+
debug.log(f"Using seed: {seed} for deterministic generation", category="dit")
661+
649662
debug.start_timer(f"upscale_batch_{upscale_idx+1}")
650663

651664
# Move to DiT device with correct dtype for upscaling (no-op if already there)
@@ -1332,16 +1345,19 @@ def postprocess_all_batches(
13321345
if total_padding_removed > 0:
13331346
adjustments.append(f"{total_padding_removed} padding")
13341347

1348+
# Use actual temporal overlap from encoding (may have been reset)
1349+
actual_overlap = ctx.get('actual_temporal_overlap', temporal_overlap)
1350+
13351351
# Calculate and include temporal overlap blending info
1336-
if temporal_overlap > 0:
1337-
frames_blended = (num_valid_samples - 1) * temporal_overlap
1352+
if actual_overlap > 0:
1353+
frames_blended = (num_valid_samples - 1) * actual_overlap
13381354
adjustments.append(f"{frames_blended} overlap")
13391355

13401356
if adjustments:
13411357
# Add back all removed/blended frames to get true computed count
13421358
total_computed = frames_before_removal + total_padding_removed
1343-
if temporal_overlap > 0:
1344-
total_computed += (num_valid_samples - 1) * temporal_overlap
1359+
if actual_overlap > 0:
1360+
total_computed += (num_valid_samples - 1) * actual_overlap
13451361
frame_info += f" ({total_computed} computed with {' + '.join(adjustments)} removed)"
13461362

13471363
debug.log(f"Final output assembled: {frame_info}, Resolution: {Wf}x{Hf}px, Channels: {channels_str}",

src/models/video_vae_v3/modules/attn_video_vae.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,7 +1308,8 @@ def tiled_encode(self, x: torch.Tensor, tile_size: Tuple[int, int] = (512, 512),
13081308
if H <= tile_h and W <= tile_w:
13091309
return self.slicing_encode(x)
13101310
else:
1311-
self.debug.log(f"Using VAE tiled encoding (Tile: {tile_size}, Overlap: {tile_overlap})", category="vae", force=True, indent_level=1)
1311+
if self.debug:
1312+
self.debug.log(f"Using VAE tiled encoding (Tile: {tile_size}, Overlap: {tile_overlap})", category="vae", force=True, indent_level=1)
13121313

13131314
# Spatial scale factor (output/latent)
13141315
scale_factor = self.spatial_downsample_factor
@@ -1481,7 +1482,8 @@ def tiled_decode(self, z: torch.Tensor, tile_size: Tuple[int, int] = (512, 512),
14811482
if H <= latent_tile_h and W <= latent_tile_w:
14821483
return self.slicing_decode(z)
14831484
else:
1484-
self.debug.log(f"Using VAE tiled decoding (Tile: {tile_size}, Overlap: {tile_overlap})", category="vae", force=True, indent_level=1)
1485+
if self.debug:
1486+
self.debug.log(f"Using VAE tiled decoding (Tile: {tile_size}, Overlap: {tile_overlap})", category="vae", force=True, indent_level=1)
14851487

14861488
latent_overlap_h = max(0, min((overlap_h // scale_factor), latent_tile_h - 1))
14871489
latent_overlap_w = max(0, min((overlap_w // scale_factor), latent_tile_w - 1))

src/optimization/blockswap.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def apply_block_swap_to_dit(
193193
runner._blockswap_active = True
194194

195195
# Store configuration for debugging and cleanup
196-
runner._block_swap_config = {
196+
model._block_swap_config = {
197197
"blocks_swapped": blocks_to_swap,
198198
"swap_io_components": swap_io_components,
199199
"total_blocks": total_blocks,
@@ -650,11 +650,11 @@ def _protect_model_from_move(
650650
651651
Wraps model.to() method to prevent other code from accidentally moving
652652
the entire model to GPU, which would defeat BlockSwap's memory savings.
653-
Allows movement only when explicitly bypassed via runner flag.
653+
Allows movement only when explicitly bypassed via model flag.
654654
655655
Args:
656656
model: DiT model to protect
657-
runner: VideoDiffusionInfer instance (stored as weak reference)
657+
runner: VideoDiffusionInfer instance (for active status check)
658658
debug: Debug instance for logging (required)
659659
"""
660660
if not hasattr(model, '_original_to'):
@@ -665,34 +665,46 @@ def _protect_model_from_move(
665665
# Define the protected method without closures
666666
def protected_model_to(self, device, *args, **kwargs):
667667
# Check if protection is temporarily bypassed for offloading
668+
# Flag is stored on model itself (not runner) to survive runner recreation
669+
if getattr(self, "_blockswap_bypass_protection", False):
670+
# Protection bypassed, allow movement
671+
if hasattr(self, '_original_to'):
672+
return self._original_to(device, *args, **kwargs)
673+
674+
# Get configured offload device directly from model
675+
blockswap_offload_device = "cpu" # default
676+
if hasattr(self, "_block_swap_config"):
677+
blockswap_offload_device = self._block_swap_config.get("offload_device", "cpu")
678+
679+
# Check if BlockSwap is currently active via runner weak reference
668680
runner_ref = getattr(self, '_blockswap_runner_ref', None)
681+
blockswap_is_active = False
669682
if runner_ref:
670683
runner_obj = runner_ref()
671-
if runner_obj and getattr(runner_obj, "_blockswap_bypass_protection", False):
672-
# Protection bypassed, allow movement
673-
if hasattr(self, '_original_to'):
674-
return self._original_to(device, *args, **kwargs)
684+
if runner_obj and hasattr(runner_obj, "_blockswap_active"):
685+
blockswap_is_active = runner_obj._blockswap_active
675686

676-
# Check blockswap status using weak reference
677-
# Get configured offload device from runner
678-
blockswap_offload_device = "cpu" # default
679-
if runner_ref:
680-
runner_obj = runner_ref()
681-
if runner_obj and hasattr(runner_obj, "_block_swap_config"):
682-
blockswap_offload_device = runner_obj._block_swap_config.get("offload_device", "cpu")
687+
# Block attempts to move model away from configured offload device when active
688+
if blockswap_is_active and str(device) != str(blockswap_offload_device):
689+
# Get debug instance from runner if available
690+
debug_instance = None
691+
if runner_ref:
692+
runner_obj = runner_ref()
693+
if runner_obj and hasattr(runner_obj, 'debug'):
694+
debug_instance = runner_obj.debug
683695

684-
# Block attempts to move model away from configured offload device
685-
if str(device) != str(blockswap_offload_device):
686-
if runner_obj and hasattr(runner_obj, "_blockswap_active") and runner_obj._blockswap_active:
687-
debug.log(f"Blocked attempt to move blockswapped model from {blockswap_offload_device} to {device}",
688-
level="WARNING", category="blockswap", force=True)
689-
return self
696+
if debug_instance:
697+
debug_instance.log(
698+
f"Blocked attempt to move BlockSwap model from {blockswap_offload_device} to {device}",
699+
level="WARNING", category="blockswap", force=True
700+
)
701+
return self
690702

691-
# Use original method stored as attribute
703+
# Allow movement (either bypass is enabled or target is offload device)
692704
if hasattr(self, '_original_to'):
693705
return self._original_to(device, *args, **kwargs)
694706
else:
695-
# This shouldn't happen, but fallback to super().to()
707+
# Fallback - shouldn't happen
696708
return super(type(self), self).to(device, *args, **kwargs)
697709

698710
# Bind as a method to the model instance
@@ -712,7 +724,13 @@ def set_blockswap_bypass(runner, bypass: bool, debug):
712724
if not hasattr(runner, "_blockswap_active") or not runner._blockswap_active:
713725
return
714726

715-
runner._blockswap_bypass_protection = bypass
727+
# Get the actual model (handle FP8CompatibleDiT wrapper)
728+
model = runner.dit
729+
if hasattr(model, "dit_model"):
730+
model = model.dit_model
731+
732+
# Store on model so it survives runner recreation during caching
733+
model._blockswap_bypass_protection = bypass
716734

717735
if bypass:
718736
debug.log("BlockSwap protection disabled to allow model DiT offloading", category="success")
@@ -741,11 +759,16 @@ def cleanup_blockswap(runner, keep_state_for_cache=False):
741759

742760
debug = runner.debug
743761

744-
# Check if there's any BlockSwap state to clean up
762+
# Get the actual model (handle FP8CompatibleDiT wrapper)
763+
model = runner.dit
764+
if hasattr(model, "dit_model"):
765+
model = model.dit_model
766+
767+
# Check if there's any BlockSwap state to clean up (check both runner and model)
745768
has_blockswap_state = (
746769
hasattr(runner, "_blockswap_active") or
747-
hasattr(runner, "_block_swap_config") or
748-
hasattr(runner, "_blockswap_bypass_protection")
770+
hasattr(model, "_block_swap_config") or
771+
hasattr(model, "_blockswap_bypass_protection")
749772
)
750773

751774
if not has_blockswap_state:
@@ -757,7 +780,7 @@ def cleanup_blockswap(runner, keep_state_for_cache=False):
757780
# Minimal cleanup for caching - just mark as inactive and allow offloading
758781
# Everything else stays intact for fast reactivation
759782
if hasattr(runner, "_blockswap_active") and runner._blockswap_active:
760-
if not getattr(runner, "_blockswap_bypass_protection", False):
783+
if not getattr(model, "_blockswap_bypass_protection", False):
761784
set_blockswap_bypass(runner=runner, bypass=True, debug=debug)
762785
runner._blockswap_active = False
763786
debug.log("BlockSwap deactivated for caching (configuration preserved)", category="success")
@@ -829,7 +852,7 @@ def cleanup_blockswap(runner, keep_state_for_cache=False):
829852

830853
# 5. Clean up BlockSwap-specific attributes
831854
for attr in ['_blockswap_runner_ref', 'blocks_to_swap', 'main_device',
832-
'offload_device', '_blockswap_configured']:
855+
'offload_device']:
833856
if hasattr(model, attr):
834857
delattr(model, attr)
835858

src/optimization/memory_manager.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ def _handle_blockswap_model_movement(runner: Any, model: torch.nn.Module,
762762
else:
763763
# Moving to GPU (reload)
764764
# Check if we're in bypass mode (coming from offload)
765-
if not getattr(runner, "_blockswap_bypass_protection", False):
765+
if not getattr(model, "_blockswap_bypass_protection", False):
766766
# Not in bypass mode, blocks are already configured
767767
if debug:
768768
debug.log(f"{model_name} with BlockSwap active - blocks already distributed across devices, skipping movement", category="general")
@@ -787,7 +787,7 @@ def _handle_blockswap_model_movement(runner: Any, model: torch.nn.Module,
787787
# Restore blocks to their configured devices
788788
if hasattr(model, "blocks") and hasattr(model, "blocks_to_swap"):
789789
# Use configured offload_device from BlockSwap config
790-
offload_device = runner._block_swap_config.get("offload_device")
790+
offload_device = model._block_swap_config.get("offload_device")
791791
if not offload_device:
792792
raise ValueError("BlockSwap config missing offload_device")
793793

@@ -801,7 +801,7 @@ def _handle_blockswap_model_movement(runner: Any, model: torch.nn.Module,
801801
block.to(offload_device)
802802

803803
# Handle I/O components
804-
if not runner._block_swap_config.get("swap_io_components", False):
804+
if not model._block_swap_config.get("swap_io_components", False):
805805
# I/O components should be on GPU if not offloaded
806806
for name, module in model.named_children():
807807
if name != "blocks":
@@ -814,10 +814,10 @@ def _handle_blockswap_model_movement(runner: Any, model: torch.nn.Module,
814814

815815
if debug:
816816
# Get actual configuration from runner
817-
if hasattr(runner, '_block_swap_config'):
818-
blocks_on_gpu = runner._block_swap_config.get('total_blocks', 32) - runner._block_swap_config.get('blocks_swapped', 16)
819-
total_blocks = runner._block_swap_config.get('total_blocks', 32)
820-
main_device = runner._block_swap_config.get('main_device', 'GPU')
817+
if hasattr(model, '_block_swap_config'):
818+
blocks_on_gpu = model._block_swap_config.get('total_blocks', 32) - model._block_swap_config.get('blocks_swapped', 16)
819+
total_blocks = model._block_swap_config.get('total_blocks', 32)
820+
main_device = model._block_swap_config.get('main_device', 'GPU')
821821
debug.log(f"BlockSwap blocks restored to configured devices ({blocks_on_gpu}/{total_blocks} blocks on {str(main_device).upper()})", category="success")
822822
else:
823823
debug.log("BlockSwap blocks restored to configured devices", category="success")

src/utils/constants.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
# Version information
7-
__version__ = "2.5.9"
7+
__version__ = "2.5.10"
88

99
import os
1010
import warnings
@@ -55,14 +55,33 @@ def get_base_cache_dir() -> str:
5555

5656

5757
def get_all_model_paths() -> list:
58-
"""Get all registered model paths including those from extra_model_paths.yaml"""
58+
"""Get all registered model paths including those from extra_model_paths.yaml (case-insensitive)"""
5959
try:
6060
import folder_paths
6161
# Ensure default path is registered first
6262
get_base_cache_dir()
63-
# Get all paths registered for seedvr2 model type
64-
paths = folder_paths.get_folder_paths(SEEDVR2_MODEL_TYPE)
65-
return paths if paths else [get_base_cache_dir()]
63+
64+
# Case-insensitive lookup: search through all registered folder types
65+
# This handles any case variation users might use in extra_model_paths.yaml
66+
all_paths = []
67+
target_lower = SEEDVR2_MODEL_TYPE.lower()
68+
69+
# folder_paths.folder_names_and_paths is the underlying dict: {type: ([paths], extensions)}
70+
if hasattr(folder_paths, 'folder_names_and_paths'):
71+
for folder_type, (paths, _) in folder_paths.folder_names_and_paths.items():
72+
if folder_type.lower() == target_lower:
73+
all_paths.extend(paths)
74+
75+
# Remove duplicates while preserving order (os.path.normpath handles Windows/Linux path differences)
76+
seen = set()
77+
unique_paths = []
78+
for path in all_paths:
79+
normalized = os.path.normpath(path.lower())
80+
if normalized not in seen:
81+
seen.add(normalized)
82+
unique_paths.append(path)
83+
84+
return unique_paths if unique_paths else [get_base_cache_dir()]
6685
except:
6786
return [get_base_cache_dir()]
6887

0 commit comments

Comments
 (0)