Merge pull request #438 from AInVFX/main

adrientoupet · web-flow · commit 5a4bf428f373 · 2025-12-23T21:09:01.000-05:00
v2.5.23: Security hardening, GGUF VAE support, FFmpeg stability, MPS optimization
diff --git a/README.md b/README.md
@@ -36,6 +36,18 @@ We're actively working on improvements and new features. To stay informed:
 
 ## 🚀 Release Notes
 
+**2025.12.24 - Version 2.5.23**
+
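+- **🔒 Security: Prevent code execution in model loading** - Added protection against malicious .pth files by loading them with `torch.load(..., weights_only=True)`, which restricts deserialization to tensors and other allow-listed basic types instead of arbitrary pickled objects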
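+- **🎥 Fix: FFmpeg video writer reliability** - Resolved ffmpeg process hanging issues by discarding ffmpeg's stderr instead of piping it (the pipe was only read after the process had exited) and flushing stdin after every frame; writing to a writer whose ffmpeg process has exited now raises a descriptive `RuntimeError`, and non-zero exit codes are always logged as warnings *(thanks [@thehhmdb](https://github.com/thehhmdb))*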
+- **⚡ Fix: GGUF VAE model support** - Enabled automatic weight dequantization for convolution operations, making GGUF-quantized VAE models fully functional *(thanks [@naxci1](https://github.com/naxci1))*
+- **🛡️ Fix: VAE slicing edge cases** - Protected against division by zero crashes when using small split sizes with high temporal downsampling *(thanks [@naxci1](https://github.com/naxci1))*
+- **🎨 Fix: LAB color transfer precision** - Resolved dtype mismatch errors during video upscaling by ensuring consistent float types before matrix operations
+- **🔧 Fix: PyTorch 2.9+ compatibility** - Extended Conv3d memory workaround to all PyTorch 2.9+ versions, fixing 3x VRAM usage on newer PyTorch releases
+- **📦 Fix: Bitsandbytes compatibility** - Added ValueError exception handling for Intel Gaudi version detection failures on non-Gaudi systems
+- **🍎 MPS: Memory optimization** - Reduced memory usage during encode/decode operations on Apple Silicon *(thanks [@s-cerevisiae](https://github.com/s-cerevisiae))*
+
+
 **2025.12.13 - Version 2.5.22**
 
 - **🎬 CLI: FFmpeg video backend with 10-bit support** - New `--video_backend ffmpeg` and `--10bit` flags enable x265 encoding with 10-bit color depth, reducing banding artifacts in gradients compared to 8-bit OpenCV output *(based on PR by [@thehhmdb](https://github.com/thehhmdb) - thank you!)*
@@ -1030,7 +1042,7 @@ For detailed contribution guidelines, see [CONTRIBUTING.md](CONTRIBUTING.md).
 
 This ComfyUI implementation is a collaborative project by **[NumZ](https://github.com/numz)** and **[AInVFX](https://www.youtube.com/@AInVFX)** (Adrien Toupet), based on the original [SeedVR2](https://github.com/ByteDance-Seed/SeedVR) by ByteDance Seed Team.
 
-Special thanks to our community contributors including [naxci1](https://github.com/naxci1), [benjaminherb](https://github.com/benjaminherb), [cmeka](https://github.com/cmeka), [FurkanGozukara](https://github.com/FurkanGozukara), [JohnAlcatraz](https://github.com/JohnAlcatraz), [lihaoyun6](https://github.com/lihaoyun6), [Luchuanzhao](https://github.com/Luchuanzhao), [Luke2642](https://github.com/Luke2642), [proxyid](https://github.com/proxyid), [q5sys](https://github.com/q5sys), and many others for their improvements, bug fixes, and testing.
+Special thanks to our community contributors including [naxci1](https://github.com/naxci1), [thehhmdb](https://github.com/thehhmdb), [s-cerevisiae](https://github.com/s-cerevisiae), [benjaminherb](https://github.com/benjaminherb), [cmeka](https://github.com/cmeka), [FurkanGozukara](https://github.com/FurkanGozukara), [JohnAlcatraz](https://github.com/JohnAlcatraz), [lihaoyun6](https://github.com/lihaoyun6), [Luchuanzhao](https://github.com/Luchuanzhao), [Luke2642](https://github.com/Luke2642), [proxyid](https://github.com/proxyid), [q5sys](https://github.com/q5sys), and many others for their improvements, bug fixes, and testing.
 
 ## 📜 License
 
diff --git a/inference_cli.py b/inference_cli.py
@@ -171,23 +171,41 @@ def __init__(self, path: str, width: int, height: int, fps: float, use_10bit: bo
             ['ffmpeg', '-y', '-f', 'rawvideo', '-pix_fmt', 'rgb24',
              '-s', f'{width}x{height}', '-r', str(fps), '-i', '-',
              '-c:v', codec, '-pix_fmt', pix_fmt, '-preset', 'medium', '-crf', '12', path],
-            stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
+            stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
         )
     
     def write(self, frame_bgr: np.ndarray):
+        if not self.isOpened():
+            raise RuntimeError("FFMPEGVideoWriter: ffmpeg process is not running")
+        
         frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
-        self.proc.stdin.write(frame_rgb.astype(np.uint8).tobytes())
+        try:
+            self.proc.stdin.write(frame_rgb.astype(np.uint8).tobytes())
+            self.proc.stdin.flush()  # Critical: prevent buffering issues
+        except BrokenPipeError:
+            raise RuntimeError(
+                "FFMPEGVideoWriter: ffmpeg process terminated unexpectedly. "
+                "Check video path, codec support, and disk space."
+            )
     
     def isOpened(self) -> bool:
         return self.proc is not None and self.proc.poll() is None
     
     def release(self):
         if self.proc:
-            self.proc.stdin.close()
+            try:
+                self.proc.stdin.close()
+            except Exception:
+                pass  # Ignore errors on close
+            
             self.proc.wait()
-            stderr = self.proc.stderr.read() if self.proc.stderr else b''
+            
             if self.proc.returncode != 0:
-                debug.log(f"ffmpeg error: {stderr.decode()}", level="WARNING", category="file")
+                debug.log(
+                    f"ffmpeg exited with code {self.proc.returncode}. "
+                    "Check output file for corruption.",
+                    level="WARNING", force=True, category="file"
+                )
             self.proc = None
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "seedvr2_videoupscaler"
 description = "SeedVR2 official ComfyUI integration: ByteDance-Seed's one-step diffusion-based video/image upscaling with memory-efficient inference"
-version = "2.5.22"
+version = "2.5.23"
 authors = [
     {name = "numz"},
     {name = "adrientoupet"}
diff --git a/src/core/model_loader.py b/src/core/model_loader.py
@@ -146,7 +146,7 @@ def load_quantized_state_dict(checkpoint_path: str, device: torch.device = torch
                     handle_prefix="model.diffusion_model."
                 )
     elif checkpoint_path.endswith('.pth'):
-        state = torch.load(checkpoint_path, map_location=device_str, mmap=True)
+        state = torch.load(checkpoint_path, map_location=device_str, mmap=True, weights_only=True)
     else:
         raise ValueError(f"Unsupported checkpoint format. Expected .safetensors or .pth, got: {checkpoint_path}")
     
@@ -393,6 +393,20 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
                 if debug:
                     debug.log(f"Error in {func.__name__} dequantization: {e}", level="WARNING", category="dit", force=True)
                 raise
+
+        # Handle conv2d/conv3d operations (critical for GGUF VAE models)
+        # Conv3d layers (InflatedCausalConv3d) are not swapped out by layer replacement, so a quantized weight passed to them is dequantized here at call time
+        if func in {torch.nn.functional.conv2d, torch.nn.functional.conv3d}:
+            if len(args) >= 2 and isinstance(args[1], cls):  # weight is second arg
+                try:
+                    weight_tensor = args[1]
+                    dequantized_weight = weight_tensor.dequantize(device=args[0].device, dtype=args[0].dtype)
+                    new_args = (args[0], dequantized_weight) + args[2:]
+                    return func(*new_args, **kwargs)
+                except Exception as e:
+                    if debug:
+                        debug.log(f"Error in conv dequantization: {e}", level="WARNING", category="dit", force=True)
+                    raise
         
         # For ALL other operations, delegate to parent WITHOUT dequantization
         # This includes .cpu(), .to(), .device, .dtype, .shape, etc.
diff --git a/src/models/video_vae_v3/modules/attn_video_vae.py b/src/models/video_vae_v3/modules/attn_video_vae.py
@@ -1093,7 +1093,7 @@ def __init__(
     ):
         extra_cond_dim = kwargs.pop("extra_cond_dim") if "extra_cond_dim" in kwargs else None
         self.slicing_sample_min_size = slicing_sample_min_size
-        self.slicing_latent_min_size = slicing_sample_min_size // (2**temporal_scale_num)
+        self.slicing_latent_min_size = max(1, slicing_sample_min_size // (2**temporal_scale_num))
 
         super().__init__(
             in_channels=in_channels,
@@ -1710,7 +1710,7 @@ def set_causal_slicing(
         if split_size is not None:
             self.enable_slicing()
             self.slicing_sample_min_size = split_size
-            self.slicing_latent_min_size = split_size // self.temporal_downsample_factor
+            self.slicing_latent_min_size = max(1, split_size // self.temporal_downsample_factor)
         else:
             self.disable_slicing()
         for module in self.modules():
diff --git a/src/models/video_vae_v3/modules/video_vae.py b/src/models/video_vae_v3/modules/video_vae.py
@@ -733,7 +733,7 @@ def __init__(
         if slicing_sample_min_size is None:
             slicing_sample_min_size = temporal_downsample_factor
         self.slicing_sample_min_size = slicing_sample_min_size
-        self.slicing_latent_min_size = slicing_sample_min_size // (2**temporal_scale_num)
+        self.slicing_latent_min_size = max(1, slicing_sample_min_size // (2**temporal_scale_num))
 
         # pass init params to Encoder
         self.encoder = Encoder3D(
@@ -886,7 +886,7 @@ def set_causal_slicing(
         if split_size is not None:
             self.enable_slicing()
             self.slicing_sample_min_size = split_size
-            self.slicing_latent_min_size = split_size // self.temporal_downsample_factor
+            self.slicing_latent_min_size = max(1, split_size // self.temporal_downsample_factor)
         else:
             self.disable_slicing()
         for module in self.modules():
@@ -950,7 +950,7 @@ def set_causal_slicing(
             self.disable_slicing()
         self.slicing_sample_min_size = split_size
         if split_size is not None:
-            self.slicing_latent_min_size = split_size // self.temporal_downsample_factor
+            self.slicing_latent_min_size = max(1, split_size // self.temporal_downsample_factor)
         for module in self.modules():
             if isinstance(module, InflatedCausalConv3d):
                 module.set_memory_device(memory_device)
diff --git a/src/optimization/compatibility.py b/src/optimization/compatibility.py
@@ -98,8 +98,8 @@ def ensure_bitsandbytes_safe():
     try:
         import bitsandbytes
         # Success - bitsandbytes works, other nodes can use it
-    except (ImportError, OSError, RuntimeError):
-        # Installation broken or not present - create stub
+    except (ImportError, OSError, RuntimeError, ValueError):
+        # Installation broken, not present, or version detection failed - create stub
         stub = types.ModuleType('bitsandbytes')
         stub.__spec__ = importlib.machinery.ModuleSpec('bitsandbytes', None)
         stub.__file__ = None
@@ -592,11 +592,11 @@ def validate_gguf_availability(operation: str = "load GGUF model", debug=None) -
         raise RuntimeError(f"GGUF library required to {operation}")
 
 
-# 4. NVIDIA Conv3d Memory Bug - Workaround for PyTorch 2.9-2.10 + cuDNN >= 91002
+# 4. NVIDIA Conv3d Memory Bug - Workaround for PyTorch >= 2.9 + cuDNN >= 91002
 def _check_conv3d_memory_bug():
     """
     Check if Conv3d memory bug workaround needed.
-    Bug: PyTorch 2.9-2.10 with cuDNN >= 91002 uses 3x memory for Conv3d 
+    Bug: PyTorch 2.9+ with cuDNN >= 91002 uses 3x memory for Conv3d 
     with fp16/bfloat16 due to buggy dispatch layer.
     """
     try:
@@ -622,7 +622,8 @@ def _check_conv3d_memory_bug():
         parts = version_str.split('.')
         torch_version = tuple(int(p) for p in parts[:2])
         
-        if not ((2, 9) <= torch_version <= (2, 10)):
+        # Bug affects PyTorch 2.9 and later versions
+        if torch_version < (2, 9):
             return False
         
         if not hasattr(torch.backends.cudnn, 'version'):
diff --git a/src/utils/color_fix.py b/src/utils/color_fix.py
@@ -381,6 +381,8 @@ def _rgb_to_lab_batch(rgb: Tensor, device: torch.device, matrix: Tensor, epsilon
     rgb_flat = rgb_linear.permute(0, 2, 3, 1).reshape(-1, 3)
     del rgb_linear
     
+    # Ensure dtype consistency for matrix multiplication
+    rgb_flat = rgb_flat.to(dtype=matrix.dtype)
     xyz_flat = torch.matmul(rgb_flat, matrix.T)
     del rgb_flat
     
@@ -452,6 +454,8 @@ def _lab_to_rgb_batch(lab: Tensor, device: torch.device, matrix_inv: Tensor, eps
     xyz_flat = xyz.permute(0, 2, 3, 1).reshape(-1, 3)
     del xyz
     
+    # Ensure dtype consistency for matrix multiplication
+    xyz_flat = xyz_flat.to(dtype=matrix_inv.dtype)
     rgb_linear_flat = torch.matmul(xyz_flat, matrix_inv.T)
     del xyz_flat
     
diff --git a/src/utils/constants.py b/src/utils/constants.py
@@ -4,7 +4,7 @@
 """
 
 # Version information
-__version__ = "2.5.22"
+__version__ = "2.5.23"
 
 import os
 import warnings
diff --git a/src/utils/debug.py b/src/utils/debug.py
@@ -78,7 +78,7 @@ class Debug:
         "device": "🖥️",       # Device info
         "file": "📂",         # File operations
         "alpha": "👻",        # Alpha operations
-        "star": "⭐",         # Star
+        "starlove": "⭐💝",   # Star + love
         "dialogue": "💬",     # Dialogue
         "none" : "",
     }
@@ -259,9 +259,9 @@ def print_footer(self) -> None:
         """Print the footer with links - always displayed"""
         self.log("", category="none", force=True)
         self.log("────────────────────────", category="none", force=True)
-        self.log("Questions? Updates? Watch the videos, star the repo & join us!", category="dialogue", force=True)
+        self.log("Questions? Updates? Watch, star & sponsor if you can!", category="dialogue", force=True)
         self.log("https://www.youtube.com/@AInVFX", category="generation", force=True)
-        self.log("https://github.com/numz/ComfyUI-SeedVR2_VideoUpscaler", category="star", force=True)
+        self.log("https://github.com/numz/ComfyUI-SeedVR2_VideoUpscaler", category="starlove", force=True)
     
     @torch._dynamo.disable  # Skip tracing to avoid time.time() warnings
     def start_timer(self, name: str, force: bool = False) -> None: