Commit f0e0a19

Merge branch 'master' into v3-improvements

2 parents 5d31396 + 79d17ba

28 files changed: +413 -314 lines

README.md

Lines changed: 1 addition & 0 deletions

@@ -81,6 +81,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
     - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
     - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
     - [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/)
+    - [Hunyuan Video 1.5](https://docs.comfy.org/tutorials/video/hunyuan/hunyuan-video-1-5)
 - Audio Models
     - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
     - [ACE Step](https://comfyanonymous.github.io/ComfyUI_examples/audio/)

comfy/context_windows.py

Lines changed: 90 additions & 14 deletions
@@ -51,26 +51,36 @@ def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[
 
 
 class IndexListContextWindow(ContextWindowABC):
-    def __init__(self, index_list: list[int], dim: int=0):
+    def __init__(self, index_list: list[int], dim: int=0, total_frames: int=0):
         self.index_list = index_list
         self.context_length = len(index_list)
         self.dim = dim
+        self.total_frames = total_frames
+        self.center_ratio = (min(index_list) + max(index_list)) / (2 * total_frames)
 
-    def get_tensor(self, full: torch.Tensor, device=None, dim=None) -> torch.Tensor:
+    def get_tensor(self, full: torch.Tensor, device=None, dim=None, retain_index_list=[]) -> torch.Tensor:
         if dim is None:
             dim = self.dim
         if dim == 0 and full.shape[dim] == 1:
             return full
-        idx = [slice(None)] * dim + [self.index_list]
-        return full[idx].to(device)
+        idx = tuple([slice(None)] * dim + [self.index_list])
+        window = full[idx]
+        if retain_index_list:
+            idx = tuple([slice(None)] * dim + [retain_index_list])
+            window[idx] = full[idx]
+        return window.to(device)
 
     def add_window(self, full: torch.Tensor, to_add: torch.Tensor, dim=None) -> torch.Tensor:
         if dim is None:
             dim = self.dim
-        idx = [slice(None)] * dim + [self.index_list]
+        idx = tuple([slice(None)] * dim + [self.index_list])
         full[idx] += to_add
         return full
 
+    def get_region_index(self, num_regions: int) -> int:
+        region_idx = int(self.center_ratio * num_regions)
+        return min(max(region_idx, 0), num_regions - 1)
+
 
 class IndexListCallbacks:
     EVALUATE_CONTEXT_WINDOWS = "evaluate_context_windows"
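Note on the hunk above: wrapping the index in tuple(...) matters because indexing a tensor with a plain Python list that mixes slices and index lists is deprecated advanced-indexing behavior in PyTorch, and the new retain_index_list lets chosen window positions keep their values from the full-length tensor. A minimal sketch with toy shapes (all values here are illustrative, not from the repo):

import torch

# Toy stand-in for a cond tensor: 8 "frames" along dim 0, 4 channels each.
full = torch.arange(32, dtype=torch.float32).reshape(8, 4)
index_list = [2, 3, 4, 5]
dim = 0

# Wrapping in a tuple makes this an explicit advanced index.
idx = tuple([slice(None)] * dim + [index_list])
window = full[idx]            # shape (4, 4): frames 2..5

# retain_index_list overwrites window positions with values taken from the
# same positions of the full tensor, e.g. to pin the first frame's cond.
retain = [0]
ridx = tuple([slice(None)] * dim + [retain])
window[ridx] = full[ridx]     # window position 0 now holds frame 0's values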
@@ -94,7 +104,8 @@ class ContextFuseMethod:
 
 ContextResults = collections.namedtuple("ContextResults", ['window_idx', 'sub_conds_out', 'sub_conds', 'window'])
 class IndexListContextHandler(ContextHandlerABC):
-    def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1, closed_loop=False, dim=0):
+    def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1,
+                 closed_loop: bool=False, dim: int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False):
         self.context_schedule = context_schedule
         self.fuse_method = fuse_method
         self.context_length = context_length
@@ -103,13 +114,18 @@ def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMe
         self.closed_loop = closed_loop
         self.dim = dim
         self._step = 0
+        self.freenoise = freenoise
+        self.cond_retain_index_list = [int(x.strip()) for x in cond_retain_index_list.split(",")] if cond_retain_index_list else []
+        self.split_conds_to_windows = split_conds_to_windows
 
         self.callbacks = {}
 
     def should_use_context(self, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]) -> bool:
         # for now, assume first dim is batch - should have stored on BaseModel in actual implementation
         if x_in.size(self.dim) > self.context_length:
-            logging.info(f"Using context windows {self.context_length} for {x_in.size(self.dim)} frames.")
+            logging.info(f"Using context windows {self.context_length} with overlap {self.context_overlap} for {x_in.size(self.dim)} frames.")
+            if self.cond_retain_index_list:
+                logging.info(f"Retaining original cond for indexes: {self.cond_retain_index_list}")
             return True
         return False
 
@@ -123,6 +139,11 @@ def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: Inde
             return None
         # reuse or resize cond items to match context requirements
         resized_cond = []
+        # if multiple conds, split based on primary region
+        if self.split_conds_to_windows and len(cond_in) > 1:
+            region = window.get_region_index(len(cond_in))
+            logging.info(f"Splitting conds to windows; using region {region} for window {window[0]}-{window[-1]} with center ratio {window.center_ratio:.3f}")
+            cond_in = [cond_in[region]]
         # cond object is a list containing a dict - outer list is irrelevant, so just loop through it
         for actual_cond in cond_in:
            resized_actual_cond = actual_cond.copy()
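The region arithmetic behind this split is simple: a window's center, as a fraction of total_frames, picks which cond it gets. A worked example under assumed numbers (48 latent frames, 3 prompts, a window covering frames 16..31):

# Hypothetical numbers for illustration only.
index_list = list(range(16, 32))
total_frames = 48
num_regions = 3

center_ratio = (min(index_list) + max(index_list)) / (2 * total_frames)  # (16+31)/96 = 0.490
region_idx = int(center_ratio * num_regions)                             # int(1.47) = 1
region_idx = min(max(region_idx, 0), num_regions - 1)                    # -> 1, the middle prompt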
@@ -146,12 +167,19 @@ def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: Inde
                 # when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
                 for cond_key, cond_value in new_cond_item.items():
                     if isinstance(cond_value, torch.Tensor):
-                        if cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim):
+                        if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
+                                (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
                             new_cond_item[cond_key] = window.get_tensor(cond_value, device)
+                    # Handle audio_embed (temporal dim is 1)
+                    elif cond_key == "audio_embed" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
+                        audio_cond = cond_value.cond
+                        if audio_cond.ndim > 1 and audio_cond.size(1) == x_in.size(self.dim):
+                            new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(audio_cond, device, dim=1))
                     # if has cond that is a Tensor, check if needs to be subset
                     elif hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
-                        if cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim):
-                            new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device))
+                        if (self.dim < cond_value.cond.ndim and cond_value.cond.size(self.dim) == x_in.size(self.dim)) or \
+                                (cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim)):
+                            new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device, retain_index_list=self.cond_retain_index_list))
                     elif cond_key == "num_video_frames": # for SVD
                         new_cond_item[cond_key] = cond_value._copy_with(cond_value.cond)
                         new_cond_item[cond_key].cond = window.context_length
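The audio_embed branch slices on dim 1 because the temporal axis of audio conditioning sits after the batch dimension. A toy illustration of the same dim=1 slicing that get_tensor performs (shapes are made up for the example):

import torch

# Toy audio embedding: batch 1, 8 temporal frames, 128 features.
audio_cond = torch.randn(1, 8, 128)
index_list = [2, 3, 4, 5]

# get_tensor(..., dim=1) builds its index as [slice(None)] * 1 + [index_list]:
idx = tuple([slice(None)] * 1 + [index_list])
window_audio = audio_cond[idx]  # shape (1, 4, 128): frames 2..5 for every batch item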
@@ -164,7 +192,7 @@ def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: Inde
         return resized_cond
 
     def set_step(self, timestep: torch.Tensor, model_options: dict[str]):
-        mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep, rtol=0.0001)
+        mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep[0], rtol=0.0001)
         matches = torch.nonzero(mask)
         if torch.numel(matches) == 0:
             raise Exception("No sample_sigmas matched current timestep; something went wrong.")
@@ -173,7 +201,7 @@ def set_step(self, timestep: torch.Tensor, model_options: dict[str]):
     def get_context_windows(self, model: BaseModel, x_in: torch.Tensor, model_options: dict[str]) -> list[IndexListContextWindow]:
         full_length = x_in.size(self.dim) # TODO: choose dim based on model
         context_windows = self.context_schedule.func(full_length, self, model_options)
-        context_windows = [IndexListContextWindow(window, dim=self.dim) for window in context_windows]
+        context_windows = [IndexListContextWindow(window, dim=self.dim, total_frames=full_length) for window in context_windows]
         return context_windows
 
     def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
@@ -250,8 +278,8 @@ def combine_context_window_results(self, x_in: torch.Tensor, sub_conds_out, sub_
             prev_weight = (bias_total / (bias_total + bias))
             new_weight = (bias / (bias_total + bias))
             # account for dims of tensors
-            idx_window = [slice(None)] * self.dim + [idx]
-            pos_window = [slice(None)] * self.dim + [pos]
+            idx_window = tuple([slice(None)] * self.dim + [idx])
+            pos_window = tuple([slice(None)] * self.dim + [pos])
             # apply new values
             conds_final[i][idx_window] = conds_final[i][idx_window] * prev_weight + sub_conds_out[i][pos_window] * new_weight
             biases_final[i][idx] = bias_total + bias
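The fuse step here is an incremental weighted average: each frame accumulates window outputs in proportion to their bias, so overlapped frames converge to the bias-weighted mean. The same update rule reduced to scalars:

# Two windows cover the same frame with outputs 1.0 and 3.0 and equal bias.
value, bias_total = 0.0, 0.0
for out, bias in [(1.0, 0.5), (3.0, 0.5)]:
    prev_weight = bias_total / (bias_total + bias)
    new_weight = bias / (bias_total + bias)
    value = value * prev_weight + out * new_weight
    bias_total += bias
print(value)  # 2.0, the bias-weighted mean of the two window outputs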
@@ -287,6 +315,28 @@ def create_prepare_sampling_wrapper(model: ModelPatcher):
     )
 
 
+def _sampler_sample_wrapper(executor, guider, sigmas, extra_args, callback, noise, *args, **kwargs):
+    model_options = extra_args.get("model_options", None)
+    if model_options is None:
+        raise Exception("model_options not found in sampler_sample_wrapper; this should never happen, something went wrong.")
+    handler: IndexListContextHandler = model_options.get("context_handler", None)
+    if handler is None:
+        raise Exception("context_handler not found in sampler_sample_wrapper; this should never happen, something went wrong.")
+    if not handler.freenoise:
+        return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)
+    noise = apply_freenoise(noise, handler.dim, handler.context_length, handler.context_overlap, extra_args["seed"])
+
+    return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)
+
+
+def create_sampler_sample_wrapper(model: ModelPatcher):
+    model.add_wrapper_with_key(
+        comfy.patcher_extension.WrappersMP.SAMPLER_SAMPLE,
+        "ContextWindows_sampler_sample",
+        _sampler_sample_wrapper
+    )
+
+
 def match_weights_to_dim(weights: list[float], x_in: torch.Tensor, dim: int, device=None) -> torch.Tensor:
     total_dims = len(x_in.shape)
     weights_tensor = torch.Tensor(weights).to(device=device)
@@ -538,3 +588,29 @@ def shift_window_to_end(window: list[int], num_frames: int):
     for i in range(len(window)):
         # 2) add end_delta to each val to slide windows to end
         window[i] = window[i] + end_delta
+
+
+# https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved/blob/90fb1331201a4b29488089e4fbffc0d82cc6d0a9/animatediff/sample_settings.py#L465
+def apply_freenoise(noise: torch.Tensor, dim: int, context_length: int, context_overlap: int, seed: int):
+    logging.info("Context windows: Applying FreeNoise")
+    generator = torch.Generator(device='cpu').manual_seed(seed)
+    latent_video_length = noise.shape[dim]
+    delta = context_length - context_overlap
+
+    for start_idx in range(0, latent_video_length - context_length, delta):
+        place_idx = start_idx + context_length
+
+        actual_delta = min(delta, latent_video_length - place_idx)
+        if actual_delta <= 0:
+            break
+
+        list_idx = torch.randperm(actual_delta, generator=generator, device='cpu') + start_idx
+
+        source_slice = [slice(None)] * noise.ndim
+        source_slice[dim] = list_idx
+        target_slice = [slice(None)] * noise.ndim
+        target_slice[dim] = slice(place_idx, place_idx + actual_delta)
+
+        noise[tuple(target_slice)] = noise[tuple(source_slice)]
+
+    return noise
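FreeNoise fills frames beyond the first context window with shuffled copies of earlier noise instead of fresh samples, so overlapping windows denoise against correlated noise. A quick check of that property, assuming the apply_freenoise added above is in scope (toy sizes):

import torch

noise = torch.randn(16, 4)  # 16 latent frames along dim 0
out = apply_freenoise(noise.clone(), dim=0, context_length=8, context_overlap=4, seed=42)

# Every frame past the first window is a copy of some earlier frame's noise.
for t in range(8, 16):
    assert any(torch.equal(out[t], out[s]) for s in range(t))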

comfy/ldm/lumina/model.py

Lines changed: 0 additions & 1 deletion
@@ -586,7 +586,6 @@ def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, trans
         cap_feats = self.cap_embedder(cap_feats) # (N, L, D) # todo check if able to batchify w.o. redundant compute
 
         patches = transformer_options.get("patches", {})
-        transformer_options = kwargs.get("transformer_options", {})
         x_is_tensor = isinstance(x, torch.Tensor)
         img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, t, num_tokens, transformer_options=transformer_options)
         freqs_cis = freqs_cis.to(img.device)

comfy/model_base.py

Lines changed: 1 addition & 13 deletions
@@ -134,7 +134,7 @@ def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_mod
         if not unet_config.get("disable_unet_model_creation", False):
             if model_config.custom_operations is None:
                 fp8 = model_config.optimizations.get("fp8", False)
-                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8, model_config=model_config)
+                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, model_config=model_config)
             else:
                 operations = model_config.custom_operations
             self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
@@ -329,18 +329,6 @@ def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_
             extra_sds.append(self.model_config.process_clip_vision_state_dict_for_saving(clip_vision_state_dict))
 
         unet_state_dict = self.diffusion_model.state_dict()
-
-        if self.model_config.scaled_fp8 is not None:
-            unet_state_dict["scaled_fp8"] = torch.tensor([], dtype=self.model_config.scaled_fp8)
-
-        # Save mixed precision metadata
-        if hasattr(self.model_config, 'layer_quant_config') and self.model_config.layer_quant_config:
-            metadata = {
-                "format_version": "1.0",
-                "layers": self.model_config.layer_quant_config
-            }
-            unet_state_dict["_quantization_metadata"] = metadata
-
         unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
 
         if self.model_type == ModelType.V_PREDICTION:

comfy/model_detection.py

Lines changed: 4 additions & 29 deletions
@@ -6,20 +6,6 @@
 import logging
 import torch
 
-
-def detect_layer_quantization(metadata):
-    quant_key = "_quantization_metadata"
-    if metadata is not None and quant_key in metadata:
-        quant_metadata = metadata.pop(quant_key)
-        quant_metadata = json.loads(quant_metadata)
-        if isinstance(quant_metadata, dict) and "layers" in quant_metadata:
-            logging.info(f"Found quantization metadata (version {quant_metadata.get('format_version', 'unknown')})")
-            return quant_metadata["layers"]
-        else:
-            raise ValueError("Invalid quantization metadata format")
-    return None
-
-
 def count_blocks(state_dict_keys, prefix_string):
     count = 0
     while True:
@@ -767,22 +753,11 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal
     if model_config is None and use_base_if_no_match:
         model_config = comfy.supported_models_base.BASE(unet_config)
 
-    scaled_fp8_key = "{}scaled_fp8".format(unet_key_prefix)
-    if scaled_fp8_key in state_dict:
-        scaled_fp8_weight = state_dict.pop(scaled_fp8_key)
-        model_config.scaled_fp8 = scaled_fp8_weight.dtype
-        if model_config.scaled_fp8 == torch.float32:
-            model_config.scaled_fp8 = torch.float8_e4m3fn
-        if scaled_fp8_weight.nelement() == 2:
-            model_config.optimizations["fp8"] = False
-        else:
-            model_config.optimizations["fp8"] = True
-
     # Detect per-layer quantization (mixed precision)
-    layer_quant_config = detect_layer_quantization(metadata)
-    if layer_quant_config:
-        model_config.layer_quant_config = layer_quant_config
-        logging.info(f"Detected mixed precision quantization: {len(layer_quant_config)} layers quantized")
+    quant_config = comfy.utils.detect_layer_quantization(state_dict, unet_key_prefix)
+    if quant_config:
+        model_config.quant_config = quant_config
+        logging.info("Detected mixed precision quantization")
 
     return model_config
 
comfy/model_patcher.py

Lines changed: 2 additions & 18 deletions
@@ -126,27 +126,11 @@ class LowVramPatch:
     def __init__(self, key, patches, convert_func=None, set_func=None):
         self.key = key
         self.patches = patches
-        self.convert_func = convert_func
+        self.convert_func = convert_func # TODO: remove
         self.set_func = set_func
 
     def __call__(self, weight):
-        intermediate_dtype = weight.dtype
-        if self.convert_func is not None:
-            weight = self.convert_func(weight, inplace=False)
-
-        if intermediate_dtype not in [torch.float32, torch.float16, torch.bfloat16]: #intermediate_dtype has to be one that is supported in math ops
-            intermediate_dtype = torch.float32
-            out = comfy.lora.calculate_weight(self.patches[self.key], weight.to(intermediate_dtype), self.key, intermediate_dtype=intermediate_dtype)
-            if self.set_func is None:
-                return comfy.float.stochastic_rounding(out, weight.dtype, seed=string_to_seed(self.key))
-            else:
-                return self.set_func(out, seed=string_to_seed(self.key), return_weight=True)
-
-        out = comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype)
-        if self.set_func is not None:
-            return self.set_func(out, seed=string_to_seed(self.key), return_weight=True).to(dtype=intermediate_dtype)
-        else:
-            return out
+        return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=weight.dtype)
 
 #The above patch logic may cast up the weight to fp32, and do math. Go with fp32 x 3
 LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR = 3
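For context, a LowVramPatch instance acts as a deferred weight function: when a low-VRAM model streams a weight in, calling the patch applies any LoRA-style patches on the fly. After this commit, __call__ is a thin pass-through to calculate_weight in the weight's own dtype, with no fp8 special-casing here. A hypothetical call pattern (the key and the patches dict are made up for illustration):

# Hypothetical usage sketch: 'patches' maps a weight key to LoRA-style patch entries.
patch = LowVramPatch("diffusion_model.output_blocks.0.weight", patches)
patched_weight = patch(weight)  # applies patches via comfy.lora.calculate_weight at weight.dtype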
