Comfy-Org
diff --git a/‎comfy/cli_args.py‎
Lines changed: 1 addition & 0 deletions b/‎comfy/cli_args.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎comfy/model_patcher.py‎
Lines changed: 7 additions & 2 deletions b/‎comfy/model_patcher.py‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎comfy/ops.py‎
Lines changed: 5 additions & 1 deletion b/‎comfy/ops.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎comfy/quant_ops.py‎
Lines changed: 3 additions & 2 deletions b/‎comfy/quant_ops.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎comfy/sd.py‎
Lines changed: 14 additions & 0 deletions b/‎comfy/sd.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎comfy_api_nodes/apinode_utils.py‎
Lines changed: 1 addition & 108 deletions b/‎comfy_api_nodes/apinode_utils.py‎
Lines changed: 1 addition & 108 deletions
diff --git a/‎comfy_api_nodes/nodes_bfl.py‎
Lines changed: 5 additions & 38 deletions b/‎comfy_api_nodes/nodes_bfl.py‎
Lines changed: 5 additions & 38 deletions
diff --git a/‎comfy_api_nodes/nodes_bytedance.py‎
Lines changed: 6 additions & 6 deletions b/‎comfy_api_nodes/nodes_bytedance.py‎
Lines changed: 6 additions & 6 deletions
@@ -105,6 +105,7 @@ class LatentPreviewMethod(enum.Enum):
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
+cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold, the cache removes large items to free RAM. Default 4GB")
 
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
 
@@ -276,6 +276,9 @@ def model_size(self):
         self.size = comfy.model_management.module_size(self.model)
         return self.size
 
+    def get_ram_usage(self):
+        return self.model_size()
+
     def loaded_size(self):
         return self.model.model_loaded_weight_memory
 
@@ -655,6 +658,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
             mem_counter = 0
             patch_counter = 0
             lowvram_counter = 0
+            lowvram_mem_counter = 0
             loading = self._load_list()
 
             load_completely = []
@@ -675,6 +679,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                     if mem_counter + module_mem >= lowvram_model_memory:
                         lowvram_weight = True
                         lowvram_counter += 1
+                        lowvram_mem_counter += module_mem
                         if hasattr(m, "prev_comfy_cast_weights"): #Already lowvramed
                             continue
 
@@ -748,10 +753,10 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                     self.pin_weight_to_device("{}.{}".format(n, param))
 
             if lowvram_counter > 0:
-                logging.info("loaded partially {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), patch_counter))
                 self.model.model_lowvram = True
             else:
-                logging.info("loaded completely {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
                 self.model.model_lowvram = False
                 if full_load:
                     self.model.to(device_to)
 
@@ -421,14 +421,18 @@ def fp8_linear(self, input):
 
         if scale_input is None:
             scale_input = torch.ones((), device=input.device, dtype=torch.float32)
+            input = torch.clamp(input, min=-448, max=448, out=input)
+            input = input.reshape(-1, input_shape[2]).to(dtype).contiguous()
+            layout_params_weight = {'scale': scale_input, 'orig_dtype': input_dtype}
+            quantized_input = QuantizedTensor(input.reshape(-1, input_shape[2]).to(dtype).contiguous(), TensorCoreFP8Layout, layout_params_weight)
         else:
             scale_input = scale_input.to(input.device)
+            quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
 
         # Wrap weight in QuantizedTensor - this enables unified dispatch
         # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
         layout_params_weight = {'scale': scale_weight, 'orig_dtype': input_dtype}
         quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
-        quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
         o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
 
         uncast_bias_weight(self, w, bias, offload_stream)
 
@@ -357,9 +357,10 @@ def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn):
             scale = torch.tensor(scale)
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
-        lp_amax = torch.finfo(dtype).max
         tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
-        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
+        # TODO: uncomment this if it's actually needed because the clamp has a small performance penalty
+        # lp_amax = torch.finfo(dtype).max
+        # torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
 
         layout_params = {
 
@@ -143,6 +143,9 @@ def clone(self):
         n.apply_hooks_to_conds = self.apply_hooks_to_conds
         return n
 
+    def get_ram_usage(self):
+        return self.patcher.get_ram_usage()
+
     def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
         return self.patcher.add_patches(patches, strength_patch, strength_model)
 
@@ -293,6 +296,7 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
         self.working_dtypes = [torch.bfloat16, torch.float32]
         self.disable_offload = False
         self.not_video = False
+        self.size = None
 
         self.downscale_index_formula = None
         self.upscale_index_formula = None
@@ -595,6 +599,16 @@ def estimate_memory(shape, dtype, num_layers = 16, kv_cache_multiplier = 2):
 
         self.patcher = comfy.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device)
         logging.info("VAE load device: {}, offload device: {}, dtype: {}".format(self.device, offload_device, self.vae_dtype))
+        self.model_size()
+
+    def model_size(self):
+        if self.size is not None:
+            return self.size
+        self.size = comfy.model_management.module_size(self.first_stage_model)
+        return self.size
+
+    def get_ram_usage(self):
+        return self.model_size()
 
     def throw_exception_if_invalid(self):
         if self.first_stage_model is None:
 
@@ -1,15 +1,12 @@
 from __future__ import annotations
 import aiohttp
 import mimetypes
-from typing import Optional, Union
-from comfy.utils import common_upscale
+from typing import Union
 from server import PromptServer
-from comfy.cli_args import args
 
 import numpy as np
 from PIL import Image
 import torch
-import math
 import base64
 from io import BytesIO
 
@@ -60,85 +57,6 @@ async def validate_and_cast_response(
     return torch.stack(image_tensors, dim=0)
 
 
-def validate_aspect_ratio(
-    aspect_ratio: str,
-    minimum_ratio: float,
-    maximum_ratio: float,
-    minimum_ratio_str: str,
-    maximum_ratio_str: str,
-) -> float:
-    """Validates and casts an aspect ratio string to a float.
-
-    Args:
-        aspect_ratio: The aspect ratio string to validate.
-        minimum_ratio: The minimum aspect ratio.
-        maximum_ratio: The maximum aspect ratio.
-        minimum_ratio_str: The minimum aspect ratio string.
-        maximum_ratio_str: The maximum aspect ratio string.
-
-    Returns:
-        The validated and cast aspect ratio.
-
-    Raises:
-        Exception: If the aspect ratio is not valid.
-    """
-    # get ratio values
-    numbers = aspect_ratio.split(":")
-    if len(numbers) != 2:
-        raise TypeError(
-            f"Aspect ratio must be in the format X:Y, such as 16:9, but was {aspect_ratio}."
-        )
-    try:
-        numerator = int(numbers[0])
-        denominator = int(numbers[1])
-    except ValueError as exc:
-        raise TypeError(
-            f"Aspect ratio must contain numbers separated by ':', such as 16:9, but was {aspect_ratio}."
-        ) from exc
-    calculated_ratio = numerator / denominator
-    # if not close to minimum and maximum, check bounds
-    if not math.isclose(calculated_ratio, minimum_ratio) or not math.isclose(
-        calculated_ratio, maximum_ratio
-    ):
-        if calculated_ratio < minimum_ratio:
-            raise TypeError(
-                f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
-            )
-        if calculated_ratio > maximum_ratio:
-            raise TypeError(
-                f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
-            )
-    return aspect_ratio
-
-
-async def download_url_to_bytesio(
-    url: str, timeout: int = None, auth_kwargs: Optional[dict[str, str]] = None
-) -> BytesIO:
-    """Downloads content from a URL using requests and returns it as BytesIO.
-
-    Args:
-        url: The URL to download.
-        timeout: Request timeout in seconds. Defaults to None (no timeout).
-
-    Returns:
-        BytesIO object containing the downloaded content.
-    """
-    headers = {}
-    if url.startswith("/proxy/"):
-        url = str(args.comfy_api_base).rstrip("/") + url
-        auth_token = auth_kwargs.get("auth_token")
-        comfy_api_key = auth_kwargs.get("comfy_api_key")
-        if auth_token:
-            headers["Authorization"] = f"Bearer {auth_token}"
-        elif comfy_api_key:
-            headers["X-API-KEY"] = comfy_api_key
-    timeout_cfg = aiohttp.ClientTimeout(total=timeout) if timeout else None
-    async with aiohttp.ClientSession(timeout=timeout_cfg) as session:
-        async with session.get(url, headers=headers) as resp:
-            resp.raise_for_status()  # Raises HTTPError for bad responses (4XX or 5XX)
-            return BytesIO(await resp.read())
-
-
 def text_filepath_to_base64_string(filepath: str) -> str:
     """Converts a text file to a base64 string."""
     with open(filepath, "rb") as f:
@@ -153,28 +71,3 @@ def text_filepath_to_data_uri(filepath: str) -> str:
     if mime_type is None:
         mime_type = "application/octet-stream"
     return f"data:{mime_type};base64,{base64_string}"
-
-
-def resize_mask_to_image(
-    mask: torch.Tensor,
-    image: torch.Tensor,
-    upscale_method="nearest-exact",
-    crop="disabled",
-    allow_gradient=True,
-    add_channel_dim=False,
-):
-    """
-    Resize mask to be the same dimensions as an image, while maintaining proper format for API calls.
-    """
-    _, H, W, _ = image.shape
-    mask = mask.unsqueeze(-1)
-    mask = mask.movedim(-1, 1)
-    mask = common_upscale(
-        mask, width=W, height=H, upscale_method=upscale_method, crop=crop
-    )
-    mask = mask.movedim(1, -1)
-    if not add_channel_dim:
-        mask = mask.squeeze(-1)
-    if not allow_gradient:
-        mask = (mask > 0.5).float()
-    return mask
@@ -5,10 +5,6 @@
 from typing_extensions import override
 
 from comfy_api.latest import IO, ComfyExtension
-from comfy_api_nodes.apinode_utils import (
-    resize_mask_to_image,
-    validate_aspect_ratio,
-)
 from comfy_api_nodes.apis.bfl_api import (
     BFLFluxExpandImageRequest,
     BFLFluxFillImageRequest,
@@ -23,8 +19,10 @@
     ApiEndpoint,
     download_url_to_image_tensor,
     poll_op,
+    resize_mask_to_image,
     sync_op,
     tensor_to_base64_string,
+    validate_aspect_ratio_string,
     validate_string,
 )
 
@@ -43,11 +41,6 @@ class FluxProUltraImageNode(IO.ComfyNode):
     Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution.
     """
 
-    MINIMUM_RATIO = 1 / 4
-    MAXIMUM_RATIO = 4 / 1
-    MINIMUM_RATIO_STR = "1:4"
-    MAXIMUM_RATIO_STR = "4:1"
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
@@ -112,16 +105,7 @@ def define_schema(cls) -> IO.Schema:
 
     @classmethod
     def validate_inputs(cls, aspect_ratio: str):
-        try:
-            validate_aspect_ratio(
-                aspect_ratio,
-                minimum_ratio=cls.MINIMUM_RATIO,
-                maximum_ratio=cls.MAXIMUM_RATIO,
-                minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-                maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-            )
-        except Exception as e:
-            return str(e)
+        validate_aspect_ratio_string(aspect_ratio, (1, 4), (4, 1))
         return True
 
     @classmethod
@@ -145,13 +129,7 @@ async def execute(
                 prompt=prompt,
                 prompt_upsampling=prompt_upsampling,
                 seed=seed,
-                aspect_ratio=validate_aspect_ratio(
-                    aspect_ratio,
-                    minimum_ratio=cls.MINIMUM_RATIO,
-                    maximum_ratio=cls.MAXIMUM_RATIO,
-                    minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-                    maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-                ),
+                aspect_ratio=aspect_ratio,
                 raw=raw,
                 image_prompt=(image_prompt if image_prompt is None else tensor_to_base64_string(image_prompt)),
                 image_prompt_strength=(None if image_prompt is None else round(image_prompt_strength, 2)),
@@ -180,11 +158,6 @@ class FluxKontextProImageNode(IO.ComfyNode):
     Edits images using Flux.1 Kontext [pro] via api based on prompt and aspect ratio.
     """
 
-    MINIMUM_RATIO = 1 / 4
-    MAXIMUM_RATIO = 4 / 1
-    MINIMUM_RATIO_STR = "1:4"
-    MAXIMUM_RATIO_STR = "4:1"
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
@@ -261,13 +234,7 @@ async def execute(
         seed=0,
         prompt_upsampling=False,
     ) -> IO.NodeOutput:
-        aspect_ratio = validate_aspect_ratio(
-            aspect_ratio,
-            minimum_ratio=cls.MINIMUM_RATIO,
-            maximum_ratio=cls.MAXIMUM_RATIO,
-            minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-            maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-        )
+        validate_aspect_ratio_string(aspect_ratio, (1, 4), (4, 1))
         if input_image is None:
             validate_string(prompt, strip_whitespace=False)
         initial_response = await sync_op(
 
@@ -17,7 +17,7 @@
     poll_op,
     sync_op,
     upload_images_to_comfyapi,
-    validate_image_aspect_ratio_range,
+    validate_image_aspect_ratio,
     validate_image_dimensions,
     validate_string,
 )
@@ -403,7 +403,7 @@ async def execute(
         validate_string(prompt, strip_whitespace=True, min_length=1)
         if get_number_of_images(image) != 1:
             raise ValueError("Exactly one input image is required.")
-        validate_image_aspect_ratio_range(image, (1, 3), (3, 1))
+        validate_image_aspect_ratio(image, (1, 3), (3, 1))
         source_url = (await upload_images_to_comfyapi(cls, image, max_images=1, mime_type="image/png"))[0]
         payload = Image2ImageTaskCreationRequest(
             model=model,
@@ -565,7 +565,7 @@ async def execute(
         reference_images_urls = []
         if n_input_images:
             for i in image:
-                validate_image_aspect_ratio_range(i, (1, 3), (3, 1))
+                validate_image_aspect_ratio(i, (1, 3), (3, 1))
             reference_images_urls = await upload_images_to_comfyapi(
                 cls,
                 image,
@@ -798,7 +798,7 @@ async def execute(
         validate_string(prompt, strip_whitespace=True, min_length=1)
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
         validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
-        validate_image_aspect_ratio_range(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+        validate_image_aspect_ratio(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         image_url = (await upload_images_to_comfyapi(cls, image, max_images=1))[0]
         prompt = (
@@ -923,7 +923,7 @@ async def execute(
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
         for i in (first_frame, last_frame):
             validate_image_dimensions(i, min_width=300, min_height=300, max_width=6000, max_height=6000)
-            validate_image_aspect_ratio_range(i, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+            validate_image_aspect_ratio(i, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         download_urls = await upload_images_to_comfyapi(
             cls,
@@ -1045,7 +1045,7 @@ async def execute(
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "watermark"])
         for image in images:
             validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
-            validate_image_aspect_ratio_range(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+            validate_image_aspect_ratio(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         image_urls = await upload_images_to_comfyapi(cls, images, max_images=4, mime_type="image/png")
         prompt = (