comfyanonymous · city96 · Aug 19, 2025
@@ -252,6 +252,9 @@ def __init__(self, model, load_device, offload_device, size=0, weight_inplace_up
         if not hasattr(self.model, 'model_lowvram'):
             self.model.model_lowvram = False
 
+        if not hasattr(self.model, 'lowvram_hints'):
+            self.model.lowvram_hints = []
+
         if not hasattr(self.model, 'current_weight_patches_uuid'):
             self.model.current_weight_patches_uuid = None
 
@@ -596,6 +599,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
             patch_counter = 0
             lowvram_counter = 0
             loading = self._load_list()
+            hints = self.get_model_object("lowvram_hints")
 
             load_completely = []
             loading.sort(reverse=True)
@@ -611,7 +615,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                 bias_key = "{}.bias".format(n)
 
                 if not full_load and hasattr(m, "comfy_cast_weights"):
-                    if mem_counter + module_mem >= lowvram_model_memory:
+                    if (mem_counter + module_mem) >= lowvram_model_memory or any(x in n for x in hints):
                         lowvram_weight = True
                         lowvram_counter += 1
                         if hasattr(m, "prev_comfy_cast_weights"): #Already lowvramed
@@ -676,10 +680,10 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                 x[2].to(device_to)
 
             if lowvram_counter > 0:
-                logging.info("loaded partially {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), patch_counter))
+                logging.info("loaded partially {}MB {}MB {}".format(round(lowvram_model_memory / (1024 * 1024)), round(mem_counter / (1024 * 1024)), patch_counter))
                 self.model.model_lowvram = True
             else:
-                logging.info("loaded completely {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely {}MB {}MB {}".format(round(lowvram_model_memory / (1024 * 1024)), round(mem_counter / (1024 * 1024)), full_load))
                 self.model.model_lowvram = False
                 if full_load:
                     self.model.to(device_to)

@@ -316,6 +316,32 @@ def patch(self, model, dtype):
         return (m, )
 
 
+class ModelLowvramHint:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("MODEL", ),
+                "hints": ("STRING", {"default": "img_mlp.\ntxt_mlp.\n", "multiline": True}),
+            }
+        }
+
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "set_hints"
+    EXPERIMENTAL = True
+
+    DESCRIPTION = "Force some weights to always use lowvram. One rule per line."
+    CATEGORY = "advanced/debug/model"
+
+    def set_hints(self, model, hints=""):
+        hints = [x.strip() for x in hints.split("\n") if x.strip()]
+        if not hints:
+            return model
+
+        m = model.clone()
+        m.add_object_patch("lowvram_hints", hints)
+        return (m, )
+
 NODE_CLASS_MAPPINGS = {
     "ModelSamplingDiscrete": ModelSamplingDiscrete,
     "ModelSamplingContinuousEDM": ModelSamplingContinuousEDM,
@@ -326,4 +352,5 @@ def patch(self, model, dtype):
     "ModelSamplingFlux": ModelSamplingFlux,
     "RescaleCFG": RescaleCFG,
     "ModelComputeDtype": ModelComputeDtype,
+    "ModelLowvramHint": ModelLowvramHint,
 }