Enable async offloading by default on Nvidia. (#10953)

comfyanonymous · web-flow · commit 9d8a817985bb · 2025-11-27T17:46:12.000-05:00
Add --disable-async-offload to disable it.

If this causes OOMs that go away when you pass --disable-async-offload, please
report it.
diff --git a/comfy/cli_args.py b/comfy/cli_args.py
@@ -131,7 +131,8 @@ class LatentPreviewMethod(enum.Enum):
 
 parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
 
-parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")
+parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
+parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
 
 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
@@ -1013,8 +1013,17 @@ def force_channels_last():
 
 STREAMS = {}
 NUM_STREAMS = 0
-if args.async_offload:
-    NUM_STREAMS = 2
+if args.async_offload is not None:
+    NUM_STREAMS = args.async_offload
+else:
+    #  Enable by default on Nvidia
+    if is_nvidia():
+        NUM_STREAMS = 2
+
+if args.disable_async_offload:
+    NUM_STREAMS = 0
+
+if NUM_STREAMS > 0:
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
 def current_stream(device):