
Commit 7b9343c

Merge branch 'master' into update-frontend-1.32.10
2 parents: d2b5aa4 + f8b981a

25 files changed: +3197 −611 lines changed

.ci/windows_amd_base_files/README_VERY_IMPORTANT.txt

Lines changed: 3 additions & 2 deletions
@@ -1,5 +1,5 @@
-As of the time of writing this you need this preview driver for best results:
-https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-PREVIEW.html
+As of the time of writing this you need this driver for best results:
+https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-7-1-1.html
 
 HOW TO RUN:
 
@@ -25,3 +25,4 @@ In the ComfyUI directory you will find a file: extra_model_paths.yaml.example
 Rename this file to: extra_model_paths.yaml and edit it with your favorite text editor.
 
 
+

.github/workflows/release-stable-all.yml

Lines changed: 2 additions & 2 deletions
@@ -65,11 +65,11 @@ jobs:
       contents: "write"
       packages: "write"
       pull-requests: "read"
-    name: "Release AMD ROCm 6.4.4"
+    name: "Release AMD ROCm 7.1.1"
    uses: ./.github/workflows/stable-release.yml
    with:
      git_tag: ${{ inputs.git_tag }}
-      cache_tag: "rocm644"
+      cache_tag: "rocm711"
      python_minor: "12"
      python_patch: "10"
      rel_name: "amd"

app/user_manager.py

Lines changed: 17 additions & 6 deletions
@@ -59,22 +59,26 @@ def get_request_user_id(self, request):
         user = "default"
         if args.multi_user and "comfy-user" in request.headers:
             user = request.headers["comfy-user"]
+            # Block System Users (use same error message to prevent probing)
+            if user.startswith(folder_paths.SYSTEM_USER_PREFIX):
+                raise KeyError("Unknown user: " + user)
 
         if user not in self.users:
             raise KeyError("Unknown user: " + user)
 
         return user
 
     def get_request_user_filepath(self, request, file, type="userdata", create_dir=True):
-        user_directory = folder_paths.get_user_directory()
-
         if type == "userdata":
-            root_dir = user_directory
+            root_dir = folder_paths.get_user_directory()
         else:
             raise KeyError("Unknown filepath type:" + type)
 
         user = self.get_request_user_id(request)
-        path = user_root = os.path.abspath(os.path.join(root_dir, user))
+        user_root = folder_paths.get_public_user_directory(user)
+        if user_root is None:
+            return None
+        path = user_root
 
         # prevent leaving /{type}
         if os.path.commonpath((root_dir, user_root)) != root_dir:
@@ -101,7 +105,11 @@ def add_user(self, name):
         name = name.strip()
         if not name:
             raise ValueError("username not provided")
+        if name.startswith(folder_paths.SYSTEM_USER_PREFIX):
+            raise ValueError("System User prefix not allowed")
         user_id = re.sub("[^a-zA-Z0-9-_]+", '-', name)
+        if user_id.startswith(folder_paths.SYSTEM_USER_PREFIX):
+            raise ValueError("System User prefix not allowed")
         user_id = user_id + "_" + str(uuid.uuid4())
 
         self.users[user_id] = name
@@ -132,7 +140,10 @@ async def post_users(request):
             if username in self.users.values():
                 return web.json_response({"error": "Duplicate username."}, status=400)
 
-            user_id = self.add_user(username)
+            try:
+                user_id = self.add_user(username)
+            except ValueError as e:
+                return web.json_response({"error": str(e)}, status=400)
             return web.json_response(user_id)
 
         @routes.get("/userdata")
@@ -424,7 +435,7 @@ async def move_userdata(request):
                 return source
 
             dest = get_user_data_path(request, check_exists=False, param="dest")
-            if not isinstance(source, str):
+            if not isinstance(dest, str):
                 return dest
 
             overwrite = request.query.get("overwrite", 'true') != "false"
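The new checks reject any requested or newly created username that carries the reserved system prefix, and they reuse the generic "Unknown user" message so a client cannot probe which system users exist; post_users now also turns the ValueError from add_user into a 400 response instead of an unhandled exception. A minimal standalone sketch of the probing-resistant lookup (the prefix value and user table below are made up; the real prefix lives in folder_paths.SYSTEM_USER_PREFIX):

SYSTEM_USER_PREFIX = "__system__"   # hypothetical value, for illustration only
users = {"alice_1a2b": "alice"}     # hypothetical registered users

def get_request_user_id(header_user):
    # System users and unknown users fail with the exact same message.
    if header_user.startswith(SYSTEM_USER_PREFIX):
        raise KeyError("Unknown user: " + header_user)
    if header_user not in users:
        raise KeyError("Unknown user: " + header_user)
    return header_user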

comfy/cli_args.py

Lines changed: 2 additions & 1 deletion
@@ -131,7 +131,8 @@ class LatentPreviewMethod(enum.Enum):
 
 parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
 
-parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")
+parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
+parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
 
 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
 
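With nargs='?' and const=2 the flag now takes an optional integer: omitting it leaves the value None so model_management can apply a per-vendor default, passing it bare selects 2 streams, and passing a number selects that many. An illustrative argparse session (standalone, not part of the diff):

import argparse

p = argparse.ArgumentParser()
p.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS")

print(p.parse_args([]).async_offload)                        # None -> backend decides (2 on Nvidia)
print(p.parse_args(["--async-offload"]).async_offload)       # 2    -> const used when no value is given
print(p.parse_args(["--async-offload", "4"]).async_offload)  # 4    -> explicit stream count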

comfy/latent_formats.py

Lines changed: 4 additions & 1 deletion
@@ -431,6 +431,7 @@ class HunyuanVideo(LatentFormat):
     ]
 
     latent_rgb_factors_bias = [ 0.0259, -0.0192, -0.0761]
+    taesd_decoder_name = "taehv"
 
 class Cosmos1CV8x8x8(LatentFormat):
     latent_channels = 16
@@ -494,7 +495,7 @@ def __init__(self):
         ]).view(1, self.latent_channels, 1, 1, 1)
 
 
-        self.taesd_decoder_name = None #TODO
+        self.taesd_decoder_name = "lighttaew2_1"
 
     def process_in(self, latent):
         latents_mean = self.latents_mean.to(latent.device, latent.dtype)
@@ -565,6 +566,7 @@ class Wan22(Wan21):
 
     def __init__(self):
         self.scale_factor = 1.0
+        self.taesd_decoder_name = "lighttaew2_2"
         self.latents_mean = torch.tensor([
             -0.2289, -0.0052, -0.1323, -0.2339, -0.2799, 0.0174, 0.1838, 0.1557,
             -0.1382, 0.0542, 0.2813, 0.0891, 0.1570, -0.0098, 0.0375, -0.1825,
@@ -719,6 +721,7 @@ class HunyuanVideo15(LatentFormat):
     latent_channels = 32
     latent_dimensions = 3
     scale_factor = 1.03682
+    taesd_decoder_name = "lighttaehy1_5"
 
 class Hunyuan3Dv2(LatentFormat):
     latent_channels = 64
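The taesd_decoder_name entries only point each latent format at its matching lightweight preview decoder. As a sketch, a new format would follow the same pattern (the class and values below are illustrative, not part of the diff):

class MyVideoLatent(LatentFormat):
    latent_channels = 16
    latent_dimensions = 3
    scale_factor = 1.0
    taesd_decoder_name = "taehv"  # name of the TAESD-style preview decoder to load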

comfy/ldm/flux/model.py

Lines changed: 6 additions & 0 deletions
@@ -171,7 +171,10 @@ def forward_orig(
         pe = None
 
         blocks_replace = patches_replace.get("dit", {})
+        transformer_options["total_blocks"] = len(self.double_blocks)
+        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.double_blocks):
+            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -215,7 +218,10 @@ def block_wrap(args):
         if self.params.global_modulation:
             vec, _ = self.single_stream_modulation(vec_orig)
 
+        transformer_options["total_blocks"] = len(self.single_blocks)
+        transformer_options["block_type"] = "single"
         for i, block in enumerate(self.single_blocks):
+            transformer_options["block_index"] = i
             if ("single_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
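These writes expose the current block's position to anything that inspects transformer_options during the forward pass. A hedged sketch of a consumer (the callback shape is illustrative, not ComfyUI's actual patch API):

def scale_late_single_blocks(h, transformer_options):
    # Read the metadata populated in forward_orig above.
    block_type = transformer_options.get("block_type")   # "double" or "single"
    i = transformer_options.get("block_index", 0)
    total = transformer_options.get("total_blocks", 1)
    # Example policy: damp the output of the last quarter of the single-stream blocks.
    if block_type == "single" and i >= total * 3 // 4:
        h = h * 0.9
    return h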

comfy/ldm/lumina/model.py

Lines changed: 16 additions & 4 deletions
@@ -509,23 +509,35 @@ def patchify_and_embed(
 
         if self.pad_tokens_multiple is not None:
             pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
-            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
+            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype, copy=True).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
 
         cap_pos_ids = torch.zeros(bsz, cap_feats.shape[1], 3, dtype=torch.float32, device=device)
         cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0
 
         B, C, H, W = x.shape
         x = self.x_embedder(x.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))
 
+        rope_options = transformer_options.get("rope_options", None)
+        h_scale = 1.0
+        w_scale = 1.0
+        h_start = 0
+        w_start = 0
+        if rope_options is not None:
+            h_scale = rope_options.get("scale_y", 1.0)
+            w_scale = rope_options.get("scale_x", 1.0)
+
+            h_start = rope_options.get("shift_y", 0.0)
+            w_start = rope_options.get("shift_x", 0.0)
+
         H_tokens, W_tokens = H // pH, W // pW
         x_pos_ids = torch.zeros((bsz, x.shape[1], 3), dtype=torch.float32, device=device)
         x_pos_ids[:, :, 0] = cap_feats.shape[1] + 1
-        x_pos_ids[:, :, 1] = torch.arange(H_tokens, dtype=torch.float32, device=device).view(-1, 1).repeat(1, W_tokens).flatten()
-        x_pos_ids[:, :, 2] = torch.arange(W_tokens, dtype=torch.float32, device=device).view(1, -1).repeat(H_tokens, 1).flatten()
+        x_pos_ids[:, :, 1] = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
+        x_pos_ids[:, :, 2] = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
 
         if self.pad_tokens_multiple is not None:
             pad_extra = (-x.shape[1]) % self.pad_tokens_multiple
-            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
+            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
             x_pos_ids = torch.nn.functional.pad(x_pos_ids, (0, 0, 0, pad_extra))
 
         freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)
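The rope_options overrides simply rescale and shift the row/column position ids before RoPE is applied. A tiny standalone illustration with made-up values:

import torch

H_tokens = 4
rope_options = {"scale_y": 0.5, "shift_y": 2.0}  # hypothetical values passed via transformer_options

h_scale = rope_options.get("scale_y", 1.0)
h_start = rope_options.get("shift_y", 0.0)
rows = torch.arange(H_tokens, dtype=torch.float32) * h_scale + h_start
print(rows)  # tensor([2.0000, 2.5000, 3.0000, 3.5000]) -- compressed and offset row coordinates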

comfy/lora.py

Lines changed: 9 additions & 0 deletions
@@ -313,6 +313,15 @@ def model_lora_keys_unet(model, key_map={}):
             key_map["transformer.{}".format(key_lora)] = k
             key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format
 
+    if isinstance(model, comfy.model_base.Lumina2):
+        diffusers_keys = comfy.utils.z_image_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
+        for k in diffusers_keys:
+            if k.endswith(".weight"):
+                to = diffusers_keys[k]
+                key_lora = k[:-len(".weight")]
+                key_map["diffusion_model.{}".format(key_lora)] = to
+                key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = to
+
     return key_map
 
 
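The new branch maps Lumina2 (Z-Image) diffusers-style weight names onto the model's own keys so LoRAs trained against either naming convention resolve. A sketch of the string handling with a hypothetical key pair (the real mapping comes from comfy.utils.z_image_to_diffusers):

k = "layers.0.attention.qkv.weight"                               # hypothetical diffusers-style key
to = "diffusion_model.layers.0.attention.qkv.weight"              # hypothetical mapped target

key_lora = k[:-len(".weight")]                                    # "layers.0.attention.qkv"
key_map = {}
key_map["diffusion_model.{}".format(key_lora)] = to               # diffusers-style LoRA key
key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = to     # SimpleTuner lycoris-style key
print(key_map)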

comfy/model_management.py

Lines changed: 32 additions & 9 deletions
@@ -689,7 +689,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
             loaded_memory = loaded_model.model_loaded_memory()
             current_free_mem = get_free_memory(torch_dev) + loaded_memory
 
-            lowvram_model_memory = max(128 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
+            lowvram_model_memory = max(0, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
             lowvram_model_memory = lowvram_model_memory - loaded_memory
 
             if lowvram_model_memory == 0:
@@ -1012,9 +1012,18 @@ def force_channels_last():
 
 
 STREAMS = {}
-NUM_STREAMS = 1
-if args.async_offload:
-    NUM_STREAMS = 2
+NUM_STREAMS = 0
+if args.async_offload is not None:
+    NUM_STREAMS = args.async_offload
+else:
+    # Enable by default on Nvidia
+    if is_nvidia():
+        NUM_STREAMS = 2
+
+if args.disable_async_offload:
+    NUM_STREAMS = 0
+
+if NUM_STREAMS > 0:
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
 def current_stream(device):
@@ -1030,7 +1039,10 @@ def current_stream(device):
 stream_counters = {}
 def get_offload_stream(device):
     stream_counter = stream_counters.get(device, 0)
-    if NUM_STREAMS <= 1:
+    if NUM_STREAMS == 0:
+        return None
+
+    if torch.compiler.is_compiling():
         return None
 
     if device in STREAMS:
@@ -1043,15 +1055,19 @@ def get_offload_stream(device):
     elif is_device_cuda(device):
         ss = []
         for k in range(NUM_STREAMS):
-            ss.append(torch.cuda.Stream(device=device, priority=0))
+            s1 = torch.cuda.Stream(device=device, priority=0)
+            s1.as_context = torch.cuda.stream
+            ss.append(s1)
         STREAMS[device] = ss
         s = ss[stream_counter]
         stream_counters[device] = stream_counter
         return s
     elif is_device_xpu(device):
         ss = []
         for k in range(NUM_STREAMS):
-            ss.append(torch.xpu.Stream(device=device, priority=0))
+            s1 = torch.xpu.Stream(device=device, priority=0)
+            s1.as_context = torch.xpu.stream
+            ss.append(s1)
         STREAMS[device] = ss
         s = ss[stream_counter]
         stream_counters[device] = stream_counter
@@ -1069,12 +1085,19 @@ def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, str
             if dtype is None or weight.dtype == dtype:
                 return weight
         if stream is not None:
-            with stream:
+            wf_context = stream
+            if hasattr(wf_context, "as_context"):
+                wf_context = wf_context.as_context(stream)
+            with wf_context:
                 return weight.to(dtype=dtype, copy=copy)
         return weight.to(dtype=dtype, copy=copy)
 
+
     if stream is not None:
-        with stream:
+        wf_context = stream
+        if hasattr(wf_context, "as_context"):
+            wf_context = wf_context.as_context(stream)
+        with wf_context:
             r = torch.empty_like(weight, dtype=dtype, device=device)
             r.copy_(weight, non_blocking=non_blocking)
     else:
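Streams created by get_offload_stream now carry an as_context attribute so cast_to can enter the right stream context on both CUDA and XPU, and the function returns None when offload is disabled or torch.compile is tracing. A minimal standalone sketch of the consumer side (the helper name is illustrative):

import torch

def copy_weight_async(weight, device, stream):
    # Mirrors the pattern in cast_to: use the stream's own context manager if it provides one.
    if stream is None:
        return weight.to(device)
    ctx = stream.as_context(stream) if hasattr(stream, "as_context") else stream
    with ctx:
        r = torch.empty_like(weight, device=device)
        r.copy_(weight, non_blocking=True)
    return r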
