Skip to content

Commit 0f21df8

Browse files
committed
Merge branch 'master' into portable-manager-update
2 parents 334e74b + 70541d4 commit 0f21df8

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+1812
-845
lines changed

.ci/update_windows/update.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ def pull(repo, remote_name='origin', branch='master'):
5353
repo.stash(ident)
5454
except KeyError:
5555
print("nothing to stash") # noqa: T201
56+
except:
57+
print("Could not stash, cleaning index and trying again.") # noqa: T201
58+
repo.state_cleanup()
59+
repo.index.read_tree(repo.head.peel().tree)
60+
repo.index.write()
61+
try:
62+
repo.stash(ident)
63+
except KeyError:
64+
print("nothing to stash.") # noqa: T201
65+
5666
backup_branch_name = 'backup_branch_{}'.format(datetime.today().strftime('%Y-%m-%d_%H_%M_%S'))
5767
print("creating backup branch: {}".format(backup_branch_name)) # noqa: T201
5868
try:

api_server/routes/internal/internal_routes.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,13 @@ async def get_files(request: web.Request) -> web.Response:
5858
return web.json_response({"error": "Invalid directory type"}, status=400)
5959

6060
directory = get_directory_by_type(directory_type)
61+
62+
def is_visible_file(entry: os.DirEntry) -> bool:
63+
"""Filter out hidden files (e.g., .DS_Store on macOS)."""
64+
return entry.is_file() and not entry.name.startswith('.')
65+
6166
sorted_files = sorted(
62-
(entry for entry in os.scandir(directory) if entry.is_file()),
67+
(entry for entry in os.scandir(directory) if is_visible_file(entry)),
6368
key=lambda entry: -entry.stat().st_mtime
6469
)
6570
return web.json_response([entry.name for entry in sorted_files], status=200)

comfy/cli_args.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ class LatentPreviewMethod(enum.Enum):
9797
Latent2RGB = "latent2rgb"
9898
TAESD = "taesd"
9999

100+
@classmethod
101+
def from_string(cls, value: str):
102+
for member in cls:
103+
if member.value == value:
104+
return member
105+
return None
106+
100107
parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction)
101108

102109
parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

comfy/context_windows.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class IndexListCallbacks:
8787
COMBINE_CONTEXT_WINDOW_RESULTS = "combine_context_window_results"
8888
EXECUTE_START = "execute_start"
8989
EXECUTE_CLEANUP = "execute_cleanup"
90+
RESIZE_COND_ITEM = "resize_cond_item"
9091

9192
def init_callbacks(self):
9293
return {}
@@ -166,6 +167,18 @@ def get_resized_cond(self, cond_in: list[dict], x_in: torch.Tensor, window: Inde
166167
new_cond_item = cond_item.copy()
167168
# when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
168169
for cond_key, cond_value in new_cond_item.items():
170+
# Allow callbacks to handle custom conditioning items
171+
handled = False
172+
for callback in comfy.patcher_extension.get_all_callbacks(
173+
IndexListCallbacks.RESIZE_COND_ITEM, self.callbacks
174+
):
175+
result = callback(cond_key, cond_value, window, x_in, device, new_cond_item)
176+
if result is not None:
177+
new_cond_item[cond_key] = result
178+
handled = True
179+
break
180+
if handled:
181+
continue
169182
if isinstance(cond_value, torch.Tensor):
170183
if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
171184
(cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):

comfy/k_diffusion/sampling.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,10 +1557,13 @@ def default_er_sde_noise_scaler(x):
15571557

15581558

15591559
@torch.no_grad()
1560-
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
1560+
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5, solver_type="phi_1"):
15611561
"""SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
15621562
arXiv: https://arxiv.org/abs/2305.14267 (NeurIPS 2023)
15631563
"""
1564+
if solver_type not in {"phi_1", "phi_2"}:
1565+
raise ValueError("solver_type must be 'phi_1' or 'phi_2'")
1566+
15641567
extra_args = {} if extra_args is None else extra_args
15651568
seed = extra_args.get("seed", None)
15661569
noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1600,8 +1603,14 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
16001603
denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
16011604

16021605
# Step 2
1603-
denoised_d = torch.lerp(denoised, denoised_2, fac)
1604-
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
1606+
if solver_type == "phi_1":
1607+
denoised_d = torch.lerp(denoised, denoised_2, fac)
1608+
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
1609+
elif solver_type == "phi_2":
1610+
b2 = ei_h_phi_2(-h_eta) / r
1611+
b1 = ei_h_phi_1(-h_eta) - b2
1612+
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * (b1 * denoised + b2 * denoised_2)
1613+
16051614
if inject_noise:
16061615
segment_factor = (r - 1) * h * eta
16071616
sde_noise = sde_noise * segment_factor.exp()

comfy/ldm/chroma_radiance/model.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class ChromaRadianceParams(ChromaParams):
3737
nerf_final_head_type: str
3838
# None means use the same dtype as the model.
3939
nerf_embedder_dtype: Optional[torch.dtype]
40-
40+
use_x0: bool
4141

4242
class ChromaRadiance(Chroma):
4343
"""
@@ -159,6 +159,9 @@ def __init__(self, image_model=None, final_layer=True, dtype=None, device=None,
159159
self.skip_dit = []
160160
self.lite = False
161161

162+
if params.use_x0:
163+
self.register_buffer("__x0__", torch.tensor([]))
164+
162165
@property
163166
def _nerf_final_layer(self) -> nn.Module:
164167
if self.params.nerf_final_head_type == "linear":
@@ -276,6 +279,12 @@ def radiance_get_override_params(self, overrides: dict) -> ChromaRadianceParams:
276279
params_dict |= overrides
277280
return params.__class__(**params_dict)
278281

282+
def _apply_x0_residual(self, predicted, noisy, timesteps):
283+
284+
# non zero during training to prevent 0 div
285+
eps = 0.0
286+
return (noisy - predicted) / (timesteps.view(-1,1,1,1) + eps)
287+
279288
def _forward(
280289
self,
281290
x: Tensor,
@@ -316,4 +325,11 @@ def _forward(
316325
transformer_options,
317326
attn_mask=kwargs.get("attention_mask", None),
318327
)
319-
return self.forward_nerf(img, img_out, params)[:, :, :h, :w]
328+
329+
out = self.forward_nerf(img, img_out, params)[:, :, :h, :w]
330+
331+
# If x0 variant → v-pred, just return this instead
332+
if hasattr(self, "__x0__"):
333+
out = self._apply_x0_residual(out, img, timestep)
334+
return out
335+

comfy/ldm/hunyuan_video/model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class HunyuanVideoParams:
4343
meanflow: bool
4444
use_cond_type_embedding: bool
4545
vision_in_dim: int
46+
meanflow_sum: bool
4647

4748

4849
class SelfAttentionRef(nn.Module):
@@ -317,7 +318,7 @@ def forward_orig(
317318
timesteps_r = transformer_options['sample_sigmas'][w[0] + 1]
318319
timesteps_r = timesteps_r.unsqueeze(0).to(device=timesteps.device, dtype=timesteps.dtype)
319320
vec_r = self.time_r_in(timestep_embedding(timesteps_r, 256, time_factor=1000.0).to(img.dtype))
320-
vec = (vec + vec_r) / 2
321+
vec = (vec + vec_r) if self.params.meanflow_sum else (vec + vec_r) / 2
321322

322323
if ref_latent is not None:
323324
ref_latent_ids = self.img_ids(ref_latent)

comfy/ldm/lumina/controlnet.py

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ def __init__(
4141
ffn_dim_multiplier: float = (8.0 / 3.0),
4242
norm_eps: float = 1e-5,
4343
qk_norm: bool = True,
44+
n_control_layers=6,
45+
control_in_dim=16,
46+
additional_in_dim=0,
47+
broken=False,
48+
refiner_control=False,
4449
dtype=None,
4550
device=None,
4651
operations=None,
@@ -49,10 +54,11 @@ def __init__(
4954
super().__init__()
5055
operation_settings = {"operations": operations, "device": device, "dtype": dtype}
5156

52-
self.additional_in_dim = 0
53-
self.control_in_dim = 16
57+
self.broken = broken
58+
self.additional_in_dim = additional_in_dim
59+
self.control_in_dim = control_in_dim
5460
n_refiner_layers = 2
55-
self.n_control_layers = 6
61+
self.n_control_layers = n_control_layers
5662
self.control_layers = nn.ModuleList(
5763
[
5864
ZImageControlTransformerBlock(
@@ -74,28 +80,49 @@ def __init__(
7480
all_x_embedder = {}
7581
patch_size = 2
7682
f_patch_size = 1
77-
x_embedder = operations.Linear(f_patch_size * patch_size * patch_size * self.control_in_dim, dim, bias=True, device=device, dtype=dtype)
83+
x_embedder = operations.Linear(f_patch_size * patch_size * patch_size * (self.control_in_dim + self.additional_in_dim), dim, bias=True, device=device, dtype=dtype)
7884
all_x_embedder[f"{patch_size}-{f_patch_size}"] = x_embedder
7985

86+
self.refiner_control = refiner_control
87+
8088
self.control_all_x_embedder = nn.ModuleDict(all_x_embedder)
81-
self.control_noise_refiner = nn.ModuleList(
82-
[
83-
JointTransformerBlock(
84-
layer_id,
85-
dim,
86-
n_heads,
87-
n_kv_heads,
88-
multiple_of,
89-
ffn_dim_multiplier,
90-
norm_eps,
91-
qk_norm,
92-
modulation=True,
93-
z_image_modulation=True,
94-
operation_settings=operation_settings,
95-
)
96-
for layer_id in range(n_refiner_layers)
97-
]
98-
)
89+
if self.refiner_control:
90+
self.control_noise_refiner = nn.ModuleList(
91+
[
92+
ZImageControlTransformerBlock(
93+
layer_id,
94+
dim,
95+
n_heads,
96+
n_kv_heads,
97+
multiple_of,
98+
ffn_dim_multiplier,
99+
norm_eps,
100+
qk_norm,
101+
block_id=layer_id,
102+
operation_settings=operation_settings,
103+
)
104+
for layer_id in range(n_refiner_layers)
105+
]
106+
)
107+
else:
108+
self.control_noise_refiner = nn.ModuleList(
109+
[
110+
JointTransformerBlock(
111+
layer_id,
112+
dim,
113+
n_heads,
114+
n_kv_heads,
115+
multiple_of,
116+
ffn_dim_multiplier,
117+
norm_eps,
118+
qk_norm,
119+
modulation=True,
120+
z_image_modulation=True,
121+
operation_settings=operation_settings,
122+
)
123+
for layer_id in range(n_refiner_layers)
124+
]
125+
)
99126

100127
def forward(self, cap_feats, control_context, x_freqs_cis, adaln_input):
101128
patch_size = 2
@@ -105,9 +132,29 @@ def forward(self, cap_feats, control_context, x_freqs_cis, adaln_input):
105132
control_context = self.control_all_x_embedder[f"{patch_size}-{f_patch_size}"](control_context.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))
106133

107134
x_attn_mask = None
108-
for layer in self.control_noise_refiner:
109-
control_context = layer(control_context, x_attn_mask, x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input)
135+
if not self.refiner_control:
136+
for layer in self.control_noise_refiner:
137+
control_context = layer(control_context, x_attn_mask, x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input)
138+
110139
return control_context
111140

141+
def forward_noise_refiner_block(self, layer_id, control_context, x, x_attn_mask, x_freqs_cis, adaln_input):
142+
if self.refiner_control:
143+
if self.broken:
144+
if layer_id == 0:
145+
return self.control_layers[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
146+
if layer_id > 0:
147+
out = None
148+
for i in range(1, len(self.control_layers)):
149+
o, control_context = self.control_layers[i](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
150+
if out is None:
151+
out = o
152+
153+
return (out, control_context)
154+
else:
155+
return self.control_noise_refiner[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
156+
else:
157+
return (None, control_context)
158+
112159
def forward_control_block(self, layer_id, control_context, x, x_attn_mask, x_freqs_cis, adaln_input):
113160
return self.control_layers[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)

comfy/ldm/lumina/model.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ def patchify_and_embed(
536536
bsz = len(x)
537537
pH = pW = self.patch_size
538538
device = x[0].device
539+
orig_x = x
539540

540541
if self.pad_tokens_multiple is not None:
541542
pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
@@ -572,13 +573,21 @@ def patchify_and_embed(
572573

573574
freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)
574575

576+
patches = transformer_options.get("patches", {})
577+
575578
# refine context
576579
for layer in self.context_refiner:
577580
cap_feats = layer(cap_feats, cap_mask, freqs_cis[:, :cap_pos_ids.shape[1]], transformer_options=transformer_options)
578581

579582
padded_img_mask = None
580-
for layer in self.noise_refiner:
583+
x_input = x
584+
for i, layer in enumerate(self.noise_refiner):
581585
x = layer(x, padded_img_mask, freqs_cis[:, cap_pos_ids.shape[1]:], t, transformer_options=transformer_options)
586+
if "noise_refiner" in patches:
587+
for p in patches["noise_refiner"]:
588+
out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": freqs_cis[:, cap_pos_ids.shape[1]:], "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
589+
if "img" in out:
590+
x = out["img"]
582591

583592
padded_full_embed = torch.cat((cap_feats, x), dim=1)
584593
mask = None
@@ -622,14 +631,15 @@ def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, trans
622631

623632
patches = transformer_options.get("patches", {})
624633
x_is_tensor = isinstance(x, torch.Tensor)
625-
img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, t, num_tokens, transformer_options=transformer_options)
634+
img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
626635
freqs_cis = freqs_cis.to(img.device)
627636

637+
img_input = img
628638
for i, layer in enumerate(self.layers):
629639
img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
630640
if "double_block" in patches:
631641
for p in patches["double_block"]:
632-
out = p({"img": img[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
642+
out = p({"img": img[:, cap_size[0]:], "img_input": img_input[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
633643
if "img" in out:
634644
img[:, cap_size[0]:] = out["img"]
635645
if "txt" in out:

0 commit comments

Comments
 (0)