Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
254622d
init
yousef-rafat Sep 5, 2025
1cff9b8
Merge branch 'master' into yousef-higgsv2
yousef-rafat Sep 5, 2025
df4b6a2
removed test files
yousef-rafat Sep 5, 2025
6e9335d
Merge branch 'yousef-higgsv2' of https://github.com/yousef-rafat/Comf…
yousef-rafat Sep 5, 2025
57c15f9
styling fixes
yousef-rafat Sep 5, 2025
f8d4891
additional styling
yousef-rafat Sep 5, 2025
233e441
.
yousef-rafat Sep 5, 2025
6412422
bug fixes + added some features
yousef-rafat Sep 8, 2025
5191fb2
Merge branch 'master' into yousef-higgsv2
yousef-rafat Sep 9, 2025
2ac8999
final
yousef-rafat Sep 9, 2025
fee1e57
Merge branch 'yousef-higgsv2' of https://github.com/yousef-rafat/Comf…
yousef-rafat Sep 9, 2025
12824ea
init
yousef-rafat Sep 27, 2025
a480271
Delete comfy/autoregressive_sampling.py
yousef-rafat Sep 27, 2025
786c386
...
yousef-rafat Sep 27, 2025
1d24e63
Merge branch 'yousef-hunyuan-foley' of https://github.com/yousef-rafa…
yousef-rafat Sep 27, 2025
c951e8f
.
yousef-rafat Sep 27, 2025
73cdb32
.
yousef-rafat Sep 27, 2025
3773d0d
Merge branch 'master' into yousef-hunyuan-foley
yousef-rafat Sep 27, 2025
aaa3bcc
fixed a small bug
yousef-rafat Sep 27, 2025
f85e1cf
Merge branch 'yousef-hunyuan-foley' of https://github.com/yousef-rafa…
yousef-rafat Sep 27, 2025
8311b15
allowed returning frames
yousef-rafat Sep 27, 2025
2ceb9f0
added clap tokenizer
yousef-rafat Sep 28, 2025
a6dabd2
fixed clap location
yousef-rafat Sep 28, 2025
42a265c
fixed multiple errors in nodes and model loading
yousef-rafat Sep 29, 2025
ab01ace
removed additional code in video_types
yousef-rafat Sep 29, 2025
cc3a138
some fixes in model loading and nodes
yousef-rafat Sep 30, 2025
4241f10
clip vision base support + small fixes
yousef-rafat Oct 1, 2025
663d971
work on the conditioning
yousef-rafat Oct 3, 2025
4b6c081
large optimizations and some fixes
yousef-rafat Oct 4, 2025
95d2aae
syncformer fix + some fixes
yousef-rafat Oct 6, 2025
220c65d
fixed the syncform logic + condition-related fixes
yousef-rafat Oct 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions comfy/clip_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json")
elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
elif "vision_model.encoder.layers.11.layer_norm1.weight" in sd:
embed_shape = sd["vision_model.embeddings.position_embedding.weight"].shape[0]
if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
norm_weight = sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0]
if norm_weight == 1152:
if embed_shape == 729:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
elif embed_shape == 1024:
Expand All @@ -134,6 +135,8 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336_llava.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
elif embed_shape == 1024 and norm_weight == 768:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip2_base_512.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")

Expand Down
14 changes: 14 additions & 0 deletions comfy/clip_vision_siglip2_base_512.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 768,
"image_size": 512,
"intermediate_size": 3072,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"patch_size": 16,
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5]
}
4 changes: 4 additions & 0 deletions comfy/latent_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,10 @@ class ACEAudio(LatentFormat):
latent_channels = 8
latent_dimensions = 2

class HunyuanFoley(LatentFormat):
    # Latent format descriptor for HunyuanFoley; it only overrides the two
    # class-level attributes below and takes everything else from LatentFormat.
    #
    # NOTE(review): the neighbouring ACEAudio format sets latent_dimensions = 2,
    # so 128 here reads more like a size than a dimension count -- confirm
    # against how LatentFormat consumers use these two fields.
    latent_channels = 1024
    latent_dimensions = 128

class ChromaRadiance(LatentFormat):
latent_channels = 3

Expand Down
Loading
Loading