|
99 | 99 | "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale", |
100 | 100 | ], |
101 | 101 | "ltx-video": [ |
102 | | - ( |
103 | | - "model.diffusion_model.patchify_proj.weight", |
104 | | - "model.diffusion_model.transformer_blocks.27.scale_shift_table", |
105 | | - ), |
| 102 | + "model.diffusion_model.patchify_proj.weight", |
| 103 | + "model.diffusion_model.transformer_blocks.27.scale_shift_table", |
| 104 | + "patchify_proj.weight", |
| 105 | + "transformer_blocks.27.scale_shift_table", |
| 106 | + "vae.per_channel_statistics.mean-of-means", |
106 | 107 | ], |
107 | 108 | "autoencoder-dc": "decoder.stages.1.op_list.0.main.conv.conv.bias", |
108 | 109 | "autoencoder-dc-sana": "encoder.project_in.conv.bias", |
@@ -599,7 +600,7 @@ def infer_diffusers_model_type(checkpoint): |
599 | 600 | else: |
600 | 601 | model_type = "flux-schnell" |
601 | 602 |
|
602 | | - elif any(all(key in checkpoint for key in key_list) for key_list in CHECKPOINT_KEY_NAMES["ltx-video"]): |
| 603 | + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["ltx-video"]): |
603 | 604 | model_type = "ltx-video" |
604 | 605 |
|
605 | 606 | elif CHECKPOINT_KEY_NAMES["autoencoder-dc"] in checkpoint: |
@@ -2255,9 +2256,7 @@ def swap_scale_shift(weight): |
2255 | 2256 |
|
2256 | 2257 |
|
2257 | 2258 | def convert_ltx_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): |
2258 | | - converted_state_dict = { |
2259 | | - key: checkpoint.pop(key) for key in list(checkpoint.keys()) if "model.diffusion_model." in key |
2260 | | - } |
| 2259 | + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys()) if "vae" not in key} |
2261 | 2260 |
|
2262 | 2261 | TRANSFORMER_KEYS_RENAME_DICT = { |
2263 | 2262 | "model.diffusion_model.": "", |
|
0 commit comments