@@ -152,9 +152,19 @@ def __init__(
152152
153153 # 1. Latent and condition embedders
154154 self .x_embedder = HunyuanVideoPatchEmbed ((patch_size_t , patch_size , patch_size ), in_channels , inner_dim )
155+
156+ # Framepack history projection embedder
157+ self .clean_x_embedder = None
158+ if has_clean_x_embedder :
159+ self .clean_x_embedder = HunyuanVideoHistoryPatchEmbed (in_channels , inner_dim )
160+
155161 self .context_embedder = HunyuanVideoTokenRefiner (
156162 text_embed_dim , num_attention_heads , attention_head_dim , num_layers = num_refiner_layers
157163 )
164+
165+ # Framepack image-conditioning embedder
166+ self .image_projection = FramepackClipVisionProjection (image_proj_dim , inner_dim ) if has_image_proj else None
167+
158168 self .time_text_embed = HunyuanVideoConditionEmbedding (
159169 inner_dim , pooled_projection_dim , guidance_embeds , image_condition_type
160170 )
@@ -186,13 +196,6 @@ def __init__(
186196 self .norm_out = AdaLayerNormContinuous (inner_dim , inner_dim , elementwise_affine = False , eps = 1e-6 )
187197 self .proj_out = nn .Linear (inner_dim , patch_size_t * patch_size * patch_size * out_channels )
188198
189- # Framepack specific modules
190- self .image_projection = FramepackClipVisionProjection (image_proj_dim , inner_dim ) if has_image_proj else None
191-
192- self .clean_x_embedder = None
193- if has_clean_x_embedder :
194- self .clean_x_embedder = HunyuanVideoHistoryPatchEmbed (in_channels , inner_dim )
195-
196199 self .gradient_checkpointing = False
197200
198201 def forward (
0 commit comments