@@ -152,9 +152,19 @@ def __init__(
152152
153153        # 1. Latent and condition embedders 
154154        self .x_embedder  =  HunyuanVideoPatchEmbed ((patch_size_t , patch_size , patch_size ), in_channels , inner_dim )
155+ 
156+         # Framepack history projection embedder 
157+         self .clean_x_embedder  =  None 
158+         if  has_clean_x_embedder :
159+             self .clean_x_embedder  =  HunyuanVideoHistoryPatchEmbed (in_channels , inner_dim )
160+ 
155161        self .context_embedder  =  HunyuanVideoTokenRefiner (
156162            text_embed_dim , num_attention_heads , attention_head_dim , num_layers = num_refiner_layers 
157163        )
164+ 
165+         # Framepack image-conditioning embedder 
166+         self .image_projection  =  FramepackClipVisionProjection (image_proj_dim , inner_dim ) if  has_image_proj  else  None 
167+ 
158168        self .time_text_embed  =  HunyuanVideoConditionEmbedding (
159169            inner_dim , pooled_projection_dim , guidance_embeds , image_condition_type 
160170        )
@@ -186,13 +196,6 @@ def __init__(
186196        self .norm_out  =  AdaLayerNormContinuous (inner_dim , inner_dim , elementwise_affine = False , eps = 1e-6 )
187197        self .proj_out  =  nn .Linear (inner_dim , patch_size_t  *  patch_size  *  patch_size  *  out_channels )
188198
189-         # Framepack specific modules 
190-         self .image_projection  =  FramepackClipVisionProjection (image_proj_dim , inner_dim ) if  has_image_proj  else  None 
191- 
192-         self .clean_x_embedder  =  None 
193-         if  has_clean_x_embedder :
194-             self .clean_x_embedder  =  HunyuanVideoHistoryPatchEmbed (in_channels , inner_dim )
195- 
196199        self .use_gradient_checkpointing  =  False 
197200
198201    def  forward (
0 commit comments