2121
2222from  ...callbacks  import  MultiPipelineCallbacks , PipelineCallback 
2323from  ...loaders  import  Mochi1LoraLoaderMixin 
24- from  ...models .autoencoders  import  AutoencoderKL 
24+ from  ...models .autoencoders  import  AutoencoderKLMochi 
2525from  ...models .transformers  import  MochiTransformer3DModel 
2626from  ...schedulers  import  FlowMatchEulerDiscreteScheduler 
2727from  ...utils  import  (
@@ -151,8 +151,8 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
151151            Conditional Transformer architecture to denoise the encoded video latents. 
152152        scheduler ([`FlowMatchEulerDiscreteScheduler`]): 
153153            A scheduler to be used in combination with `transformer` to denoise the encoded video latents. 
154-         vae ([`AutoencoderKL `]): 
155-             Variational Auto-Encoder (VAE) Model to encode and decode images  to and from latent representations. 
154+         vae ([`AutoencoderKLMochi`]): 
155+             Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations. 
156156        text_encoder ([`T5EncoderModel`]): 
157157            [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically 
158158            the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant. 
@@ -171,7 +171,7 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
171171    def  __init__ (
172172        self ,
173173        scheduler : FlowMatchEulerDiscreteScheduler ,
174-         vae : AutoencoderKL ,
174+         vae : AutoencoderKLMochi ,
175175        text_encoder : T5EncoderModel ,
176176        tokenizer : T5TokenizerFast ,
177177        transformer : MochiTransformer3DModel ,
0 commit comments