Skip to content

Commit 3df95b2

Browse files
committed
remove image encoder from conversion script
1 parent c1f7a80 commit 3df95b2

File tree

1 file changed

+11
-12
lines changed

1 file changed

+11
-12
lines changed

scripts/convert_cogvideox_to_diffusers.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ def get_args():
241241
if args.vae_ckpt_path is not None:
242242
vae = convert_vae(args.vae_ckpt_path, args.scaling_factor, dtype)
243243

244-
text_encoder_id = "/share/official_pretrains/hf_home/t5-v1_1-xxl"
244+
text_encoder_id = "google/t5-v1_1-xxl"
245245
tokenizer = T5Tokenizer.from_pretrained(text_encoder_id, model_max_length=TOKENIZER_MAX_LENGTH)
246246
text_encoder = T5EncoderModel.from_pretrained(text_encoder_id, cache_dir=args.text_encoder_cache_dir)
247247
# Apparently, the conversion does not work any more without this :shrug:
@@ -263,18 +263,17 @@ def get_args():
263263
}
264264
)
265265
if args.i2v:
266-
pipe = CogVideoXImageToVideoPipeline(
267-
tokenizer=tokenizer,
268-
text_encoder=text_encoder,
269-
image_encoder=vae,
270-
vae=vae,
271-
transformer=transformer,
272-
scheduler=scheduler,
273-
)
266+
pipeline_cls = CogVideoXImageToVideoPipeline
274267
else:
275-
pipe = CogVideoXPipeline(
276-
tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
277-
)
268+
pipeline_cls = CogVideoXPipeline
269+
270+
pipe = pipeline_cls(
271+
tokenizer=tokenizer,
272+
text_encoder=text_encoder,
273+
vae=vae,
274+
transformer=transformer,
275+
scheduler=scheduler,
276+
)
278277

279278
if args.fp16:
280279
pipe = pipe.to(dtype=torch.float16)

0 commit comments

Comments
 (0)