@@ -241,7 +241,7 @@ def get_args():
241241 if args.vae_ckpt_path is not None:
242242 vae = convert_vae(args.vae_ckpt_path, args.scaling_factor, dtype)
243243
244- text_encoder_id = "/share/official_pretrains/hf_home/t5-v1_1-xxl"
244+ text_encoder_id = "google/t5-v1_1-xxl"
245245 tokenizer = T5Tokenizer.from_pretrained(text_encoder_id, model_max_length=TOKENIZER_MAX_LENGTH)
246246 text_encoder = T5EncoderModel.from_pretrained(text_encoder_id, cache_dir=args.text_encoder_cache_dir)
247247 # Apparently, the conversion does not work any more without this :shrug:
@@ -263,18 +263,17 @@ def get_args():
263263 }
264264 )
265265 if args.i2v:
266- pipe = CogVideoXImageToVideoPipeline(
267- tokenizer=tokenizer,
268- text_encoder=text_encoder,
269- image_encoder=vae,
270- vae=vae,
271- transformer=transformer,
272- scheduler=scheduler,
273- )
266+ pipeline_cls = CogVideoXImageToVideoPipeline
274267 else:
275- pipe = CogVideoXPipeline(
276- tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
277- )
268+ pipeline_cls = CogVideoXPipeline
269+
270+ pipe = pipeline_cls(
271+ tokenizer=tokenizer,
272+ text_encoder=text_encoder,
273+ vae=vae,
274+ transformer=transformer,
275+ scheduler=scheduler,
276+ )
278277
279278 if args.fp16:
280279 pipe = pipe.to(dtype=torch.float16)
0 commit comments