 
 import torch
 from transformers import T5EncoderModel, T5Tokenizer
-from diffusers import AutoencoderKL, CogVideoXDDIMScheduler
+from diffusers import AutoencoderKL, DDPMScheduler
 from diffusers.loaders.single_file_utils import convert_ldm_vae_checkpoint
 
 from diffusers import (
@@ -185,21 +185,17 @@ def get_args():
     if args.vae_ckpt_path is not None:
         vae = convert_vae(args.vae_ckpt_path, args.scaling_factor, dtype)
 
-    text_encoder_id = "/share/official_pretrains/hf_home/t5-v1_1-xxl"
+    text_encoder_id = "google/t5-v1_1-xxl"
     tokenizer = T5Tokenizer.from_pretrained(text_encoder_id, model_max_length=TOKENIZER_MAX_LENGTH)
     text_encoder = T5EncoderModel.from_pretrained(text_encoder_id, cache_dir=args.text_encoder_cache_dir)
 
-    scheduler = CogVideoXDDIMScheduler.from_config(
+    scheduler = DDPMScheduler.from_config(
         {
-            "beta_end": 0.012,
-            "beta_schedule": "scaled_linear",
-            "beta_start": 0.00085,
-            "clip_sample": False,
-            "num_train_timesteps": 1000,
-            "prediction_type": "v_prediction",
-            "rescale_betas_zero_snr": True,
-            "set_alpha_to_one": True,
-            "timestep_spacing": "trailing",
+            "num_train_timesteps": 50,
+            "beta_start": 0.0001,
+            "beta_end": 0.02,
+            "beta_schedule": "linear",
+            "prediction_type": "v_prediction"
         }
     )
 
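For reference, a minimal standalone sketch of the components this diff swaps in: it builds the DDPMScheduler from the same config dict shown above and loads the tokenizer and text encoder from the Hub id instead of the old cluster-local path. Fields omitted from the dict (e.g. clip_sample, variance_type) fall back to DDPMScheduler defaults. The TOKENIZER_MAX_LENGTH value here is an assumed placeholder for illustration; the actual script defines its own constant, and downloading google/t5-v1_1-xxl requires network access and substantial disk space.

# Standalone sketch; assumes diffusers and transformers are installed.
from diffusers import DDPMScheduler
from transformers import T5EncoderModel, T5Tokenizer

TOKENIZER_MAX_LENGTH = 226  # assumed for illustration; the script defines its own value

# DDPMScheduler.from_config accepts a plain dict; unspecified fields
# (e.g. clip_sample, variance_type) keep the scheduler's defaults.
scheduler = DDPMScheduler.from_config(
    {
        "num_train_timesteps": 50,
        "beta_start": 0.0001,
        "beta_end": 0.02,
        "beta_schedule": "linear",
        "prediction_type": "v_prediction",
    }
)
print(scheduler.config.prediction_type)  # -> "v_prediction"

# The text encoder now resolves from the Hugging Face Hub id rather than a
# hard-coded local path, so the conversion script runs outside the original
# cluster environment.
text_encoder_id = "google/t5-v1_1-xxl"
tokenizer = T5Tokenizer.from_pretrained(text_encoder_id, model_max_length=TOKENIZER_MAX_LENGTH)
text_encoder = T5EncoderModel.from_pretrained(text_encoder_id)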
|
|