|
24 | 24 |
|
25 | 25 | ## Generating Videos with Wan 2.1 |
26 | 26 |
|
27 | | -We will first need to install some addtional dependencies. |
| 27 | +We will first need to install some additional dependencies. |
28 | 28 |
|
29 | 29 | ```shell |
30 | 30 | pip install -U ftfy imageio-ffmpeg imageio
@@ -133,6 +133,60 @@ output = pipe( |
133 | 133 | export_to_video(output, "wan-i2v.mp4", fps=16) |
134 | 134 | ``` |
135 | 135 |
|
| 136 | +### First and Last Frame Interpolation
| 137 | +
|  | +First and last frame interpolation generates a video that starts from a provided first frame and ends at a provided last frame, with the model filling in the motion between them.
|  | +
| 138 | +```python |
| 139 | +import numpy as np |
| 140 | +import torch |
| 141 | +import torchvision.transforms.functional as TF |
| 142 | +from diffusers import AutoencoderKLWan, WanImageToVideoPipeline |
| 143 | +from diffusers.utils import export_to_video, load_image |
| 144 | +from transformers import CLIPVisionModel |
| 145 | + |
| 146 | + |
| 147 | +model_id = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers" |
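|  | +# The image encoder and VAE are loaded in float32 for extra precision; the rest of the
|  | +# pipeline below runs in bfloat16.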
| 148 | +image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32) |
| 149 | +vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) |
| 150 | +pipe = WanImageToVideoPipeline.from_pretrained( |
| 151 | + model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16 |
| 152 | +) |
| 153 | +pipe.to("cuda") |
| 154 | + |
| 155 | +first_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png") |
| 156 | +last_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png") |
| 157 | + |
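|  | +# Resize the first frame to fit within `max_area` pixels while preserving its aspect ratio,
|  | +# snapping height and width to multiples expected by the VAE and transformer patching.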
| 158 | +def aspect_ratio_resize(image, pipe, max_area=720 * 1280): |
| 159 | + aspect_ratio = image.height / image.width |
| 160 | + mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1] |
| 161 | + height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value |
| 162 | + width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value |
| 163 | + image = image.resize((width, height)) |
| 164 | + return image, height, width |
| 165 | + |
| 166 | +def center_crop_resize(image, height, width):
| 167 | +    # Scale the image so it fully covers the first frame's (height, width)
| 168 | +    resize_ratio = max(width / image.width, height / image.height)
| 169 | +
| 170 | +    # Resize, then center crop to exactly (height, width) to match the first frame
| 171 | +    resized_width = round(image.width * resize_ratio)
| 172 | +    resized_height = round(image.height * resize_ratio)
| 173 | +    image = image.resize((resized_width, resized_height))
| 174 | +    image = TF.center_crop(image, [height, width])
| 175 | +
| 176 | +    return image, height, width
| 177 | + |
| 178 | +first_frame, height, width = aspect_ratio_resize(first_frame, pipe) |
| 179 | +if last_frame.size != first_frame.size: |
| 180 | + last_frame, _, _ = center_crop_resize(last_frame, height, width) |
| 181 | + |
| 182 | +prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective." |
| 183 | + |
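|  | +# Passing `last_image` conditions the generation on the end frame as well as the first frame.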
| 184 | +output = pipe( |
| 185 | + image=first_frame, last_image=last_frame, prompt=prompt, height=height, width=width, guidance_scale=5.5 |
| 186 | +).frames[0] |
| 187 | +export_to_video(output, "output.mp4", fps=16) |
| 188 | +``` |
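|  | +
|  | +If the full pipeline does not fit in GPU memory, a minimal sketch of the same setup with component offloading (reusing `model_id`, `vae`, and `image_encoder` from the block above) looks like this:
|  | +
|  | +```python
|  | +pipe = WanImageToVideoPipeline.from_pretrained(
|  | +    model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
|  | +)
|  | +# Keep components on CPU and move each one to the GPU only while it runs,
|  | +# instead of calling pipe.to("cuda"); slower, but with a much lower peak VRAM.
|  | +pipe.enable_model_cpu_offload()
|  | +```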
| 189 | + |
136 | 190 | ### Video to Video Generation |
137 | 191 |
|
138 | 192 | ```python |
@@ -231,7 +285,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( |
231 | 285 | image_encoder=image_encoder, |
232 | 286 | torch_dtype=torch.bfloat16 |
233 | 287 | ) |
234 | | -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU |
| 288 | +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU |
235 | 289 | pipe.to("cuda") |
236 | 290 |
|
237 | 291 | image = load_image( |
@@ -314,7 +368,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( |
314 | 368 | image_encoder=image_encoder, |
315 | 369 | torch_dtype=torch.bfloat16 |
316 | 370 | ) |
317 | | -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU |
| 371 | +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU |
318 | 372 | pipe.to("cuda") |
319 | 373 |
|
320 | 374 | image = load_image( |
|