| 24 | 24 |  | 
| 25 | 25 | ## Generating Videos with Wan 2.1 | 
| 26 | 26 |  | 
| 27 |  | -We will first need to install some addtional dependencies. | 
|  | 27 | +We will first need to install some additional dependencies. | 
| 28 | 28 |  | 
| 29 | 29 | ```shell | 
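|  |  | +# imageio and imageio-ffmpeg back the MP4 writer used by diffusers' | 
|  |  | +# export_to_video helper; ftfy handles text cleanup in the prompt pipeline | 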
| 30 | 30 | pip install -U ftfy imageio-ffmpeg imageio | 
| @@ -133,6 +133,60 @@ output = pipe( | 
| 133 | 133 | export_to_video(output, "wan-i2v.mp4", fps=16) | 
| 134 | 134 | ``` | 
| 135 | 135 |  | 
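|  |  | The call returns the generated frames, which export_to_video writes to an MP4 at the requested frame rate; 16 fps matches Wan 2.1's native output rate. | 
|  |  |  | 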
|  | 136 | +### First and Last Frame Interpolation | 
|  | 137 | + | 
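|  |  | +The first/last-frame-to-video (FLF2V) checkpoint conditions generation on both a starting and an ending image and synthesizes the motion between them. The two conditioning images must end up with identical dimensions, so the example below first resizes the first frame to model-compatible dimensions and then crops the last frame to match. | 
|  |  | + | 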
|  | 138 | +```python | 
|  | 139 | +import numpy as np | 
|  | 140 | +import torch | 
|  | 141 | +import torchvision.transforms.functional as TF | 
|  | 142 | +from diffusers import AutoencoderKLWan, WanImageToVideoPipeline | 
|  | 143 | +from diffusers.utils import export_to_video, load_image | 
|  | 144 | +from transformers import CLIPVisionModel | 
|  | 145 | + | 
|  | 146 | + | 
|  | 147 | +model_id = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers" | 
|  | 148 | +image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32) | 
|  | 149 | +vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) | 
|  | 150 | +pipe = WanImageToVideoPipeline.from_pretrained( | 
|  | 151 | +    model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16 | 
|  | 152 | +) | 
|  | 153 | +pipe.to("cuda") | 
|  | 154 | + | 
|  | 155 | +first_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png") | 
|  | 156 | +last_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png") | 
|  | 157 | + | 
|  | 158 | +def aspect_ratio_resize(image, pipe, max_area=720 * 1280): | 
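|  |  | +    # Fit the image inside max_area while preserving its aspect ratio, snapping | 
|  |  | +    # height and width to multiples of the model's spatial stride | 
|  |  | +    # (VAE downscale factor x transformer patch size) | 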
|  | 159 | +    aspect_ratio = image.height / image.width | 
|  | 160 | +    mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1] | 
|  | 161 | +    height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value | 
|  | 162 | +    width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value | 
|  | 163 | +    image = image.resize((width, height)) | 
|  | 164 | +    return image, height, width | 
|  | 165 | + | 
|  | 166 | +def center_crop_resize(image, height, width): | 
|  | 167 | +    # Scale the image so it fully covers the target size while preserving | 
|  | 168 | +    # its aspect ratio | 
|  | 169 | +    resize_ratio = max(width / image.width, height / image.height) | 
|  | 170 | +    new_width = round(image.width * resize_ratio) | 
|  | 171 | +    new_height = round(image.height * resize_ratio) | 
|  | 172 | +    image = image.resize((new_width, new_height)) | 
|  | 173 | + | 
|  | 174 | +    # Crop away the overflow; TF.center_crop expects size as [height, width] | 
|  | 175 | +    image = TF.center_crop(image, [height, width]) | 
|  | 176 | +    return image, height, width | 
|  | 177 | + | 
|  | 178 | +first_frame, height, width = aspect_ratio_resize(first_frame, pipe) | 
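|  |  | +# Both conditioning frames must share the same resolution, so crop the last | 
|  |  | +# frame to the first frame's dimensions when they differ | 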
|  | 179 | +if last_frame.size != first_frame.size: | 
|  | 180 | +    last_frame, _, _ = center_crop_resize(last_frame, height, width) | 
|  | 181 | + | 
|  | 182 | +prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective." | 
|  | 183 | + | 
|  | 184 | +output = pipe( | 
|  | 185 | +    image=first_frame, last_image=last_frame, prompt=prompt, height=height, width=width, guidance_scale=5.5 | 
|  | 186 | +).frames[0] | 
|  | 187 | +export_to_video(output, "output.mp4", fps=16) | 
|  | 188 | +``` | 
|  | 189 | + | 
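|  |  | +Note that center_crop_resize scales the last frame so it fully covers the first frame's dimensions before cropping, which avoids padding or letterboxing when the two stills have different aspect ratios. | 
|  |  | + | 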
| 136 | 190 | ### Video to Video Generation | 
| 137 | 191 |  | 
| 138 | 192 | ```python | 
| @@ -231,7 +285,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( | 
| 231 | 285 |     image_encoder=image_encoder, | 
| 232 | 286 |     torch_dtype=torch.bfloat16 | 
| 233 | 287 | ) | 
| 234 |  | -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU | 
|  | 288 | +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU | 
| 235 | 289 | pipe.to("cuda") | 
| 236 | 290 |  | 
| 237 | 291 | image = load_image( | 
| @@ -314,7 +368,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( | 
| 314 | 368 |     image_encoder=image_encoder, | 
| 315 | 369 |     torch_dtype=torch.bfloat16 | 
| 316 | 370 | ) | 
| 317 |  | -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU | 
|  | 371 | +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU | 
| 318 | 372 | pipe.to("cuda") | 
| 319 | 373 |  | 
| 320 | 374 | image = load_image( | 