Commit d1d3c2a

apply reviewer feedback
1 parent b856e1f commit d1d3c2a

2 files changed: +29 −59 lines changed


packages/tasks/src/tasks/image-to-video/about.md

Lines changed: 25 additions & 9 deletions

@@ -14,22 +14,38 @@ Expand on the narrative of an image by generating a short video that imagines wh

 Use an input image as a strong visual anchor to guide the generation of a video, ensuring that the style, characters, or objects in the video remain consistent with the source image.

-## Task Variants
+### Controllable Motion

-Image-to-video models can have variants based on the specific type of transformation or control offered.
+Image-to-video models can be used to specify the direction or intensity of motion or camera control, giving more fine-grained control over the generated animation.

-### Controllable Motion
+## Inference

-Image-to-video models can be used to specify the direction or intensity of motion, giving more fine-grained control over the generated animation.
+Running the model Wan 2.1 T2V 1.3B with diffusers

-### Loopable Videos
+```py
+import torch
+from diffusers import AutoencoderKLWan, WanPipeline
+from diffusers.utils import export_to_video

-Models can be used to to create seamlessly looping videos, perfect for backgrounds or short, endlessly watchable clips.
+model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+pipe.to("cuda")

-## Inference
+prompt = "A cat walks on the grass, realistic"
+negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"

-Contribute an inference snippet for image-to-video here!
+output = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    height=480,
+    width=832,
+    num_frames=81,
+    guidance_scale=5.0
+).frames[0]
+export_to_video(output, "output.mp4", fps=15)
+```

 ## Useful Resources

-In this area, you can insert useful resources about how to train or use a model for this task.
+To train image-to-video LoRAs check out [finetrainers](https://github.com/a-r-r-o-w/finetrainers) and [musubi trainer](https://github.com/kohya-ss/musubi-tuner).
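The snippet added above drives `WanPipeline`, the text-to-video pipeline for Wan 2.1, so it never consumes an input image. A rough sketch of an image-conditioned run, assuming the `Wan-AI/Wan2.1-I2V-14B-720P-Diffusers` checkpoint kept in `data.ts` below and diffusers' `WanImageToVideoPipeline`, could look like the following (the image URL, prompt, and resolution are placeholders, not part of this commit):

```py
# Sketch only: image-conditioned generation with Wan 2.1, not part of this commit.
import torch
from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
from transformers import CLIPVisionModel

# Assumed checkpoint: the I2V Diffusers repo referenced in data.ts.
model_id = "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanImageToVideoPipeline.from_pretrained(model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16)
pipe.to("cuda")

# Placeholder input image; any RGB image resized to the target resolution works.
image = load_image("https://example.com/input.jpg").resize((1280, 720))
prompt = "The cat starts to walk across the grass, realistic"

output = pipe(
    image=image,
    prompt=prompt,
    height=720,
    width=1280,
    num_frames=81,
    guidance_scale=5.0,
).frames[0]
export_to_video(output, "output.mp4", fps=16)
```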

packages/tasks/src/tasks/image-to-video/data.ts

Lines changed: 4 additions & 50 deletions

@@ -3,11 +3,7 @@ import type { TaskDataCustom } from "../index.js";

 const taskData: TaskDataCustom = {
   datasets: [
     {
-      description: "A dataset of images and short video clips for image-to-video generation research.",
-      id: "some/image-to-video-dataset",
-    },
-    {
-      description: "A benchmark dataset for reference-based video generation.",
+      description: "A benchmark dataset for reference image controlled video generation.",
       id: "ali-vilab/VACE-Benchmark",
     },
     {

@@ -71,12 +67,8 @@ const taskData: TaskDataCustom = {
       id: "Lightricks/LTX-Video-0.9.7-dev",
     },
     {
-      description: "A 1.3B parameter model for reference-based video generation",
-      id: "Wan-AI/Wan2.1-VACE-1.3B",
-    },
-    {
-      description: "An image-to-video generation model using FramePack methodology with Hunyuan-DiT architecture.",
-      id: "lllyasviel/FramePackI2V_HY",
+      description: "A 14B parameter model for reference image controlled video generation",
+      id: "Wan-AI/Wan2.1-VACE-14B",
     },
     {
       description: "An image-to-video generation model using FramePack F1 methodology with Hunyuan-DiT architecture",

@@ -86,30 +78,14 @@ const taskData: TaskDataCustom = {
       description: "A distilled version of the LTX-Video-0.9.7-dev model for faster inference",
       id: "Lightricks/LTX-Video-0.9.7-distilled",
     },
-    {
-      description: "An image-to-video generation model by Skywork AI, 1.3B parameters, producing 540p videos.",
-      id: "Skywork/SkyReels-V2-I2V-1.3B-540P",
-    },
     {
       description: "An image-to-video generation model by Skywork AI, 14B parameters, producing 720p videos.",
       id: "Skywork/SkyReels-V2-I2V-14B-720P",
     },
     {
-      description: "An image-to-video generation model by Skywork AI, 14B parameters, producing 540p videos.",
-      id: "Skywork/SkyReels-V2-I2V-14B-540P",
-    },
-    {
-      description: "Diffusers version of Hunyuan-DiT for image-to-video generation.",
-      id: "hunyuanvideo-community/HunyuanVideo-I2V",
-    },
-    {
-      description: "Tencent's Hunyuan-DiT model for image-to-video generation.",
+      description: "Image-to-video variant of Tencent's HunyuanVideo.",
       id: "tencent/HunyuanVideo-I2V",
     },
-    {
-      description: "A 14B parameter model for 480p image-to-video generation by Wan-AI.",
-      id: "Wan-AI/Wan2.1-I2V-14B-480P",
-    },
     {
       description: "A 14B parameter model for 720p image-to-video generation by Wan-AI.",
       id: "Wan-AI/Wan2.1-I2V-14B-720P",

@@ -118,28 +94,6 @@ const taskData: TaskDataCustom = {
       description: "A Diffusers version of the Wan2.1-I2V-14B-720P model for 720p image-to-video generation.",
       id: "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers",
     },
-    {
-      description:
-        "An image-to-video model that generates videos from frame-level features, producing 720p videos.",
-      id: "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers",
-    },
-    {
-      description: "A Diffusers version of the Wan2.1-I2V-14B-480P model for 480p image-to-video generation.",
-      id: "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers",
-    },
-
-    {
-      description: "A video generation model based on LTX-Video-0.9, evaluated on the VACE benchmark.",
-      id: "ali-vilab/VACE-LTX-Video-0.9",
-    },
-    {
-      description: "An image-to-video model by Stability AI for generating short videos from images.",
-      id: "stabilityai/stable-video-diffusion-img2vid",
-    },
-    {
-      description: "A 5 billion parameter model for image-to-video generation by THUDM.",
-      id: "THUDM/CogVideoX-5b-I2V",
-    },
   ],
   spaces: [
     {
