Commit d1d3c2a

apply reviewer feedback
1 parent b856e1f commit d1d3c2a

2 files changed: +29 −59 lines changed


packages/tasks/src/tasks/image-to-video/about.md

Lines changed: 25 additions & 9 deletions

@@ -14,22 +14,38 @@ Expand on the narrative of an image by generating a short video that imagines wh

 Use an input image as a strong visual anchor to guide the generation of a video, ensuring that the style, characters, or objects in the video remain consistent with the source image.

-## Task Variants
+### Controllable Motion

-Image-to-video models can have variants based on the specific type of transformation or control offered.
+Image-to-video models can be used to specify the direction or intensity of motion or camera control, giving more fine-grained control over the generated animation.

-### Controllable Motion
+## Inference

-Image-to-video models can be used to specify the direction or intensity of motion, giving more fine-grained control over the generated animation.
+Running the model Wan 2.1 T2V 1.3B with diffusers

-### Loopable Videos
+```py
+import torch
+from diffusers import AutoencoderKLWan, WanPipeline
+from diffusers.utils import export_to_video

-Models can be used to to create seamlessly looping videos, perfect for backgrounds or short, endlessly watchable clips.
+model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+pipe.to("cuda")

-## Inference
+prompt = "A cat walks on the grass, realistic"
+negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"

-Contribute an inference snippet for image-to-video here!
+output = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    height=480,
+    width=832,
+    num_frames=81,
+    guidance_scale=5.0
+).frames[0]
+export_to_video(output, "output.mp4", fps=15)
+```

 ## Useful Resources

-In this area, you can insert useful resources about how to train or use a model for this task.
+To train image-to-video LoRAs check out [finetrainers](https://github.com/a-r-r-o-w/finetrainers) and [musubi trainer](https://github.com/kohya-ss/musubi-tuner).
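The snippet added above drives `WanPipeline`, the text-to-video pipeline for Wan 2.1, so it never consumes an input image. A rough sketch of an image-conditioned run, assuming the `Wan-AI/Wan2.1-I2V-14B-720P-Diffusers` checkpoint kept in `data.ts` below and diffusers' `WanImageToVideoPipeline`, could look like the following (the image URL, prompt, and resolution are placeholders, not part of this commit):

```py
# Sketch only: image-conditioned generation with Wan 2.1, not part of this commit.
import torch
from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
from transformers import CLIPVisionModel

# Assumed checkpoint: the I2V Diffusers repo referenced in data.ts.
model_id = "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanImageToVideoPipeline.from_pretrained(model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16)
pipe.to("cuda")

# Placeholder input image; any RGB image resized to the target resolution works.
image = load_image("https://example.com/input.jpg").resize((1280, 720))
prompt = "The cat starts to walk across the grass, realistic"

output = pipe(
    image=image,
    prompt=prompt,
    height=720,
    width=1280,
    num_frames=81,
    guidance_scale=5.0,
).frames[0]
export_to_video(output, "output.mp4", fps=16)
```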

packages/tasks/src/tasks/image-to-video/data.ts

Lines changed: 4 additions & 50 deletions

@@ -3,11 +3,7 @@ import type { TaskDataCustom } from "../index.js";

 const taskData: TaskDataCustom = {
   datasets: [
     {
-      description: "A dataset of images and short video clips for image-to-video generation research.",
-      id: "some/image-to-video-dataset",
-    },
-    {
-      description: "A benchmark dataset for reference-based video generation.",
+      description: "A benchmark dataset for reference image controlled video generation.",
       id: "ali-vilab/VACE-Benchmark",
     },
     {

@@ -71,12 +67,8 @@ const taskData: TaskDataCustom = {
       id: "Lightricks/LTX-Video-0.9.7-dev",
     },
     {
-      description: "A 1.3B parameter model for reference-based video generation",
-      id: "Wan-AI/Wan2.1-VACE-1.3B",
-    },
-    {
-      description: "An image-to-video generation model using FramePack methodology with Hunyuan-DiT architecture.",
-      id: "lllyasviel/FramePackI2V_HY",
+      description: "A 14B parameter model for reference image controlled video generation",
+      id: "Wan-AI/Wan2.1-VACE-14B",
     },
     {
       description: "An image-to-video generation model using FramePack F1 methodology with Hunyuan-DiT architecture",

@@ -86,30 +78,14 @@ const taskData: TaskDataCustom = {
       description: "A distilled version of the LTX-Video-0.9.7-dev model for faster inference",
       id: "Lightricks/LTX-Video-0.9.7-distilled",
     },
-    {
-      description: "An image-to-video generation model by Skywork AI, 1.3B parameters, producing 540p videos.",
-      id: "Skywork/SkyReels-V2-I2V-1.3B-540P",
-    },
     {
       description: "An image-to-video generation model by Skywork AI, 14B parameters, producing 720p videos.",
       id: "Skywork/SkyReels-V2-I2V-14B-720P",
     },
     {
-      description: "An image-to-video generation model by Skywork AI, 14B parameters, producing 540p videos.",
-      id: "Skywork/SkyReels-V2-I2V-14B-540P",
-    },
-    {
-      description: "Diffusers version of Hunyuan-DiT for image-to-video generation.",
-      id: "hunyuanvideo-community/HunyuanVideo-I2V",
-    },
-    {
-      description: "Tencent's Hunyuan-DiT model for image-to-video generation.",
+      description: "Image-to-video variant of Tencent's HunyuanVideo.",
       id: "tencent/HunyuanVideo-I2V",
     },
-    {
-      description: "A 14B parameter model for 480p image-to-video generation by Wan-AI.",
-      id: "Wan-AI/Wan2.1-I2V-14B-480P",
-    },
     {
       description: "A 14B parameter model for 720p image-to-video generation by Wan-AI.",
       id: "Wan-AI/Wan2.1-I2V-14B-720P",

@@ -118,28 +94,6 @@ const taskData: TaskDataCustom = {
       description: "A Diffusers version of the Wan2.1-I2V-14B-720P model for 720p image-to-video generation.",
       id: "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers",
     },
-    {
-      description:
-        "An image-to-video model that generates videos from frame-level features, producing 720p videos.",
-      id: "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers",
-    },
-    {
-      description: "A Diffusers version of the Wan2.1-I2V-14B-480P model for 480p image-to-video generation.",
-      id: "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers",
-    },
-
-    {
-      description: "A video generation model based on LTX-Video-0.9, evaluated on the VACE benchmark.",
-      id: "ali-vilab/VACE-LTX-Video-0.9",
-    },
-    {
-      description: "An image-to-video model by Stability AI for generating short videos from images.",
-      id: "stabilityai/stable-video-diffusion-img2vid",
-    },
-    {
-      description: "A 5 billion parameter model for image-to-video generation by THUDM.",
-      id: "THUDM/CogVideoX-5b-I2V",
-    },
   ],
   spaces: [
     {
