huggingface · MrShahzebKhoso · Sep 13, 2025 · Sep 13, 2025 · Sep 27, 2025 · Sep 27, 2025
@@ -0,0 +1,159 @@
+## Use Cases
+
+### Video Style Transfer
+Apply artistic or cinematic styles to a video while preserving motion and structure. For example, convert real footage into anime, painting, or film-like visuals. 
+
+### Frame Interpolation
+Generate intermediate frames to make videos smoother or convert 30 FPS videos to 60 FPS. This improves motion flow and enables realistic slow-motion playback.  
+
+### Video Super-Resolution
+Enhance low-resolution videos into high-definition outputs with preserved detail and sharpness. Ideal for restoring old footage or improving video quality.  
+
+### Motion Transfer
+Transfer the motion from a source video to another subject while maintaining identity and environment. This enables realistic animation or gesture replication.  
+
+### Video Editing & Synthesis
+Add, remove, or modify objects in videos while keeping lighting and motion consistent. Perfect for visual effects, object replacement, and content-aware editing. 
+
+### Temporal Modification
+Change a video’s time of day or environmental conditions, such as day to night or summer to winter. These models preserve motion dynamics and lighting continuity.
+
+### Virtual Try-on
+Simulate clothing changes or outfit fitting in videos while keeping the person’s motion and identity intact. Useful for digital fashion and e-commerce applications.
+
+
+## Inference
+We will show a few examples for different use cases.
+
+Below is an example demonstrating how to use Lucy-Edit-Dev to perform video costume editing — changing a character’s clothing while maintaining identity and motion consistency.
+```python
+
+import torch
+from PIL import Image
+
+from diffusers import AutoencoderKLWan, LucyEditPipeline
+from diffusers.utils import export_to_video, load_video
+
+
+url = "https://d2drjpuinn46lb.cloudfront.net/painter_original_edit.mp4"
+prompt = "Change the apron and blouse to a classic clown costume: satin polka-dot jumpsuit in bright primary colors, ruffled white collar, oversized pom-pom buttons, white gloves, oversized red shoes, red foam nose; soft window light from left, eye-level medium shot, natural folds and fabric highlights."
+negative_prompt = ""
+num_frames = 81
+height = 480
+width = 832
+
+def convert_video(video: List[Image.Image]) -> List[Image.Image]:
+    video = load_video(url)[:num_frames]
+    video = [video[i].resize((width, height)) for i in range(num_frames)]
+    return video
+
+video = load_video(url, convert_method=convert_video)
+
+model_id = "decart-ai/Lucy-Edit-Dev"
+vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+pipe = LucyEditPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+
+output = pipe(
+    prompt=prompt,
+    video=video,
+    negative_prompt=negative_prompt,
+    height=480,
+    width=832,
+    num_frames=81,
+    guidance_scale=5.0
+).frames[0]
+
+export_to_video(output, "output.mp4", fps=24)
+
+```
+
+
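+Below is an example demonstrating how to use LTX-Video for video-to-video generation: the first 21 frames of an input video, together with a text prompt, condition the output, which is generated at a reduced resolution, upscaled in latent space, and then refined at the higher resolution.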
+
+```python
+import torch
+from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
+from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
+from diffusers.utils import export_to_video, load_image
+
+base_model_id = "Lightricks/LTX-Video-0.9.7-distilled" 
+
+pipe = LTXConditionPipeline.from_pretrained(base_model_id, torch_dtype=torch.bfloat16)
+pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained(
+    "Lightricks/ltxv-spatial-upscaler-0.9.7",
+    vae=pipe.vae, 
+    torch_dtype=torch.bfloat16
+)
+pipe.to("cuda")
+pipe_upsample.to("cuda")
+
+def round_to_nearest_resolution_acceptable_by_vae(height, width):
+    height = height - (height % pipe.vae_temporal_compression_ratio)
+    width = width - (width % pipe.vae_temporal_compression_ratio)
+    return height, width
+
+video = load_video(
+    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input-vid.mp4"
+)[:21]  
+condition1 = LTXVideoCondition(video=video, frame_index=0)
+
+prompt = "The video depicts a winding mountain road covered in snow, with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the solitude and beauty of a winter drive through a mountainous region."
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+expected_height, expected_width = 768, 1152
+downscale_factor = 2 / 3
+num_frames = 161
+
+downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
+
+latents = pipe(
+    conditions=[condition1],
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=downscaled_width,
+    height=downscaled_height,
+    num_frames=num_frames,
+    num_inference_steps=30,
+    generator=torch.Generator().manual_seed(0),
+    output_type="latent",
+).frames
+
+upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+upscaled_latents = pipe_upsample(
+    latents=latents,
+    output_type="latent"
+).frames
+
+video = pipe(
+    conditions=[condition1],
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=upscaled_width,
+    height=upscaled_height,
+    num_frames=num_frames,
+    denoise_strength=0.4,  
+    num_inference_steps=10,
+    latents=upscaled_latents,
+    decode_timestep=0.05,
+    image_cond_noise_scale=0.025,
+    generator=torch.Generator().manual_seed(0),
+    output_type="pil",
+).frames[0]
+
+video = [frame.resize((expected_width, expected_height)) for frame in video]
+
+export_to_video(video, "output.mp4", fps=24)
+```
+
+## Useful Resources
+
+### Repositories 
+- [Lumen](https://github.com/Kunbyte-AI/Lumen) - Official implementation of Lumen for text-guided video editing.
+- [VIRES](https://github.com/suimuc/VIRES) - Implementation for sketch- and text-guided video instance repainting.
+- [ECCV2022-RIFE: Video Frame Interpolation](https://github.com/hzwer/ECCV2022-RIFE) - Real-time video frame interpolation via intermediate flow estimation.
+- [StableVSR: Enhancing Perceptual Quality in Video](https://github.com/claudiom4sir/StableVSR) - Super-resolution method to enhance perceptual video quality.
+
+
+
+
@@ -0,0 +1,76 @@
+import type { TaskDataCustom } from "../index.js";
+
+// Metadata for the video-to-video task: the datasets, demo files, models, and Spaces listed for it.
+const taskData: TaskDataCustom = {
+	// Datasets listed for this task.
+	datasets: [
+		{
+			description: "Dataset with detailed annotations for training and benchmarking video instance editing.",
+			id: "suimu/VIRESET",
+		},
+		{
+			description: "Dataset to evaluate models on long video generation and understanding.",
+			id: "zhangsh2001/LongV-EVAL",
+		},
+		{
+			description: "Collection of 104 demo videos from the SeedVR/SeedVR2 series showcasing model outputs.",
+			id: "Iceclear/SeedVR_VideoDemos",
+		},
+	],
+	// Demo input and output are GIF files declared with the "img" type.
+	demo: {
+		inputs: [
+			{
+				filename: "input.gif",
+				type: "img",
+			},
+		],
+		outputs: [
+			{
+				filename: "output.gif",
+				type: "img",
+			},
+		],
+	},
+	// No metrics are listed for this task.
+	metrics: [],
+	// Models listed for this task.
+	models: [
+		{
+			description: "Model for editing outfits, character, and scenery in videos.",
+			id: "decart-ai/Lucy-Edit-Dev",
+		},
+		{
+			description: "Predicts next video frames for frame interpolation and higher FPS.",
+			id: "keras-io/conv-lstm",
+		},
+		{
+			description: "Video upscaling model that enhances resolution while preserving quality.",
+			id: "ByteDance-Seed/SeedVR2-7B",
+		},
+		{
+			description: "Framework that uses 3D mesh proxies for precise, consistent video editing.",
+			id: "LeoLau/Shape-for-Motion",
+		},
+		{
+			description: "A model to upscale videos at input, designed for seamless use with ComfyUI.",
+			id: "numz/SeedVR2_comfyUI",
+		},
+		{
+			description: "Model for relighting videos by manipulating illumination distributions.",
+			id: "TeslaYang123/TC-Light",
+		},
+	],
+	// Spaces listed for this task.
+	spaces: [
+		{
+			description: "Interactive demo space for Lucy-Edit-Dev video editing.",
+			id: "decart-ai/lucy-edit-dev",
+		},
+		{
+			description: "Demo space for SeedVR2-3B showcasing video upscaling and restoration.",
+			id: "ByteDance-Seed/SeedVR2-3B",
+		},
+	],
+	summary:
+		"Video-to-video models take one or more videos as input and generate new videos as output. They can enhance quality, interpolate frames, modify styles, or create new motion dynamics, enabling creative applications, video production, and research.",
+	// No widget models or YouTube video are set.
+	widgetModels: [],
+	youtubeId: "",
+};
+
+export default taskData;