Skip to content

Commit 66b8b85

Browse files
authored
[LoRA] Support V1 LoRA inference (#451)
1 parent 6684872 commit 66b8b85

35 files changed

+720
-121
lines changed

examples/inference/basic/basic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# from fastvideo.v1.configs.sample import SamplingParam
44

5-
5+
OUTPUT_PATH = "video_samples"
66
def main():
77
# FastVideo will automatically use the optimal default arguments for the
88
# model.
@@ -11,7 +11,9 @@ def main():
1111
generator = VideoGenerator.from_pretrained(
1212
"Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
1313
# if num_gpus > 1, FastVideo will automatically handle distributed setup
14-
num_gpus=1,
14+
num_gpus=2,
15+
use_fsdp_inference=True,
16+
use_cpu_offload=False
1517
)
1618

1719
# sampling_param = SamplingParam.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers")
@@ -23,7 +25,7 @@ def main():
2325
"wide with interest. The playful yet serene atmosphere is complemented by soft "
2426
"natural light filtering through the petals. Mid-shot, warm and cheerful tones."
2527
)
26-
video = generator.generate_video(prompt)
28+
video = generator.generate_video(prompt, output_path=OUTPUT_PATH, save_video=True)
2729
# video = generator.generate_video(prompt, sampling_param=sampling_param, output_path="wan_t2v_videos/")
2830

2931
# Generate another video with a different prompt, without reloading the
@@ -34,7 +36,7 @@ def main():
3436
"the breeze, enhancing the lion's commanding presence. The tone is vibrant, "
3537
"embodying the raw energy of the wild. Low angle, steady tracking shot, "
3638
"cinematic.")
37-
video2 = generator.generate_video(prompt2)
39+
video2 = generator.generate_video(prompt2, output_path=OUTPUT_PATH, save_video=True)
3840

3941

4042
if __name__ == "__main__":

examples/inference/basic/default_args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from fastvideo import VideoGenerator
2-
2+
from fastvideo.v1.configs.pipelines.base import PipelineConfig
33

44
def main():
55

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from fastvideo import VideoGenerator
2+
from fastvideo.v1.configs.sample import SamplingParam
3+
4+
OUTPUT_PATH = "./lora"
5+
def main():
6+
# Initialize VideoGenerator with the Wan model
7+
generator = VideoGenerator.from_pretrained(
8+
"Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
9+
num_gpus=2,
10+
lora_path="benjamin-paine/steamboat-willie-1.3b",
11+
lora_nickname="steamboat"
12+
)
13+
kwargs = {
14+
"height": 480,
15+
"width": 832,
16+
"num_frames": 81,
17+
"guidance_scale": 5.0,
18+
"num_inference_steps": 32,
19+
}
20+
# Generate video with LoRA style
21+
prompt = "steamboat willie style, golden era animation, close-up of a short fluffy monster kneeling beside a melting red candle. the mood is one of wonder and curiosity, as the monster gazes at the flame with wide eyes and open mouth. Its pose and expression convey a sense of innocence and playfulness, as if it is exploring the world around it for the first time. The use of warm colors and dramatic lighting further enhances the cozy atmosphere of the image."
22+
negative_prompt = "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
23+
24+
video = generator.generate_video(
25+
prompt,
26+
# sampling_param=sampling_param,
27+
output_path=OUTPUT_PATH,
28+
save_video=True,
29+
negative_prompt=negative_prompt,
30+
**kwargs
31+
)
32+
33+
generator.set_lora_adapter(lora_nickname="flat_color", lora_path="motimalu/wan-flat-color-1.3b-v2")
34+
prompt = "flat color, no lineart, blending, negative space, artist:[john kafka|ponsuke kaikai|hara id 21|yoneyama mai|fuzichoco], 1girl, sakura miko, pink hair, cowboy shot, white shirt, floral print, off shoulder, outdoors, cherry blossom, tree shade, wariza, looking up, falling petals, half-closed eyes, white sky, clouds, live2d animation, upper body, high quality cinematic video of a woman sitting under a sakura tree. Dreamy and lonely, the camera close-ups on the face of the woman as she turns towards the viewer. The Camera is steady, This is a cowboy shot. The animation is smooth and fluid."
35+
negative_prompt = "bad quality video,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
36+
video = generator.generate_video(
37+
prompt,
38+
output_path=OUTPUT_PATH,
39+
save_video=True,
40+
negative_prompt=negative_prompt,
41+
**kwargs
42+
)
43+
44+
if __name__ == "__main__":
45+
main()

fastvideo/v1/configs/models/dits/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass, field
2-
from typing import Any, Optional, Tuple
2+
from typing import Any, List, Optional, Tuple
33

44
from fastvideo.v1.configs.models.base import ArchConfig, ModelConfig
55
from fastvideo.v1.layers.quantization import QuantizationConfig
@@ -11,6 +11,7 @@ class DiTArchConfig(ArchConfig):
1111
_fsdp_shard_conditions: list = field(default_factory=list)
1212
_compile_conditions: list = field(default_factory=list)
1313
_param_names_mapping: dict = field(default_factory=dict)
14+
_lora_param_names_mapping: dict = field(default_factory=dict)
1415
_supported_attention_backends: Tuple[_Backend,
1516
...] = (_Backend.SLIDING_TILE_ATTN,
1617
_Backend.SAGE_ATTN,
@@ -20,6 +21,7 @@ class DiTArchConfig(ArchConfig):
2021
hidden_size: int = 0
2122
num_attention_heads: int = 0
2223
num_channels_latents: int = 0
24+
exclude_lora_layers: List[str] = field(default_factory=list)
2325

2426
def __post_init__(self) -> None:
2527
if not self._compile_conditions:

fastvideo/v1/configs/models/dits/hunyuanvideo.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass, field
2-
from typing import Optional, Tuple
2+
from typing import List, Optional, Tuple
33

44
import torch
55

@@ -163,6 +163,8 @@ class HunyuanVideoArchConfig(DiTArchConfig):
163163
pooled_projection_dim: int = 768
164164
rope_theta: int = 256
165165
qk_norm: str = "rms_norm"
166+
exclude_lora_layers: List[str] = field(
167+
default_factory=lambda: ["img_in", "txt_in", "time_in", "vector_in"])
166168

167169
def __post_init__(self):
168170
super().__post_init__()

fastvideo/v1/configs/models/dits/stepvideo.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class StepVideoArchConfig(DiTArchConfig):
5151
default_factory=lambda: [6144, 1024])
5252
attention_type: Optional[str] = "torch"
5353
use_additional_conditions: Optional[bool] = False
54+
exclude_lora_layers: List[str] = field(default_factory=lambda: [])
5455

5556
def __post_init__(self):
5657
self.hidden_size = self.num_attention_heads * self.attention_head_dim

fastvideo/v1/configs/models/dits/wanvideo.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass, field
2-
from typing import Optional, Tuple
2+
from typing import List, Optional, Tuple
33

44
from fastvideo.v1.configs.models.dits.base import DiTArchConfig, DiTConfig
55

@@ -51,6 +51,23 @@ class WanVideoArchConfig(DiTArchConfig):
5151
r"blocks\.(\d+)\.norm2\.(.*)$":
5252
r"blocks.\1.self_attn_residual_norm.norm.\2",
5353
})
54+
# Some LoRA adapters use the original official layer names instead of hf layer names,
55+
# so apply this before the param_names_mapping
56+
_lora_param_names_mapping: dict = field(
57+
default_factory=lambda: {
58+
r"^blocks\.(\d+)\.self_attn\.q\.(.*)$": r"blocks.\1.attn1.to_q.\2",
59+
r"^blocks\.(\d+)\.self_attn\.k\.(.*)$": r"blocks.\1.attn1.to_k.\2",
60+
r"^blocks\.(\d+)\.self_attn\.v\.(.*)$": r"blocks.\1.attn1.to_v.\2",
61+
r"^blocks\.(\d+)\.self_attn\.o\.(.*)$":
62+
r"blocks.\1.attn1.to_out.0.\2",
63+
r"^blocks\.(\d+)\.cross_attn\.q\.(.*)$": r"blocks.\1.attn2.to_q.\2",
64+
r"^blocks\.(\d+)\.cross_attn\.k\.(.*)$": r"blocks.\1.attn2.to_k.\2",
65+
r"^blocks\.(\d+)\.cross_attn\.v\.(.*)$": r"blocks.\1.attn2.to_v.\2",
66+
r"^blocks\.(\d+)\.cross_attn\.o\.(.*)$":
67+
r"blocks.\1.attn2.to_out.0.\2",
68+
r"^blocks\.(\d+)\.ffn\.0\.(.*)$": r"blocks.\1.ffn.fc_in.\2",
69+
r"^blocks\.(\d+)\.ffn\.2\.(.*)$": r"blocks.\1.ffn.fc_out.\2",
70+
})
5471

5572
patch_size: Tuple[int, int, int] = (1, 2, 2)
5673
text_len = 512
@@ -68,6 +85,7 @@ class WanVideoArchConfig(DiTArchConfig):
6885
image_dim: Optional[int] = None
6986
added_kv_proj_dim: Optional[int] = None
7087
rope_max_seq_len: int = 1024
88+
exclude_lora_layers: List[str] = field(default_factory=lambda: ["embedder"])
7189

7290
def __post_init__(self):
7391
super().__post_init__()

fastvideo/v1/configs/pipelines/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ class PipelineConfig:
2727
# Video generation parameters
2828
embedded_cfg_scale: float = 6.0
2929
flow_shift: Optional[float] = None
30-
use_cpu_offload: bool = False
3130
disable_autocast: bool = False
3231

3332
# Model configuration

fastvideo/v1/configs/pipelines/hunyuan.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ class HunyuanConfig(PipelineConfig):
6868
embedded_cfg_scale: int = 6
6969
flow_shift: int = 7
7070

71-
# Video parameters
72-
use_cpu_offload: bool = True
73-
7471
# Text encoding stage
7572
text_encoder_configs: Tuple[EncoderConfig, ...] = field(
7673
default_factory=lambda: (LlamaConfig(), CLIPTextConfig()))

fastvideo/v1/configs/pipelines/stepvideo.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ class StepVideoT2VConfig(PipelineConfig):
1818
vae_tiling: bool = False
1919
vae_sp: bool = False
2020

21-
# Video parameters
22-
use_cpu_offload: bool = True
23-
2421
# Denoising stage
2522
flow_shift: int = 13
2623
timesteps_scale: bool = False

0 commit comments

Comments (0)