Skip to content

Commit ff5a79e

Browse files
[Feature][Inference] Add VSA inference script (#561)
1 parent ab01dc4 commit ff5a79e

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
# Launch `fastvideo generate` with the VIDEO_SPARSE_ATTN attention backend
# (VSA) and a fixed prompt, seed and output directory.
#
# Edit num_gpus and MODEL_BASE below to match your setup.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline, so a typo in a variable name cannot silently turn
# into an empty CLI argument.
set -euo pipefail

num_gpus=1
export FASTVIDEO_ATTENTION_BACKEND=VIDEO_SPARSE_ATTN
# Change the model path to a local directory if you want to run inference
# with your own checkpoint.
export MODEL_BASE=Wan-AI/Wan2.1-T2V-1.3B-Diffusers
# export MODEL_BASE=hunyuanvideo-community/HunyuanVideo
# Note that tp_size and sp_size should be the same and equal to the number
# of GPUs. They are used for different parallel groups: sp_size is used for
# the DiT model and tp_size is used for the encoder models.
#
# Expansions are double-quoted so that a MODEL_BASE pointing at a local path
# containing spaces or glob characters is passed as a single argument.
fastvideo generate \
    --model-path "$MODEL_BASE" \
    --sp-size "$num_gpus" \
    --tp-size "$num_gpus" \
    --num-gpus "$num_gpus" \
    --height 448 \
    --width 832 \
    --num-frames 77 \
    --num-inference-steps 50 \
    --fps 16 \
    --guidance-scale 6.0 \
    --flow-shift 8.0 \
    --VSA-sparsity 0.9 \
    --prompt "A beautiful woman in a red dress walking down a street" \
    --negative-prompt "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards" \
    --seed 1024 \
    --output-path outputs_video_1.3B_VSA/sparsity_0.9/

0 commit comments

Comments
 (0)