Skip to content

Commit 868c489

Browse files
authored
[model] support open-sora-plan v1.2 (#222)
* rename * update * update readme * update * readme * readme * polish * update
1 parent ff918ec commit 868c489

File tree

7 files changed

+3829
-58
lines changed

7 files changed

+3829
-58
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ An easy and efficient system for video generation
88
</p>
99

1010
### Latest News 🔥
11+
- [2024/09] Support [Vchitect-2.0](https://github.com/Vchitect/Vchitect-2.0) and [Open-Sora-Plan v1.2.0](https://github.com/PKU-YuanGroup/Open-Sora-Plan).
1112
- [2024/08] 🔥 Evolve from [OpenDiT](https://github.com/NUS-HPC-AI-Lab/VideoSys/tree/v1.0.0) to <b>VideoSys: An easy and efficient system for video generation.</b>
12-
- [2024/08] 🔥 <b>Release PAB paper: [Real-Time Video Generation with Pyramid Attention Broadcast](https://arxiv.org/abs/2408.12588).</b>
13-
- [2024/06] Propose Pyramid Attention Broadcast (PAB)[[paper](https://arxiv.org/abs/2408.12588)][[blog](https://oahzxl.github.io/PAB/)][[doc](./docs/pab.md)], the first approach to achieve <b>real-time</b> DiT-based video generation, delivering <b>negligible quality loss</b> without <b>requiring any training</b>.
13+
- [2024/08] 🔥 Release PAB paper: <b>[Real-Time Video Generation with Pyramid Attention Broadcast](https://arxiv.org/abs/2408.12588)</b>.
14+
- [2024/06] 🔥 Propose Pyramid Attention Broadcast (PAB)[[paper](https://arxiv.org/abs/2408.12588)][[blog](https://oahzxl.github.io/PAB/)][[doc](./docs/pab.md)], the first approach to achieve <b>real-time</b> DiT-based video generation, delivering <b>negligible quality loss</b> without <b>requiring any training</b>.
1415
- [2024/06] Support [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan) and [Latte](https://github.com/Vchitect/Latte).
15-
- [2024/03] Propose Dynamic Sequence Parallel (DSP)[[paper](https://arxiv.org/abs/2403.10266)][[doc](./docs/dsp.md)], achieves **3x** speed for training and **2x** speed for inference in Open-Sora compared with sota sequence parallelism.
16-
- [2024/03] Support [Open-Sora: Democratizing Efficient Video Production for All](https://github.com/hpcaitech/Open-Sora).
16+
- [2024/03] 🔥 Propose Dynamic Sequence Parallel (DSP)[[paper](https://arxiv.org/abs/2403.10266)][[doc](./docs/dsp.md)], achieves **3x** speed for training and **2x** speed for inference in Open-Sora compared with sota sequence parallelism.
17+
- [2024/03] Support [Open-Sora](https://github.com/hpcaitech/Open-Sora).
1718
- [2024/02] 🎉 Release [OpenDiT](https://github.com/NUS-HPC-AI-Lab/VideoSys/tree/v1.0.0): An Easy, Fast and Memory-Efficient System for DiT Training and Inference.
1819

1920
# About

examples/open_sora_plan/sample.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@
22

33

44
def run_base():
5-
# num frames: 65 or 221
5+
# open-sora-plan v1.2.0
6+
# transformer_type (len, res): 93x480p 93x720p 29x480p 29x720p
67
# change num_gpus for multi-gpu inference
7-
config = OpenSoraPlanConfig(num_frames=65, num_gpus=1)
8+
config = OpenSoraPlanConfig(version="v120", transformer_type="93x480p", num_gpus=1)
89
engine = VideoSysEngine(config)
910

1011
prompt = "Sunset over the sea."
1112
# seed=-1 means random seed. >0 means fixed seed.
1213
video = engine.generate(
1314
prompt=prompt,
1415
guidance_scale=7.5,
15-
num_inference_steps=150,
16+
num_inference_steps=100,
1617
seed=-1,
1718
).video[0]
1819
engine.save_video(video, f"./outputs/{prompt}.mp4")
@@ -36,7 +37,26 @@ def run_pab():
3637
engine.save_video(video, f"./outputs/{prompt}.mp4")
3738

3839

40+
def run_v110():
41+
# open-sora-plan v1.1.0
42+
# transformer_type: 65x512x512 or 221x512x512
43+
# change num_gpus for multi-gpu inference
44+
config = OpenSoraPlanConfig(version="v110", transformer_type="65x512x512", num_gpus=1)
45+
engine = VideoSysEngine(config)
46+
47+
prompt = "Sunset over the sea."
48+
# seed=-1 means random seed. >0 means fixed seed.
49+
video = engine.generate(
50+
prompt=prompt,
51+
guidance_scale=7.5,
52+
num_inference_steps=150,
53+
seed=-1,
54+
).video[0]
55+
engine.save_video(video, f"./outputs/{prompt}.mp4")
56+
57+
3958
if __name__ == "__main__":
4059
run_base()
4160
# run_low_mem()
4261
# run_pab()
62+
# run_v110()

videosys/models/autoencoders/autoencoder_kl_open_sora_plan.py renamed to videosys/models/autoencoders/autoencoder_kl_open_sora_plan_v110.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
124124
last_ckpt_file = ckpt_files[-1]
125125
config_file = os.path.join(pretrained_model_name_or_path, cls.config_name)
126126
model = cls.from_config(config_file)
127-
print("init from {}".format(last_ckpt_file))
127+
# print("init from {}".format(last_ckpt_file))
128128
model.init_from_ckpt(last_ckpt_file)
129129
return model
130130
else:
@@ -778,7 +778,7 @@ def disable_tiling(self):
778778

779779
def init_from_ckpt(self, path, ignore_keys=list(), remove_loss=False):
780780
sd = torch.load(path, map_location="cpu")
781-
print("init from " + path)
781+
# print("init from " + path)
782782
if "state_dict" in sd:
783783
sd = sd["state_dict"]
784784
keys = list(sd.keys())

0 commit comments

Comments
 (0)