
Commit cd7ac04

support vchitect and fix some bugs (#216)
- support vchitect-2.0 including inference, multi-gpu, pab, low memory
- improve cpu offload: add more modules to offload to save more memory
- adjust vae forward in open sora and open sora plan to suit cpu offload
- adjust offload step to be more efficient
- fix pab step problem: sampling steps for Open Sora #211
1 parent a2b71b6 commit cd7ac04

25 files changed (+2525 / −85 lines)
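
For context, a minimal usage sketch of the new Vchitect support added here (assuming VchitectConfig accepts num_gpus, enable_pab and cpu_offload together; the canonical per-feature examples are in examples/vchitect/sample.py below):

from videosys import VchitectConfig, VideoSysEngine

# Sketch only: mirrors examples/vchitect/sample.py but enables PAB and CPU offload
# in one config; combining the two flags is an assumption, not verified by this commit.
config = VchitectConfig("Vchitect/Vchitect-2.0-2B", num_gpus=1, enable_pab=True, cpu_offload=True)
engine = VideoSysEngine(config)

prompt = "Sunset over the sea."
video = engine.generate(prompt, seed=0).video[0]
engine.save_video(video, f"./outputs/{prompt}.mp4")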

README.md

Lines changed: 18 additions & 10 deletions
@@ -50,7 +50,7 @@ pip install -e .
 
 VideoSys supports many diffusion models with our various acceleration techniques, enabling these models to run faster and consume less memory.
 
-<b>You can find all available models and their supported acceleration techniques in the following table. Click `Doc` to see how to use them.</b>
+<b>You can find all available models and their supported acceleration techniques in the following table. Click `Code` to see how to use them.</b>
 
 <table>
   <tr>
@@ -65,20 +65,20 @@ VideoSys supports many diffusion models with our various acceleration techniques
     <th><a href="https://github.com/NUS-HPC-AI-Lab/VideoSys?tab=readme-ov-file#pyramid-attention-broadcast-pab-blogdoc">PAB</a></th>
   </tr>
   <tr>
-    <td>Open-Sora [<a href="https://github.com/hpcaitech/Open-Sora">source</a>]</td>
-    <td align="center">🟡</td>
+    <td>Vchitect [<a href="https://github.com/Vchitect/Vchitect-2.0">source</a>]</td>
+    <td align="center">/</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
     <td align="center">✅</td>
-    <td align="center"><a href="./examples/open_sora/sample.py">Code</a></td>
+    <td align="center"><a href="./examples/vchitect/sample.py">Code</a></td>
   </tr>
   <tr>
-    <td>Open-Sora-Plan [<a href="https://github.com/PKU-YuanGroup/Open-Sora-Plan">source</a>]</td>
+    <td>CogVideoX [<a href="https://github.com/THUDM/CogVideo">source</a>]</td>
     <td align="center">/</td>
     <td align="center">✅</td>
+    <td align="center">/</td>
     <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center"><a href="./examples/open_sora_plan/sample.py">Code</a></td>
+    <td align="center"><a href="./examples/cogvideox/sample.py">Code</a></td>
   </tr>
   <tr>
     <td>Latte [<a href="https://github.com/Vchitect/Latte">source</a>]</td>
@@ -89,12 +89,20 @@ VideoSys supports many diffusion models with our various acceleration techniques
     <td align="center"><a href="./examples/latte/sample.py">Code</a></td>
   </tr>
   <tr>
-    <td>CogVideoX [<a href="https://github.com/THUDM/CogVideo">source</a>]</td>
+    <td>Open-Sora-Plan [<a href="https://github.com/PKU-YuanGroup/Open-Sora-Plan">source</a>]</td>
     <td align="center">/</td>
     <td align="center">✅</td>
-    <td align="center">/</td>
     <td align="center">✅</td>
-    <td align="center"><a href="./examples/cogvideox/sample.py">Code</a></td>
+    <td align="center">✅</td>
+    <td align="center"><a href="./examples/open_sora_plan/sample.py">Code</a></td>
+  </tr>
+  <tr>
+    <td>Open-Sora [<a href="https://github.com/hpcaitech/Open-Sora">source</a>]</td>
+    <td align="center">🟡</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center"><a href="./examples/open_sora/sample.py">Code</a></td>
   </tr>
 </table>

examples/vchitect/sample.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+from videosys import VchitectConfig, VideoSysEngine
+
+
+def run_base():
+    # change num_gpus for multi-gpu inference
+    config = VchitectConfig("Vchitect/Vchitect-2.0-2B", num_gpus=1)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    # seed=-1 means random seed. >0 means fixed seed.
+    # WxH: 480x288 624x352 432x240 768x432
+    video = engine.generate(
+        prompt=prompt,
+        negative_prompt="",
+        num_inference_steps=100,
+        guidance_scale=7.5,
+        width=480,
+        height=288,
+        frames=40,
+        seed=0,
+    ).video[0]
+    engine.save_video(video, f"./outputs/{prompt}.mp4")
+
+
+def run_pab():
+    config = VchitectConfig("Vchitect/Vchitect-2.0-2B", enable_pab=True)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    video = engine.generate(prompt).video[0]
+    engine.save_video(video, f"./outputs/{prompt}.mp4")
+
+
+def run_low_mem():
+    config = VchitectConfig("Vchitect/Vchitect-2.0-2B", cpu_offload=True)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    video = engine.generate(prompt).video[0]
+    engine.save_video(video, f"./outputs/{prompt}.mp4")
+
+
+if __name__ == "__main__":
+    run_base()
+    # run_pab()
+    # run_low_mem()

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+accelerate>0.17.0
 bs4
 click
 colossalai

tests/examples/test_sample.py

Lines changed: 2 additions & 1 deletion
@@ -11,8 +11,9 @@
 import examples.latte.sample as latte
 import examples.open_sora.sample as open_sora
 import examples.open_sora_plan.sample as open_sora_plan
+import examples.vchitect.sample as vchitect
 
-files = [cogvideox, latte, open_sora, open_sora_plan]
+files = [cogvideox, latte, open_sora, open_sora_plan, vchitect]
 members = []
 
 for file in files:

tests/pipelines/vchitect/__init__.py

Whitespace-only changes.
Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import pytest
+
+from videosys import VchitectConfig, VideoSysEngine
+
+
+@pytest.mark.parametrize("num_gpus", [1, 2])
+def test_base(num_gpus):
+    config = VchitectConfig(num_gpus=num_gpus)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    video = engine.generate(prompt, seed=0).video[0]
+    engine.save_video(video, f"./test_outputs/{prompt}_vchitect_{num_gpus}.mp4")
+
+
+@pytest.mark.parametrize("num_gpus", [1])
+def test_pab(num_gpus):
+    config = VchitectConfig(num_gpus=num_gpus, enable_pab=True)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    video = engine.generate(prompt, seed=0).video[0]
+    engine.save_video(video, f"./test_outputs/{prompt}_vchitect_pab_{num_gpus}.mp4")
+
+
+@pytest.mark.parametrize("num_gpus", [1])
+def test_low_mem(num_gpus):
+    config = VchitectConfig(num_gpus=num_gpus, cpu_offload=True)
+    engine = VideoSysEngine(config)
+
+    prompt = "Sunset over the sea."
+    video = engine.generate(prompt, seed=0).video[0]
+    engine.save_video(video, f"./test_outputs/{prompt}_vchitect_low_mem_{num_gpus}.mp4")

videosys/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -4,12 +4,14 @@
 from .pipelines.latte import LatteConfig, LattePABConfig, LattePipeline
 from .pipelines.open_sora import OpenSoraConfig, OpenSoraPABConfig, OpenSoraPipeline
 from .pipelines.open_sora_plan import OpenSoraPlanConfig, OpenSoraPlanPABConfig, OpenSoraPlanPipeline
+from .pipelines.vchitect import VchitectConfig, VchitectXLPipeline
 
 __all__ = [
     "initialize",
     "VideoSysEngine",
     "LattePipeline", "LatteConfig", "LattePABConfig",
     "OpenSoraPlanPipeline", "OpenSoraPlanConfig", "OpenSoraPlanPABConfig",
     "OpenSoraPipeline", "OpenSoraConfig", "OpenSoraPABConfig",
-    "CogVideoXConfig", "CogVideoXPipeline", "CogVideoXPABConfig"
+    "CogVideoXPipeline", "CogVideoXConfig", "CogVideoXPABConfig",
+    "VchitectXLPipeline", "VchitectConfig",
 ]  # fmt: skip

videosys/core/comm.py

Lines changed: 14 additions & 0 deletions
@@ -404,3 +404,17 @@ def all_to_all_with_pad(
         input_ = input_.narrow(gather_dim, 0, input_.size(gather_dim) - gather_pad)
 
     return input_
+
+
+def split_from_second_dim(x, batch_size, parallel_group):
+    x = x.view(batch_size, -1, *x.shape[1:])
+    x = split_sequence(x, parallel_group, dim=1, grad_scale="down", pad=get_pad("temporal"))
+    x = x.reshape(-1, *x.shape[2:])
+    return x
+
+
+def gather_from_second_dim(x, batch_size, parallel_group):
+    x = x.view(batch_size, -1, *x.shape[1:])
+    x = gather_sequence(x, parallel_group, dim=1, grad_scale="up", pad=get_pad("temporal"))
+    x = x.reshape(-1, *x.shape[2:])
+    return x
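
Shape-wise, split_from_second_dim restores the batch dimension, shards the second (temporal/frame) dimension across the sequence-parallel group, and flattens back; gather_from_second_dim reverses it. A standalone, single-process sketch of just that reshape-shard-reshape pattern (torch.chunk stands in for split_sequence; world_size and rank are hypothetical):

import torch

# Illustration of the reshape logic only; the real helper shards via
# split_sequence over parallel_group with padding from get_pad("temporal").
def split_second_dim_local(x: torch.Tensor, batch_size: int, world_size: int, rank: int) -> torch.Tensor:
    x = x.view(batch_size, -1, *x.shape[1:])     # (B*T, ...) -> (B, T, ...)
    x = torch.chunk(x, world_size, dim=1)[rank]  # keep this rank's slice of T
    return x.reshape(-1, *x.shape[2:])           # (B, T/ws, ...) -> (B*T/ws, ...)

x = torch.randn(2 * 8, 16, 64)  # batch=2, frames=8, tokens=16, dim=64
print(split_second_dim_local(x, batch_size=2, world_size=2, rank=0).shape)  # torch.Size([8, 16, 64])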

videosys/core/pab_mgr.py

Lines changed: 5 additions & 6 deletions
@@ -6,7 +6,6 @@
 class PABConfig:
     def __init__(
         self,
-        steps: int,
         cross_broadcast: bool = False,
         cross_threshold: list = None,
         cross_range: int = None,
@@ -20,7 +19,7 @@ def __init__(
         mlp_spatial_broadcast_config: dict = None,
         mlp_temporal_broadcast_config: dict = None,
     ):
-        self.steps = steps
+        self.steps = None
 
         self.cross_broadcast = cross_broadcast
         self.cross_threshold = cross_threshold
@@ -45,7 +44,7 @@ class PABManager:
     def __init__(self, config: PABConfig):
         self.config: PABConfig = config
 
-        init_prompt = f"Init Pyramid Attention Broadcast. steps: {config.steps}."
+        init_prompt = f"Init Pyramid Attention Broadcast."
         init_prompt += f" spatial broadcast: {config.spatial_broadcast}, spatial range: {config.spatial_range}, spatial threshold: {config.spatial_threshold}."
         init_prompt += f" temporal broadcast: {config.temporal_broadcast}, temporal range: {config.temporal_range}, temporal_threshold: {config.temporal_threshold}."
         init_prompt += f" cross broadcast: {config.cross_broadcast}, cross range: {config.cross_range}, cross threshold: {config.cross_threshold}."
@@ -78,7 +77,7 @@ def if_broadcast_temporal(self, timestep: int, count: int):
         count = (count + 1) % self.config.steps
         return flag, count
 
-    def if_broadcast_spatial(self, timestep: int, count: int, block_idx: int):
+    def if_broadcast_spatial(self, timestep: int, count: int):
         if (
             self.config.spatial_broadcast
             and (timestep is not None)
@@ -213,10 +212,10 @@ def if_broadcast_temporal(timestep: int, count: int):
     return PAB_MANAGER.if_broadcast_temporal(timestep, count)
 
 
-def if_broadcast_spatial(timestep: int, count: int, block_idx: int):
+def if_broadcast_spatial(timestep: int, count: int):
     if not enable_pab():
         return False, count
-    return PAB_MANAGER.if_broadcast_spatial(timestep, count, block_idx)
+    return PAB_MANAGER.if_broadcast_spatial(timestep, count)
 
 
 def if_broadcast_mlp(timestep: int, count: int, block_idx: int, all_timesteps, is_temporal=False):
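
The spatial broadcast decision now depends only on the timestep and a running counter, with the step count read from the manager's config rather than passed per block. A hypothetical caller-side sketch of the updated API (the block attributes and attn call are illustrative names, not from this commit):

# flag tells the block whether to reuse its cached spatial attention output
broadcast, self.spatial_count = if_broadcast_spatial(timestep, self.spatial_count)
if broadcast:
    attn_output = self.cached_spatial_attn       # reuse previous result
else:
    attn_output = self.attn(hidden_states)       # recompute and refresh the cache
    self.cached_spatial_attn = attn_output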

videosys/core/pipeline.py

Lines changed: 4 additions & 3 deletions
@@ -13,9 +13,10 @@ def __init__(self):
 
     @staticmethod
    def set_eval_and_device(device: torch.device, *modules):
-        for module in modules:
-            module.eval()
-            module.to(device)
+        modules = list(modules)
+        for i in range(len(modules)):
+            modules[i] = modules[i].eval()
+            modules[i] = modules[i].to(device)
 
     @abstractmethod
     def generate(self, *args, **kwargs):
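
Capturing the objects returned by eval() and to(), rather than relying on in-place mutation, presumably anticipates the CPU-offload changes in this commit, where a transferred module may be wrapped or re-materialized; for plain nn.Module instances the two forms behave the same.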
