physical-superintelligence-lab
diff --git a/public/figures/architecture.png
900 KB b/public/figures/architecture.png
900 KB
diff --git a/public/media/psi-0/psi_0_1080p.mp4
39.9 MB b/public/media/psi-0/psi_0_1080p.mp4
39.9 MB
diff --git a/src/features/project-page/components/HeroSection.astro
Lines changed: 1 addition & 0 deletions b/src/features/project-page/components/HeroSection.astro
Lines changed: 1 addition & 0 deletions
diff --git a/src/features/project-page/content/projectContent.js
Lines changed: 1 addition & 1 deletion b/src/features/project-page/content/projectContent.js
Lines changed: 1 addition & 1 deletion
@@ -35,6 +35,7 @@ const { hero } = Astro.props;
         src={hero.teaserVideoSrc}
         aria-label={hero.teaserVideoAlt}
         autoplay
+        controls
         muted
         loop
         playsinline
 
@@ -358,7 +358,7 @@ export const projectPageContent = {
       kicker: "MODEL ARCHITECTURE",
       title: "Three-System Foundation Model for Whole-Body Control",
       body: "Ψ₀ is a foundation model that adopts a triple-system architecture, following prior work. The high-level policy consists of two end-to-end-trained components: a vision-language backbone (system-2) and a multi-modal diffusion transformer (MM-DiT) action expert (system-1). We use the state-of-the-art vision-language foundation model Qwen3-VL-2B-Instruct as system-2. The action expert is implemented as a flow-based MM-DiT inspired by Stable Diffusion 3, containing approximately 500M parameters. Conditioned on hidden features from the VLM backbone, the action expert predicts future whole-body action chunks. The 8-DoF lower-body actions are passed to system-0, a RL-based tracking policy. We adopt the off-the-shelf controller AMO, which maps these inputs to lower-body joint angles for whole-body control.",
-      src: withBase("/figures/architecture.svg"),
+      src: withBase("/figures/architecture.png"),
       alt: "Ψ₀ architecture diagram",
       caption: "",
     },