Refine method subsections

Ausbxuse · Ausbxuse · commit 44a5f28b2ffc · 2026-03-12T00:12:10.000-07:00
diff --git a/src/features/project-page/components/MethodSection.astro b/src/features/project-page/components/MethodSection.astro
@@ -24,10 +24,21 @@ const { method } = Astro.props;
   </div>
   <div class="method-lead">
     <figure class="method-figure">
+      <div class="method-figure-copy">
+        <p class="demo-group-kicker">{method.architecture.kicker}</p>
+        <h4>{method.architecture.title}</h4>
+      </div>
       <img src={method.architecture.src} alt={method.architecture.alt} />
       <figcaption>{method.architecture.caption}</figcaption>
     </figure>
   </div>
+  <div class="method-summary method-summary--training">
+    <p class="demo-group-kicker">{method.training.kicker}</p>
+    <h3>{method.training.title}</h3>
+    <p>
+      {method.training.body}
+    </p>
+  </div>
   <div class="method-steps">
     {method.steps.map((card, index) => (
       <article class={`step ${index === 0 ? "active" : ""}`}>
@@ -54,6 +65,7 @@ const { method } = Astro.props;
     <article class="method-visual method-visual--full">
       <img src={method.supportBlocks[2].src} alt={method.supportBlocks[2].alt} />
       <div class="method-visual-copy">
+        <p class="demo-group-kicker">{method.supportBlocks[2].kicker}</p>
         <h4>{method.supportBlocks[2].title}</h4>
         <p>{method.supportBlocks[2].body}</p>
       </div>
@@ -63,6 +75,7 @@ const { method } = Astro.props;
     <article class="method-visual method-visual--full">
       <img src={method.supportBlocks[3].src} alt={method.supportBlocks[3].alt} />
       <div class="method-visual-copy">
+        <p class="demo-group-kicker">{method.supportBlocks[3].kicker}</p>
         <h4>{method.supportBlocks[3].title}</h4>
         <p>{method.supportBlocks[3].body}</p>
       </div>
diff --git a/src/features/project-page/content/projectContent.js b/src/features/project-page/content/projectContent.js
@@ -100,7 +100,14 @@ const withBase = (path) => {
  *       title: string;
  *       body: string;
  *     };
- *     architecture: FigureBlock;
+ *     architecture: FigureBlock & {
+ *       kicker: string;
+ *     };
+ *     training: {
+ *       kicker: string;
+ *       title: string;
+ *       body: string;
+ *     };
  *     steps: MethodStep[];
  *     supportBlocks: FigureBlock[];
  *   };
@@ -316,17 +323,24 @@ export const projectPageContent = {
   method: {
     sectionTitle: "Method",
     lead: {
-      kicker: "Data Collection",
+      kicker: "DATA COLLECTION",
       title:
         "Human egocentric video and real humanoid data play different roles",
       body: "Training a humanoid foundation model faces a significant data scarcity bottleneck. Human egocentric videos are much cheaper to scale than real-world robotics data and provide large-scale, high-quality, and diverse supervision, while real humanoid data is needed to learn embodiment-specific whole-body control. In Ψ₀, we therefore combine large-scale human video with high-quality humanoid teleoperation data, but use them for different learning stages rather than forcing a single monolithic policy to model both distributions at once.",
     },
     architecture: {
+      kicker: "MODEL ARCHITECTURE",
+      title: "A triple-system foundation model for whole-body control",
       src: withBase("/figures/architecture.svg"),
       alt: "Ψ₀ architecture diagram",
       caption:
         "The high-level policy consists of a vision-language backbone and a multi-modal diffusion transformer action expert, while an RL-based tracking controller executes the lower-body commands for whole-body control.",
     },
+    training: {
+      kicker: "STAGED TRAINING",
+      title: "Different learning goals for different stages",
+      body: "We present an efficient training recipe for learning humanoid loco-manipulation skills from both human videos and real robot data. The overall training procedure consists of three stages: first, pre-training the VLM backbone on large-scale high-quality and diverse human egocentric videos; second, post-training the flow-based action expert on cross-task real humanoid data; and third, fine-tuning the action expert using a small amount of in-domain task data, which enables rapid adaptation to new tasks.",
+    },
     steps: [
       {
         step: "01",
@@ -348,24 +362,28 @@ export const projectPageContent = {
       {
         src: withBase("/figures/teleoperation.png"),
         alt: "Teleoperation setup diagram",
+        kicker: "DATA COLLECTION",
         title: "Collecting Complex Humanoid Data",
         body: "Efficiently learning a long-horizon loco-manipulation task critically depends on the quality of in-domain data for fine-tuning. To address the limitations of prior systems, we propose a tailored teleoperation framework that explicitly separates upper-body pose tracking, dexterous manipulation, and locomotion commands, while enabling single-operator whole-body control. By using a small set of wearable trackers and separating locomotion from in-place whole-body actions, our framework enables single-operator humanoid teleoperation with improved locomotion stability across diverse task scenarios.",
       },
       {
         src: withBase("/figures/rtc.png"),
         alt: "Real-time chunking diagram",
+        kicker: "DEPLOYMENT AND RTC",
         title: "Real-Time Chunking for Deployment",
         body: "Humanoid robots require smooth and reactive control, particularly when executing long-horizon, dexterous manipulation tasks. However, our model comprises over 2.5 billion parameters, with a single forward pass taking approximately 160 ms. To enable smooth policy rollout despite this latency, we adopt training-time real-time chunking. With RTC, each action prediction is conditioned on the previously committed action chunk and outputs a consistent chunk of future actions, while inference runs asynchronously with execution to avoid interruptions between chunks.",
       },
       {
         src: withBase("/figures/sim-data.png"),
         alt: "Simulation and data generation figure",
+        kicker: "FAST EVALUATION IN SIMULATION",
         title: "Fast Evaluation in Simulation",
         body: "Although our primary goal is to deploy Ψ₀ in the real world, simulation is valuable for accelerating experimental iteration and enabling unified, standardized evaluation. We introduce a large-scale humanoid loco-manipulation benchmark in simulation with automated task generation across 50 indoor scenes, imported rigid objects, and randomized episode conditions, giving Ψ₀ a fast evaluation loop before the most expensive hardware experiments.",
       },
       {
         src: withBase("/figures/psi-tasks.png"),
         alt: "Eight real-world Ψ₀ benchmark tasks",
+        kicker: "REAL-WORLD TASK SETUP",
         title: "Real-World Deployment",
         body: "We evaluate Ψ₀ on eight diverse long-horizon dexterous loco-manipulation tasks involving manipulation, whole-body motion, and locomotion. The tasks range from simple interactions, such as pick-and-place, pushing, and wiping, to more challenging dexterous manipulations requiring precise finger-object coordination, including turning a faucet and pulling out a chip tray.",
       },
diff --git a/src/features/project-page/project-page.css b/src/features/project-page/project-page.css
@@ -592,6 +592,20 @@
   gap: 10px;
 }
 
+.method-figure-copy { /* wrapper for the kicker + h4 rendered above the architecture image in .method-figure */
+  display: grid;
+  gap: 8px; /* vertical spacing between the kicker and the heading */
+}
+
+.method-figure-copy h4 { /* title shown above the architecture figure */
+  margin: 0; /* spacing is provided by the parent grid gap instead of heading margins */
+  font-family: "Iowan Old Style", "Palatino Linotype", "Book Antiqua", Palatino, "Nimbus Roman No9 L", "Times New Roman", serif;
+  font-size: clamp(24px, 3vw, 34px); /* fluid size: 3% of viewport width, bounded to 24px–34px */
+  font-weight: 600;
+  line-height: 1.1;
+  letter-spacing: -0.02em; /* slight negative tracking on the heading */
+}
+
 .method-summary h3,
 .method-panel h4 {
   margin: 0;