Skip to content

Commit 44a5f28

Browse files
committed
Refine method subsections
1 parent 32dcd66 commit 44a5f28

File tree

3 files changed

+47
-2
lines changed

3 files changed

+47
-2
lines changed

src/features/project-page/components/MethodSection.astro

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,21 @@ const { method } = Astro.props;
2424
</div>
2525
<div class="method-lead">
2626
<figure class="method-figure">
27+
<div class="method-figure-copy">
28+
<p class="demo-group-kicker">{method.architecture.kicker}</p>
29+
<h4>{method.architecture.title}</h4>
30+
</div>
2731
<img src={method.architecture.src} alt={method.architecture.alt} />
2832
<figcaption>{method.architecture.caption}</figcaption>
2933
</figure>
3034
</div>
35+
<div class="method-summary method-summary--training">
36+
<p class="demo-group-kicker">{method.training.kicker}</p>
37+
<h3>{method.training.title}</h3>
38+
<p>
39+
{method.training.body}
40+
</p>
41+
</div>
3142
<div class="method-steps">
3243
{method.steps.map((card, index) => (
3344
<article class={`step ${index === 0 ? "active" : ""}`}>
@@ -54,6 +65,7 @@ const { method } = Astro.props;
5465
<article class="method-visual method-visual--full">
5566
<img src={method.supportBlocks[2].src} alt={method.supportBlocks[2].alt} />
5667
<div class="method-visual-copy">
68+
<p class="demo-group-kicker">{method.supportBlocks[2].kicker}</p>
5769
<h4>{method.supportBlocks[2].title}</h4>
5870
<p>{method.supportBlocks[2].body}</p>
5971
</div>
@@ -63,6 +75,7 @@ const { method } = Astro.props;
6375
<article class="method-visual method-visual--full">
6476
<img src={method.supportBlocks[3].src} alt={method.supportBlocks[3].alt} />
6577
<div class="method-visual-copy">
78+
<p class="demo-group-kicker">{method.supportBlocks[3].kicker}</p>
6679
<h4>{method.supportBlocks[3].title}</h4>
6780
<p>{method.supportBlocks[3].body}</p>
6881
</div>

src/features/project-page/content/projectContent.js

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,14 @@ const withBase = (path) => {
100100
* title: string;
101101
* body: string;
102102
* };
103-
* architecture: FigureBlock;
103+
* architecture: FigureBlock & {
104+
* kicker: string;
105+
* };
106+
* training: {
107+
* kicker: string;
108+
* title: string;
109+
* body: string;
110+
* };
104111
* steps: MethodStep[];
105112
* supportBlocks: FigureBlock[];
106113
* };
@@ -316,17 +323,24 @@ export const projectPageContent = {
316323
method: {
317324
sectionTitle: "Method",
318325
lead: {
319-
kicker: "Data Collection",
326+
kicker: "DATA COLLECTION",
320327
title:
321328
"Human egocentric video and real humanoid data play different roles",
322329
body: "Training a humanoid foundation model faces a significant data scarcity bottleneck. Human egocentric videos are much cheaper to scale than real-world robotics data and provide large-scale, high-quality, and diverse supervision, while real humanoid data is needed to learn embodiment-specific whole-body control. In Ψ₀, we therefore combine large-scale human video with high-quality humanoid teleoperation data, but use them for different learning stages rather than forcing a single monolithic policy to model both distributions at once.",
323330
},
324331
architecture: {
332+
kicker: "MODEL ARCHITECTURE",
333+
title: "A triple-system foundation model for whole-body control",
325334
src: withBase("/figures/architecture.svg"),
326335
alt: "Ψ₀ architecture diagram",
327336
caption:
328337
"The high-level policy consists of a vision-language backbone and a multi-modal diffusion transformer action expert, while an RL-based tracking controller executes the lower-body commands for whole-body control.",
329338
},
339+
training: {
340+
kicker: "STAGED TRAINING",
341+
title: "Different learning goals for different stages",
342+
body: "We present an efficient training recipe for learning humanoid loco-manipulation skills from both human videos and real robot data. The overall training procedure consists of three stages: first, pre-training the VLM backbone on large-scale high-quality and diverse human egocentric videos; second, post-training the flow-based action expert on cross-task real humanoid data; and third, fine-tuning the action expert using a small amount of in-domain task data, which enables rapid adaptation to new tasks.",
343+
},
330344
steps: [
331345
{
332346
step: "01",
@@ -348,24 +362,28 @@ export const projectPageContent = {
348362
{
349363
src: withBase("/figures/teleoperation.png"),
350364
alt: "Teleoperation setup diagram",
365+
kicker: "DATA COLLECTION",
351366
title: "Collecting Complex Humanoid Data",
352367
body: "Efficiently learning a long-horizon loco-manipulation task critically depends on the quality of in-domain data for fine-tuning. To address the limitations of prior systems, we propose a tailored teleoperation framework that explicitly separates upper-body pose tracking, dexterous manipulation, and locomotion commands, while enabling single-operator whole-body control. By using a small set of wearable trackers and separating locomotion from in-place whole-body actions, our framework enables single-operator humanoid teleoperation with improved locomotion stability across diverse task scenarios.",
353368
},
354369
{
355370
src: withBase("/figures/rtc.png"),
356371
alt: "Real-time chunking diagram",
372+
kicker: "DEPLOYMENT AND RTC",
357373
title: "Real-Time Chunking for Deployment",
358374
body: "Humanoid robots require smooth and reactive control, particularly when executing long-horizon, dexterous manipulation tasks. However, our model comprises over 2.5 billion parameters, with a single forward pass taking approximately 160 ms. To enable smooth policy rollout despite this latency, we adopt training-time real-time chunking. With RTC, each action prediction is conditioned on the previously committed action chunk and outputs a consistent chunk of future actions, while inference runs asynchronously with execution to avoid interruptions between chunks.",
359375
},
360376
{
361377
src: withBase("/figures/sim-data.png"),
362378
alt: "Simulation and data generation figure",
379+
kicker: "FAST EVALUATION IN SIMULATION",
363380
title: "Fast Evaluation in Simulation",
364381
body: "Although our primary goal is to deploy Ψ₀ in the real world, simulation is valuable for accelerating experimental iteration and enabling unified, standardized evaluation. We introduce a large-scale humanoid loco-manipulation benchmark in simulation with automated task generation across 50 indoor scenes, imported rigid objects, and randomized episode conditions, giving Ψ₀ a fast evaluation loop before the more expensive hardware experiments.",
365382
},
366383
{
367384
src: withBase("/figures/psi-tasks.png"),
368385
alt: "Eight real-world Ψ₀ benchmark tasks",
386+
kicker: "REAL-WORLD TASK SETUP",
369387
title: "Real-World Deployment",
370388
body: "We evaluate Ψ₀ on eight diverse long-horizon dexterous loco-manipulation tasks involving manipulation, whole-body motion, and locomotion. The tasks range from simple interactions, such as pick-and-place, pushing, and wiping, to more challenging dexterous manipulations requiring precise finger-object coordination, including turning a faucet and pulling out a chip tray.",
371389
},

src/features/project-page/project-page.css

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,20 @@
592592
gap: 10px;
593593
}
594594

595+
.method-figure-copy {
596+
display: grid;
597+
gap: 8px;
598+
}
599+
600+
.method-figure-copy h4 {
601+
margin: 0;
602+
font-family: "Iowan Old Style", "Palatino Linotype", "Book Antiqua", Palatino, "Nimbus Roman No9 L", "Times New Roman", serif;
603+
font-size: clamp(24px, 3vw, 34px);
604+
font-weight: 600;
605+
line-height: 1.1;
606+
letter-spacing: -0.02em;
607+
}
608+
595609
.method-summary h3,
596610
.method-panel h4 {
597611
margin: 0;

0 commit comments

Comments
 (0)