physical-superintelligence-lab.github.io/research.html at main · physical-superintelligence-lab/physical-superintelligence-lab.github.io · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" type="text/css" href="css/fancy.css" />
    <link rel="icon" type="image/svg+xml" sizes="any" href="/assets/gvl.svg">
    <title>Physical Superintelligence Lab</title>
</head>

<body>
    <header id="header">
        <a href="index.html"><img style="height:35px;" src="assets/gvl.svg" alt="Logo" /></a>
        <nav class="nav-links">
            <a href="index.html">Home</a>
            <a href="lab.html">Lab</a>
            <a href="research.html" class="active">Research</a>
            <a href="contact.html">Contact</a>
        </nav>
    </header>

    <div class="section" id="publications"></div>

    <script>

        let publications = [
            {
                "name": "VectorMapNet: End-to-End Vectorized HD Map Learning",
                "authors": "Yicheng Liu, Tianyuan Yuan, Yue Wang, Yilun Wang, Hang Zhao",
                "conference": "ICML 2023",
                "webpage": "https://tsinghua-mars-lab.github.io/vectormapnet/",
                "paper": "https://arxiv.org/abs/2206.08920",
                "code": "https://github.com/Tsinghua-MARS-Lab/vectormapnet",
                "thumbnail": "assets/papers/vectormapnet.png"
            },
            {
                "name": "Neural Map Prior for Autonomous Driving",
                "authors": "Xuan Xiong, Yicheng Liu, Tianyuan Yuan, Yue Wang, Yilun Wang, Hang Zhao",
                "conference": "CVPR 2023",
                "webpage": "https://tsinghua-mars-lab.github.io/neural_map_prior/",
                "paper": "https://arxiv.org/abs/2303.00322",
                "code": "https://github.com/Tsinghua-MARS-Lab/neural_map_prior",
                "thumbnail": "assets/papers/npn_demo_website.gif"
            },
            {
                "name": "ViP3D: End-to-End Visual Trajectory Prediction via 3D Agent Queries",
                "authors": "Junru Gu*, Chenxu Hu*, Tianyuan Zhang, Xuanyao Chen, Yilun Wang, Yue Wang, Hang Zhao",
                "conference": "CVPR 2023",
                "webpage": "https://tsinghua-mars-lab.github.io/ViP3D/",
                "paper": "https://arxiv.org/abs/2208.01582",
                "code": "https://github.com/Tsinghua-MARS-Lab/ViP3D",
                "thumbnail": "assets/papers/vip3d.png"
            },
            {
                "name": "GeoMAE: Masked Geometric Target Prediction for Self-supervised Point Cloud Pre-Training",
                "authors": "Xiaoyu Tian, Haoxi Ran, Yue Wang, Hang Zhao",
                "conference": "CVPR 2023",
                "paper": "https://arxiv.org/abs/2305.08808",
                "code": "https://github.com/Tsinghua-MARS-Lab/GeoMAE",
                "thumbnail": "assets/papers/geomae.png"
            },
            {
                "name": "FreeNeRF: Improving Few-shot Neural Rendering with Free Frequency Regularization",
                "authors": "Jiawei Yang, Marco Pavone, Yue Wang",
                "conference": "CVPR 2023",
                "webpage": "https://jiawei-yang.github.io/FreeNeRF/",
                "paper": "https://arxiv.org/abs/2303.07418",
                "code": "https://github.com/Jiawei-Yang/FreeNeRF",
                "thumbnail": "assets/papers/freenerf.png",
            },
            {
                "name": "Cross-Dataset Sensor Alignment: Making Visual 3D Object Detector Generalizable",
                "authors": "Liangtao Zheng, Yicheng Liu, Yue Wang, Hang Zhao",
                "conference": "CoRL 2023",
                "paper": "https://proceedings.mlr.press/v229/zheng23a.html",
                "thumbnail": "assets/papers/cross_dataset_sensor_alignment.png"
            },
            {
                "name": "Occ3D: A Large-Scale 3D Occupancy Prediction Benchmark for Autonomous Driving",
                "authors": "Xiaoyu Tian*, Tao Jiang*, Longfei Yun, Yucheng Mao, Huitong Yang, Yue Wang, Yilun Wang, Hang Zhao",
                "conference": "NeurIPS 2023",
                "webpage": "https://tsinghua-mars-lab.github.io/Occ3D/",
                "paper": "https://arxiv.org/abs/2304.14365",
                "code": "https://github.com/Tsinghua-MARS-Lab/Occ3D",
                "thumbnail": "assets/papers/occ3d.png"
            },
            {
                "name": "GPT-Driver: Learning to Drive with GPT",
                "authors": "Jiageng Mao, Yuxi Qian, Junjie Ye, Hang Zhao, Yue Wang",
                "conference": "NeurIPS 2023 Foundation Models for Decision Making Workshop",
                "webpage": "https://pointscoder.github.io/projects/gpt_driver/index.html",
                "paper": "https://arxiv.org/abs/2310.01415",
                "code": "https://github.com/PointsCoder/GPT-Driver",
                "thumbnail": "assets/papers/gpt_driver.png"
            },
            {
                "name": "Rethinking Directional Integration in Neural Radiance Fields",
                "authors": "Congyue Deng, Jiawei Yang, Leonidas Guibas, Yue Wang",
                "conference": "arXiv 2023",
                "webpage": "https://cs.stanford.edu/~congyue/linerf/",
                "paper": "https://arxiv.org/abs/2311.16504",
                "thumbnail": "assets/papers/linerf.gif"
            },
            {
                "name": "Augmenting Lane Perception and Topology Understanding with Standard Definition Navigation Map",
                "authors": "Katie Z Luo, Xinshuo Weng, Yan Wang, Shuang Wu, Jie Li, Kilian Q Weinberger, Yue Wang, Marco Pavone",
                "conference": "ICRA 2024",
                "webpage": "https://katieluo88.github.io/SMERF/",
                "paper": "https://arxiv.org/abs/2311.04079v1",
                "code": "https://github.com/NVlabs/SMERF",
                "thumbnail": "assets/papers/smerf.gif"
            },
            {
                "name": "EmerNeRF: Emergent Spatial-Temporal Scene Decomposition via Self-Supervision",
                "authors": "Jiawei Yang, Boris Ivanovic, Or Litany, Xinshuo Weng, Seung Wook Kim, Boyi Li, Tong Che, Danfei Xu, Sanja Fidler, Marco Pavone, Yue Wang",
                "conference": "ICLR 2024",
                "webpage": "https://emernerf.github.io/",
                "paper": "https://arxiv.org/abs/2311.02077",
                "code": "https://github.com/NVlabs/EmerNeRF",
                "thumbnail": "assets/papers/emernerf.png"
            },
            {
                "name": "PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving",
                "authors": "Xinshuo Weng, Boris Ivanovic, Yan Wang, Yue Wang, Marco Pavone",
                "conference": "CVPR 2024",
                "webpage": "https://xinshuoweng.github.io/paradrive/",
                "paper": "https://openaccess.thecvf.com/content/CVPR2024/html/Weng_PARA-Drive_Parallelized_Architecture_for_Real-time_Autonomous_Driving_CVPR_2024_paper.html",
                "thumbnail": "assets/papers/para_drive.png"
            },
            {
                "name": "Driving Everywhere with Large Language Model Policy Adaptation",
                "authors": "Boyi Li, Yue Wang, Jiageng Mao, Boris Ivanovic, Sushant Veer, Karen Leung, Marco Pavone",
                "conference": "CVPR 2024",
                "webpage": "https://boyiliee.github.io/llada/",
                "paper": "https://arxiv.org/pdf/2402.05932",
                "code": "https://github.com/Boyiliee/LLaDA-AV",
                "thumbnail": "assets/papers/llada.png"
            },
            {
                "name": "Towards Realistic Scene Generation with LiDAR Diffusion Models",
                "authors": "Haoxi Ran, Vitor Guizilini, Yue Wang",
                "conference": "CVPR 2024",
                "webpage": "https://lidar-diffusion.github.io/",
                "paper": "https://arxiv.org/abs/2404.00815",
                "code": "https://github.com/hancyran/LiDAR-Diffusion",
                "thumbnail": "assets/papers/lidar_diffusion.jpg"
            },
            {
                "name": "Parallelized Spatiotemporal Binding",
                "authors": "Gautam Singh, Yue Wang, Jiawei Yang, Boris Ivanovic, Sungjin Ahn, Marco Pavone, Tong Che",
                "conference": "ICML 2024",
                "webpage": "https://parallel-st-binder.github.io/",
                "paper": "https://arxiv.org/abs/2402.17077",
                "thumbnail": "assets/papers/psb.png"
            },
            {
                "name": "PreSight: Enhancing Autonomous Vehicle Perception with City-Scale NeRF Priors",
                "authors": "Tianyuan Yuan, Yucheng Mao, Jiawei Yang, Yicheng Liu, Yue Wang, Hang Zhao",
                "conference": "ECCV 2024",
                "webpage": "https://github.com/yuantianyuan01/PreSight",
                "paper": "https://arxiv.org/abs/2403.09079",
                "code": "https://github.com/yuantianyuan01/PreSight",
                "thumbnail": "assets/papers/presight.jpg"
            },
            {
                "name": "Denoising Vision Transformers",
                "authors": "Jiawei Yang*, Katie Z Luo*, Jiefeng Li, Congyue Deng, Leonidas Guibas, Dilip Krishnan, Kilian Q Weinberger, Yonglong Tian, Yue Wang",
                "conference": "ECCV 2024 Oral",
                "webpage": "https://jiawei-yang.github.io/DenoisingViT/",
                "paper": "https://arxiv.org/abs/2401.02957",
                "code": "https://github.com/Jiawei-Yang/Denoising-ViT",
                "thumbnail": "assets/papers/dvt.jpeg"
            },
            {
                "name": "A Language Agent for Autonomous Driving",
                "authors": "Jiageng Mao*, Junjie Ye*, Yuxi Qian, Marco Pavone, Yue Wang",
                "conference": "COLM 2024",
                "webpage": "https://physical-superintelligence-lab.github.io/Agent-Driver/",
                "paper": "https://arxiv.org/pdf/2311.10813",
                "code": "https://github.com/physical-superintelligence-lab/Agent-Driver",
                "thumbnail": "assets/papers/agent-driver.png"
            },
            {
                "name": "Tokenize the World into Object-level Knowledge to Address Long-tail Events in Autonomous Driving",
                "authors": "Ran Tian, Boyi Li, Xinshuo Weng, Yuxiao Chen, Edward Schmerling, Yue Wang, Boris Ivanovic, Marco Pavone",
                "conference": "CoRL 2024",
                "paper": "https://arxiv.org/abs/2407.00959",
                "thumbnail": "assets/papers/token.png"
            },
            {
                "name": "Q-SLAM: Quadric Representations for Monocular SLAM",
                "authors": "Chensheng Peng, Chenfeng Xu, Yue Wang, Mingyu Ding, Heng Yang, Masayoshi Tomizuka, Kurt Keutzer, Marco Pavone, Wei Zhan",
                "conference": "CoRL 2024",
                "paper": "https://arxiv.org/abs/2403.08125",
                "thumbnail": "assets/papers/q-slam.png"
            },
            {
                "name": "RAM: Retrieval-Based Affordance Transfer for Generalizable Zero-Shot Robotic Manipulation",
                "authors": "Yuxuan Kuang*, Junjie Ye*, Haoran Geng*, Jiageng Mao, Congyue Deng, Leonidas Guibas, He Wang, Yue Wang",
                "conference": "CoRL 2024 Oral",
                "webpage": "https://yuxuank.com/RAM/",
                "paper": "https://arxiv.org/abs/2407.04689",
                "code": "https://github.com/yxKryptonite/RAM_code",
                "thumbnail": "assets/papers/ram.png"
            },
            {
                "name": "DistillNeRF: Perceiving 3D Scenes from Single-Glance Images by Distilling Neural Fields and Foundation Model Features",
                "authors": "Letian Wang, Seung Wook Kim, Jiawei Yang, Cunjun Yu, Boris Ivanovic, Steven L. Waslander, Yue Wang, Sanja Fidler, Marco Pavone, Peter Karkus",
                "conference": "NeurIPS 2024",
                "webpage": "https://distillnerf.github.io/",
                "paper": "https://arxiv.org/abs/2406.12095",
                "thumbnail": "assets/papers/distillnerf.png"
            },
            {
                "name": "Memorize What Matters: Emergent Scene Decomposition from Multitraverse",
                "authors": "Yiming Li, Zehong Wang, Yue Wang, Zhiding Yu, Zan Gojcic, Marco Pavone, Chen Feng, Jose M. Alvarez",
                "conference": "NeurIPS 2024 Spotlight",
                "webpage": "https://nvlabs.github.io/3DGM/",
                "paper": "https://arxiv.org/abs/2405.17187",
                "code": "https://github.com/NVlabs/3DGM",
                "thumbnail": "assets/papers/3dgm.png"
            },
            {
                "name": "Large Spatial Model: End-to-end Unposed Images to Semantic 3D",
                "authors": "Zhiwen Fan*, Jian Zhang*, Wenyan Cong, Peihao Wang, Renjie Li, Kairun Wen, Shijie Zhou, Achuta Kadambi, Zhangyang Wang, Danfei Xu, Boris Ivanovic, Marco Pavone, Yue Wang",
                "conference": "NeurIPS 2024",
                "webpage": "https://largespatialmodel.github.io/",
                "paper": "https://arxiv.org/abs/2410.18956",
                "thumbnail": "assets/papers/lsm.svg"
            },
            {
                "name": "Multiview Equivariance Improves 3D Correspondence Understanding with Minimal Feature Finetuning",
                "authors": "Yang You, Yixin Li, Congyue Deng, Yue Wang, Leonidas Guibas",
                "conference": "ICLR 2025",
                "webpage": "",
                "paper": "https://arxiv.org/abs/2411.19458",
                "code": "https://github.com/qq456cvb/3DCorrEnhance",
                "thumbnail": "assets/papers/3dcorrenhance.png"
            },
            {
                "name": "Scene Flow as a Partial Differential Equation",
                "authors": "Kyle Vedder, Neehar Peri, Ishan Khatri, Siyi Li, Eric Eaton, Mehmet Kocamaz, Yue Wang, Zhiding Yu, Deva Ramanan, Joachim Pehserl",
                "conference": "ICLR 2025",
                "webpage": "https://vedder.io/eulerflow",
                "paper": "https://arxiv.org/abs/2410.02031",
                "code": "https://github.com/kylevedder/SceneFlowZoo",
                "thumbnail": "assets/papers/eulerflow.gif"
            },
            {
                "name": "Fantastic Copyrighted Beasts and How (Not) to Generate Them",
                "authors": "Luxi He, Yangsibo Huang, Weijia Shi, Tinghao Xie, Haotian Liu, Yue Wang, Luke Zettlemoyer, Chiyuan Zhang, Danqi Chen, Peter Henderson",
                "conference": "ICLR 2025",
                "webpage": "https://copycat-eval.github.io/",
                "paper": "https://arxiv.org/abs/2406.14526",
                "code": "https://github.com/princeton-nlp/CopyCat",
                "thumbnail": "assets/papers/copyrighted.png"
            },
            {
                "name": "Language-Image Models with 3D Understanding",
                "authors": "Jang Hyun Cho, Boris Ivanovic, Yulong Cao, Edward Schmerling, Yue Wang, Xinshuo Weng, Boyi Li, Yurong You, Philipp Kraehenbuehl, Yan Wang, Marco Pavone",
                "conference": "ICLR 2025",
                "webpage": "https://janghyuncho.github.io/Cube-LLM/",
                "paper": "https://arxiv.org/abs/2405.03685",
                "code": "https://github.com/NVlabs/Cube-LLM",
                "thumbnail": "assets/papers/cubellm.png"
            },
            {
                "name": "LoRA3D: Low-Rank Self-Calibration of 3D Geometric Foundation Models",
                "authors": "Ziqi Lu, Heng Yang, Danfei Xu, Boyi Li, Boris Ivanovic, Marco Pavone, Yue Wang",
                "conference": "ICLR 2025 Spotlight",
                "webpage": "",
                "paper": "https://arxiv.org/abs/2412.07746",
                "code": "",
                "thumbnail": "assets/papers/lora3d.png"
            },
            {
                "name": "OmniRe: Omni Urban Scene Reconstruction",
                "authors": "Ziyu Chen, Jiawei Yang, Jiahui Huang, Riccardo de Lutio, Janick Martinez Esturo, Boris Ivanovic, Or Litany, Zan Gojcic, Sanja Fidler, Marco Pavone, Li Song, Yue Wang",
                "conference": "ICLR 2025 Spotlight",
                "webpage": "https://ziyc.github.io/omnire/",
                "paper": "https://arxiv.org/abs/2408.16760",
                "code": "https://github.com/ziyc/drivestudio",
                "thumbnail": "assets/papers/omnire.png"
            },
            {
                "name": "PhysBench: Benchmarking and Enhancing Vision-Language Models for Physical World Understanding",
                "authors": "Wei Chow*, Jiageng Mao*, Boyi Li, Daniel Seita, Vitor Campagnolo Guizilini, Yue Wang",
                "conference": "ICLR 2025 Oral",
                "webpage": "https://physbench.github.io/",
                "paper": "https://openreview.net/pdf?id=Q6a9W6kzv5",
                "thumbnail": "assets/papers/physbench.mp4"
            },
            {
                "name": "STORM: Spatio-Temporal Reconstruction Model for Large-Scale Outdoor Scenes",
                "authors": "Jiawei Yang, Jiahui Huang, Yuxiao Chen, Yan Wang, Boyi Li, Yurong You, Apoorva Sharma, Maximilian Igl, Peter Karkus, Danfei Xu, Boris Ivanovic, Yue Wang*, Marco Pavone*",
                "conference": "ICLR 2025",
                "webpage": "https://jiawei-yang.github.io/STORM/",
                "paper": "https://arxiv.org/abs/2501.00602",
                "code": "https://github.com/NVlabs/GaussianSTORM",
                "thumbnail": "assets/papers/storm.png"
            },
            {
                "name": "SMART: Advancing Scalable Map Priors for Driving Topology Reasoning",
                "authors": "Junjie Ye, David Paz, Hengyuan Zhang, Yuliang Guo, Xinyu Huang, Henrik I. Christensen, Yue Wang, Liu Ren",
                "conference": "ICRA 2025",
                "webpage": "https://jay-ye.github.io/smart/",
                "paper": "https://arxiv.org/abs/2502.04329",
                "code": "",
                "thumbnail": "assets/papers/smart.gif"
            },
            {
                "name": "DreamDrive: Generative 4D Scene Modeling from Street View Images",
                "authors": "Jiageng Mao, Boyi Li, Boris Ivanovic, Yuxiao Chen, Yan Wang, Yurong You, Chaowei Xiao, Danfei Xu, Marco Pavone, Yue Wang",
                "conference": "ICRA 2025",
                "webpage": "https://pointscoder.github.io/DreamDrive/",
                "paper": "https://arxiv.org/abs/2501.00601",
                "code": "",
                "thumbnail": "assets/papers/dreamdrive.gif"
            },
            {
                "name": "RoboVerse: Towards a Unified Platform, Dataset and Benchmark for Scalable and Generalizable Robot Learning",
                "authors": "Haoran Geng*, Feishi Wang*, Songlin Wei*, Yuyang Li*, Bangjun Wang*, Boshi An*, Charlie Tianyue Cheng*, Haozhe Lou, Peihao Li, Yen-Jen Wang, Yutong Liang, Dylan Goetting, Chaoyi Xu, Haozhe Chen, Yuxi Qian, Yiran Geng, Jiageng Mao, Weikang Wan, Mingtong Zhang, Jiangran Lyu, Siheng Zhao, Jiazhao Zhang, Jialiang Zhang, Chengyang Zhao, Haoran Lu, Yufei Ding, Ran Gong, Yuran Wang, Yuxuan Kuang, Ruihai Wu, Baoxiong Jia, Carlo Sferrazza, Hao Dong, Siyuan Huang, Yue Wang†, Jitendra Malik†, Pieter Abbeel†",
                "conference": "RSS 2025",
                "webpage": "https://roboverseorg.github.io/",
                "paper": "https://arxiv.org/abs/2504.18904",
                "code": "https://github.com/RoboVerseOrg/RoboVerse",
                "thumbnail": "assets/papers/roboverse.jpg"
            },
            {
                "name": "Deformable Beta Splatting",
                "authors": "Rong Liu*, Dylan Sun*, Meida Chen, Yue Wang†, Andrew Feng†",
                "conference": "SIGGRAPH 2025",
                "webpage": "https://rongliu-leo.github.io/beta-splatting/",
                "paper": "https://arxiv.org/abs/2501.18630",
                "code": "https://github.com/RongLiu-Leo/beta-splatting",
                "thumbnail": "assets/papers/dbs.png"
            },
            {
                "name": "Extrapolated Urban View Synthesis Benchmark",
                "authors": "Xiangyu Han, Zhen Jia, Boyi Li, Yan Wang, Boris Ivanovic, Yurong You, Lingjie Liu, Yue Wang, Marco Pavone, Chen Feng, Yiming Li",
                "conference": "ICCV 2025",
                "webpage": "https://ai4ce.github.io/EUVS-Benchmark/",
                "paper": "https://arxiv.org/pdf/2412.05256",
                "code": "https://github.com/ai4ce/EUVS-Benchmark",
                "thumbnail": "assets/papers/euvs.png"
            },
            {
                "name": "InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models",
                "authors": "Yifan Lu, Xuanchi Ren, Jiawei Yang, Tianchang Shen, Zhangjie Wu, Jun Gao, Yue Wang, Siheng Chen, Mike Chen, Sanja Fidler, Jiahui Huang",
                "conference": "ICCV 2025",
                "webpage": "https://research.nvidia.com/labs/toronto-ai/infinicube/",
                "paper": "https://arxiv.org/abs/2412.03934",
                "code": "https://github.com/nv-tlabs/InfiniCube",
                "thumbnail": "assets/papers/infinicube.png"
            },
            {
                "name": "Learning an Implicit Physics Model for Image-based Fluid Simulation",
                "authors": "Emily Yue-ting Jia, Jiageng Mao, Zhiyuan Gao, Yajie Zhao, Yue Wang",
                "conference": "ICCV 2025",
                "webpage": "https://physfluid.github.io/",
                "paper": "https://arxiv.org/abs/2508.08254",
                "code": "",
                "thumbnail": "assets/papers/physfluid.png"
            },
            {
                "name": "Learning from Massive Human Videos for Universal Humanoid Pose Control",
                "authors": "Jiageng Mao, Siheng Zhao, Siqi Song, Tianheng Shi, Junjie Ye, Mingtong Zhang, Haoran Geng, Jitendra Malik, Vitor Campagnolo Guizilini, Yue Wang",
                "conference": "Humanoids 2025",
                "webpage": "https://physical-superintelligence-lab.github.io/UH-1/",
                "paper": "https://arxiv.org/abs/2412.14172",
                "code": "https://github.com/sihengz02/UH-1",
                "thumbnail": "assets/papers/uh-1.mp4"
            },
            {
                "name": "SIRE: SE(3) Intrinsic Rigidity Embeddings",
                "authors": "Cameron Smith, Basile Van Hoorick, Chonghyuk Song, Vincent Sitzmann, Vitor Campagnolo Guizilini, Yue Wang",
                "conference": "TMLR 2025",
                "webpage": "https://cameronosmith.github.io/sire/",
                "paper": "https://arxiv.org/abs/2503.07739",
                "code": "https://github.com/cameronosmith/SIRE",
                "thumbnail": "assets/papers/sire.png"
            },
            {
                "name": "ManipBench: Benchmarking Vision-Language Models for Low-Level Robot Manipulation",
                "authors": "Enyu Zhao*, Vedant Raval*, Hejia Zhang*, Jiageng Mao, Zeyu Shangguan, Stefanos Nikolaidis, Yue Wang, Daniel Seita",
                "conference": "CoRL 2025",
                "webpage": "https://manipbench.github.io/",
                "paper": "https://arxiv.org/abs/2505.09698",
                "code": "https://github.com/slurm-lab-usc/ManipBench-Real-Robot-question",
                "thumbnail": "assets/papers/manipbench.png"
            },
            {
                "name": "Robot Learning from Any Images",
                "authors": "Siheng Zhao*, Jiageng Mao*, Wei Chow, Zeyu Shangguan, Tianheng Shi, Rong Xue, Yuxi Zheng, Yijia Weng, Yang You, Daniel Seita, Leonidas Guibas, Sergey Zakharov, Vitor Campagnolo Guizilini, Yue Wang",
                "conference": "CoRL 2025",
                "webpage": "https://sihengz02.github.io/RoLA/",
                "paper": "https://arxiv.org/abs/2509.22970",
                "code": "https://github.com/PointsCoder/OpenReal2Sim",
                "thumbnail": "assets/papers/rola.mp4"
            },
            {
                "name": "Martian World Model: Controllable Video Synthesis with Physically Accurate 3D Reconstructions",
                "authors": "Longfei Li, Zhiwen Fan, Wenyan Cong, Xinhang Liu, Yuyang Yin, Matt Foutter, Panwang Pan, Chenyu You, Yue Wang, Zhangyang Wang, Yao Zhao, Marco Pavone, Yunchao Wei",
                "conference": "NeurIPS 2025",
                "webpage": "https://marsgenai.github.io/",
                "paper": "https://arxiv.org/abs/2507.07978",
                "code": "https://github.com/loongfeili/Martian-World-Model",
                "thumbnail": "assets/papers/martian.png"
            },
            {
                "name": "Seeing the Wind from a Falling Leaf",
                "authors": "Zhiyuan Gao*, Jiageng Mao*, Hong-Xing (Koven) Yu, Haozhe Lou, Emily Yue-Ting Jia, Jernej Barbic, Jiajun Wu, Yue Wang",
                "conference": "NeurIPS 2025",
                "webpage": "https://chaoren2357.github.io/seeingthewind/",
                "paper": "https://arxiv.org/abs/2512.00762",
                "code": "",
                "thumbnail": "assets/papers/windseeker.mp4"
            },
            {
                "name": "ResMimic: From General Motion Tracking to Humanoid Whole-Body Loco-Manipulation via Residual Learning",
                "authors": "Siheng Zhao, Yanjie Ze, Yue Wang, C. Karen Liu, Pieter Abbeel, Guanya Shi, Rocky Duan",
                "conference": "arXiv 2025",
                "webpage": "https://resmimic.github.io/",
                "paper": "https://arxiv.org/abs/2510.05070",
                "code": "https://github.com/amazon-far/ResMimic",
                "thumbnail": "assets/papers/resmimic.mp4"
            },
            {
                "name": "Flex: Towards Efficient and Effective Multi-Camera Encoding for End-to-End Driving",
                "authors": "Jiawei Yang, Ziyu Chen, Yurong You, Yan Wang, Yiming Li, Yuxiao Chen, Boyi Li, Boris Ivanovic, Marco Pavone, Yue Wang",
                "conference": "arXiv 2025",
                "webpage": "https://jiawei-yang.github.io/Flex/",
                "paper": "https://arxiv.org/abs/2512.10947",
                "code": "",
                "thumbnail": "assets/papers/flex.png"
            },
            {
                "name": "Fiducial Exoskeletons: Image-Centric Robot State Estimation",
                "authors": "Cameron Smith, Basile Van Hoorick, Vitor Guizilini, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://cameronosmith.github.io/fiducial_exoskeleton/",
                "paper": "https://arxiv.org/abs/2601.08034",
                "code": "https://github.com/cameronosmith/FiducialExoskeletons",
                "thumbnail": "assets/papers/fidex.mp4"
            },
            {
                "name": "D-REX: Differentiable Real-to-Sim-to-Real Engine for Learning Dexterous Grasping",
                "authors": "Haozhe Lou, Mingtong Zhang, Haoran Geng, Hanyang Zhou, Sicheng He, Zhiyuan Gao, Siheng Zhao, Jiageng Mao, Pieter Abbeel, Jitendra Malik, Daniel Seita, Yue Wang",
                "conference": "ICLR 2026",
                "webpage": "https://robot-drex-engine.github.io/",
                "paper": "https://openreview.net/pdf?id=13jshGCK9i",
                "code": "",
                "thumbnail": "assets/papers/d_rex.png"
            },
            {
                "name": "Latent Denoising Makes Good Visual Tokenizers",
                "authors": "Jiawei Yang, Tianhong Li, Lijie Fan, Yonglong Tian, Yue Wang",
                "conference": "ICLR 2026",
                "webpage": "https://jiawei-yang.github.io/DeTok/",
                "paper": "https://arxiv.org/abs/2507.15856",
                "code": "https://github.com/Jiawei-Yang/DeTok",
                "thumbnail": "assets/papers/detok.gif"
            },
            {
                "name": "Universal Beta Splatting",
                "authors": "Rong Liu, Zhongpai Gao, Benjamin Planche, Meida Chen, Van Nguyen Nguyen, Meng Zheng, Anwesa Choudhuri, Terrence Chen, Yue Wang, Andrew Feng, Ziyan Wu",
                "conference": "ICLR 2026",
                "webpage": "https://rongliu-leo.github.io/universal-beta-splatting/",
                "paper": "https://arxiv.org/abs/2510.03312",
                "code": "https://github.com/RongLiu-Leo/universal-beta-splatting",
                "thumbnail": "assets/papers/universal_beta_splatting.png"
            },
            {
                "name": "ROVER: Benchmarking Reciprocal Cross-Modal Reasoning for Omnimodal Generation",
                "authors": "Yongyuan Liang, Wei Chow, Feng Li, Ziqiao Ma, Xiyao Wang, Jiageng Mao, Jiuhai Chen, Jiatao Gu, Yue Wang, Furong Huang",
                "conference": "ICLR 2026",
                "webpage": "https://roverbench.github.io/",
                "paper": "https://arxiv.org/abs/2511.01163",
                "code": "https://github.com/cheryyunl/ROVER",
                "thumbnail": "assets/papers/rover.png"
            },
            {
                "name": "SeFA-Policy: Fast and Accurate Visuomotor Policy Learning with Selective Flow Alignment",
                "authors": "Rong Xue*, Jiageng Mao*, Mingtong Zhang, Yue Wang",
                "conference": "ICRA 2026",
                "webpage": "https://rongxuezoe.github.io/SeFAPolicy-homepage/",
                "paper": "https://arxiv.org/abs/2511.08583",
                "code": "https://github.com/RongXueZoe/SeFA",
                "thumbnail": "assets/papers/sefa_policy.png"
            },
            {
                "name": "Humanoid Everyday: A Comprehensive Robotic Dataset for Open-World Humanoid Manipulation",
                "authors": "Zhenyu Zhao*, Hongyi Jing*, Xiawei Liu, Jiageng Mao, Abha Jha, Hanwen Yang, Rong Xue, Sergey Zakharov, Vitor Guizilini, Yue Wang",
                "conference": "ICRA 2026",
                "webpage": "https://humanoideveryday.github.io/",
                "paper": "https://arxiv.org/abs/2510.08807",
                "code": "https://github.com/physical-superintelligence-lab/Humanoid-Teleop",
                "thumbnail": "assets/papers/humanoid_everyday.mp4"
            },
            {
                "name": "AnchorDream: Repurposing Video Diffusion for Embodiment-Aware Robot Data Synthesis",
                "authors": "Junjie Ye, Rong Xue, Basile Van Hoorick, Pavel Tokmakov, Muhammad Zubair Irshad, Yue Wang, Vitor Guizilini",
                "conference": "ICRA 2026",
                "webpage": "https://jay-ye.github.io/AnchorDream/",
                "paper": "https://arxiv.org/abs/2512.11797",
                "code": "",
                "thumbnail": "assets/papers/anchordream.mp4"
            },
            {
                "name": "Robot Learning from a Physical World Model",
                "authors": "Jiageng Mao, Sicheng He, Hao-Ning Wu, Yang You, Shuyang Sun, Zhicheng Wang, Yanan Bao, Huizhong Chen, Leonidas Guibas, Vitor Guizilini, Howard Zhou, Yue Wang",
                "conference": "ICRA 2026",
                "webpage": "https://pointscoder.github.io/PhysWorld_Web/",
                "paper": "https://arxiv.org/pdf/2511.07416",
                "code": "https://github.com/PointsCoder/OpenReal2Sim",
                "thumbnail": "assets/papers/physworld.png"
            },
            {
                "name": "VideoGPA: Distilling Geometry Priors for 3D-Consistent Video Generation",
                "authors": "Hongyang Du*, Junjie Ye*, Xiaoyan Cong*, Runhao Li, Jingcheng Ni, Aman Agarwal, Zeqi Zhou, Zekun Li, Randall Balestriero, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://hongyang-du.github.io/VideoGPA-Website/",
                "paper": "https://arxiv.org/pdf/2601.23286",
                "code": "https://github.com/Hongyang-Du/VideoGPA",
                "thumbnail": "assets/papers/videogpa.png"
            },
            {
                "name": "ICLR: In-Context Imitation Learning with Visual Reasoning",
                "authors": "Toan Nguyen, Weiduo Yuan, Songlin Wei, Hui Li, Daniel Seita, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://toannguyen1904.github.io/ICLR",
                "paper": "https://arxiv.org/abs/2603.07530",
                "code": "",
                "thumbnail": "assets/papers/iclr_icl.png"
            },
            {
                "name": "Fast SAM 3D Body: Accelerating SAM 3D Body for Real-Time Full-Body Human Mesh Recovery",
                "authors": "Timing Yang, Sicheng He, Hongyi Jing, Jiawei Yang, Zhijian Liu, Chuhang Zou, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://yangtiming.github.io/Fast-SAM-3D-Body-Page/",
                "paper": "https://arxiv.org/abs/2603.15603",
                "code": "https://github.com/yangtiming/Fast-SAM-3D-Body",
                "thumbnail": "assets/papers/fast_sam3d.png"
            },
            {
                "name": "HumDex: Humanoid Dexterous Manipulation Made Easy",
                "authors": "Liang Heng, Yihe Tang, Jiajun Xu, Henghui Bao, Di Huang, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://psi-lab.ai/humdex/",
                "paper": "https://arxiv.org/abs/2603.12260",
                "code": "https://github.com/physical-superintelligence-lab/HumDex",
                "thumbnail": "assets/papers/humdex.png"
            },
            {
                "name": "Ψ₀: An Open Foundation Model Towards Universal Humanoid Loco-Manipulation",
                "authors": "Songlin Wei, Hongyi Jing, Boqian Li, Zhenyu Zhao, Jiageng Mao, Zhenhao Ni, Sicheng He, Jie Liu, Xiawei Liu, Kaidi Kang, Sheng Zang, Weiduo Yuan, Marco Pavone, Di Huang, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "http://psi-lab.ai/Psi0",
                "paper": "https://arxiv.org/abs/2603.12263",
                "code": "https://github.com/physical-superintelligence-lab/Psi0",
                "thumbnail": "assets/papers/psi0.png"
            },
            {
                "name": "FuzzingRL: Reinforcement Fuzz-Testing for Revealing VLM Failures",
                "authors": "Jiajun Xu, Jiageng Mao, Ang Qi, Weiduo Yuan, Alexander Romanus, Helen Xia, Vitor Campagnolo Guizilini, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "",
                "paper": "https://arxiv.org/abs/2603.06600",
                "code": "",
                "thumbnail": "assets/papers/fuzzingrl.png"
            },
            {
                "name": "DreamPlan: Efficient Reinforcement Fine-Tuning of Vision-Language Planners via Video World Models",
                "authors": "Emily Yue-Ting Jia, Weiduo Yuan, Tianheng Shi, Vitor Guizilini, Jiageng Mao, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://psi-lab.ai/DreamPlan/",
                "paper": "https://arxiv.org/abs/2603.16860",
                "code": "",
                "thumbnail": "assets/papers/dreamplan.jpg"
            },
            {
                "name": "Concurrent Prehensile and Nonprehensile Manipulation: A Practical Approach to Multi-Stage Dexterous Tasks",
                "authors": "Hao Jiang, Yue Wu, Yue Wang, Gaurav S. Sukhatme, Daniel Seita",
                "conference": "arXiv 2026",
                "webpage": "",
                "paper": "https://arxiv.org/abs/2603.11655",
                "code": "",
                "thumbnail": "assets/papers/concurrent_manip.png"
            },
            {
                "name": "RealWonder: Real-Time Physical Action-Conditioned Video Generation",
                "authors": "Wei Liu, Ziyu Chen, Zizhang Li, Yue Wang, Hong-Xing Yu, Jiajun Wu",
                "conference": "arXiv 2026",
                "webpage": "https://liuwei283.github.io/RealWonder/",
                "paper": "https://arxiv.org/abs/2603.05449",
                "code": "https://github.com/liuwei283/RealWonder",
                "thumbnail": "assets/papers/realwonder.png"
            },
            {
                "name": "Large Reward Models: Generalizable Online Robot Reward Generation with Vision-Language Models",
                "authors": "Yanru Wu, Weiduo Yuan, Ang Qi, Vitor Guizilini, Jiageng Mao, Yue Wang",
                "conference": "arXiv 2026",
                "webpage": "https://yanru-wu.github.io/Large-Reward-Models/",
                "paper": "https://arxiv.org/abs/2603.16065",
                "code": "",
                "thumbnail": "assets/papers/lrm.jpg"
            },
        ];

        // Container element that receives the heading and the rendered publication cards.
        let section = document.getElementById("publications");

        // Helpers live in a block scope so they do not add names to the page's global scope.
        {
            // Main venue + year token (e.g., "ICRA 2026"), highlighted with the shared blue style.
            const VENUE_YEAR_PATTERN = /\b[A-Za-z0-9-]+\s+\d{4}\b/;

            /**
             * Makes a value safe to place inside a double-quoted HTML attribute.
             * Only `"` can terminate such an attribute, so it is the only character
             * rewritten; everything else is passed through unchanged.
             * @param {*} value - Raw attribute value (coerced to string).
             * @returns {string} Value with every `"` replaced by `&quot;`.
             */
            const escapeQuotes = (value) => String(value).replace(/"/g, "&quot;");

            /**
             * Wraps the venue+year token in the blue highlight span and the first
             * "Spotlight" / "Oral" mention in the red highlight span.
             * @param {string} conference - Venue text of one publication.
             * @returns {string} HTML for the venue line.
             */
            const highlightConference = (conference) => conference
                // A callback is used so the exact matched span is wrapped and `$`
                // sequences in the venue text are never treated as replacement patterns.
                .replace(VENUE_YEAR_PATTERN, (match) => `<span class="highlight-blue">${match}</span>`)
                .replace("Spotlight", `<span class="highlight-red">Spotlight</span>`)
                .replace("Oral", `<span class="highlight-red">Oral</span>`);

            /**
             * Builds the media element for a card: a looping muted video when the
             * path contains "mp4", otherwise an image.
             * @param {string} thumbnail - Path of the thumbnail asset.
             * @param {string} altText - Alternative text used for images.
             * @returns {string} HTML for the media element, or "" when no thumbnail is set.
             */
            const renderThumbnail = (thumbnail, altText) => {
                if (!thumbnail) {
                    return "";
                }
                const src = escapeQuotes(thumbnail);
                return thumbnail.includes("mp4") ?
                    `<video autoplay loop muted playsinline><source src="${src}" type="video/mp4"></video>` :
                    `<img src="${src}" alt="${escapeQuotes(altText)}" />`;
            };

            /**
             * Renders one publication entry as a 'fancy-card pub-item' card.
             * Name, authors, venue and note are inserted as HTML (the list is
             * maintained in this file); attribute values are quote-escaped.
             * @param {Object} publication - Entry with "name", "authors", "conference",
             *     "webpage", "paper", "code", "thumbnail" and optional "note".
             * @returns {string} HTML markup of the card.
             */
            const renderPublication = (publication) => {
                const name = publication["name"];
                const authors = publication["authors"];
                const conference = highlightConference(publication["conference"] || "");
                const note = publication["note"] || "";

                // Only links that are actually set are shown, in Webpage / Paper / Code order.
                const links = [
                    ["Webpage", publication["webpage"]],
                    ["Paper", publication["paper"]],
                    ["Code", publication["code"]],
                ]
                    .filter(([, url]) => url)
                    .map(([label, url]) => `<a href="${escapeQuotes(url)}">${label}</a>`)
                    .join("");

                // Title and thumbnail link to the project page; fall back to the paper
                // so entries without a webpage do not end up with a dead "#" link.
                const primaryLink = escapeQuotes(publication["webpage"] || publication["paper"] || "#");

                return `
                <div class="fancy-card pub-item">
                    <a href="${primaryLink}" class="pub-thumb">
                        ${renderThumbnail(publication["thumbnail"], name)}
                    </a>
                    <div class="pub-info">
                        <a href="${primaryLink}" style="text-decoration:none;">
                            <b>${name}</b>
                        </a>
                        <p style="color:#64748B;">${authors}</p>
                        <p>${conference}</p>
                        <p style="color:var(--primary); font-weight:500;">${note}</p>
                        <div class="pub-links">
                            ${links}
                        </div>
                    </div>
                </div>
            `;
            };

            // The list is written oldest-first; reverse it so the newest entry is shown
            // first, and insert heading + cards with a single DOM update.
            const cardsHtml = publications.map((publication) => renderPublication(publication)).reverse().join("");
            section.insertAdjacentHTML('afterbegin', `<h1>Research</h1>${cardsHtml}`);
        }
        </script>
</body>
</html>