@@ -9,15 +9,75 @@ nav_order: 6
99---
1010
<!-- Xmart Student Forum: one heading per session followed by its Bilibili player, listed newest session first. -->
<div align="center">
  <h3>Xmart Student Forum</h3>

  <h4>Session 14 Yuancheng Wang: Towards Natural and Efficient Speech Synthesis — Perspectives on Modeling, Alignment, and Representation</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1FuKzzGEq3&amp;page=1" title="Student Forum Session 14 — Yuancheng Wang (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 13 Dongchao Yang: Towards Multi-task Audio Foundation Models — An Audio Generation Perspective</h4>
  <!-- Pending upload -->

  <h4>Session 12 Junzuo Zhou &amp; Yong Ren: Traceable Protection of Speech — Research on Audio Watermarking</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1EuL7zNEHH&amp;page=1" title="Student Forum Session 12 — Junzuo Zhou and Yong Ren (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 11 Shengpeng Ji: Opportunities and Challenges in the Era of End-to-End Spoken Dialogue</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1FaZGYXEdc&amp;page=1" title="Student Forum Session 11 — Shengpeng Ji (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 10 Ruibin Yuan: Scaling Open Foundation Models for Music</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1rU9EYhEx3&amp;page=1" title="Student Forum Session 10 — Ruibin Yuan (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 9 Shaolei Zhang: Toward Real-time Cross-Language Communication — Challenges, Techniques, and Future of Real-time Speech Models</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV15nwLeaEU1&amp;page=1" title="Student Forum Session 9 — Shaolei Zhang (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 8 Junbin Xiao &amp; Leilei Li: Research and Outlook on First-Person Perspective Problems</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1Ftk1Y6Ehs&amp;page=1" title="Student Forum Session 8 — Junbin Xiao and Leilei Li (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 7 Zirui Guo: From Retrieval-Augmented Generation to Graph-Augmented Generation — Exploring Next-Generation Intelligent Q&amp;A Systems</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV137kJYHEoC&amp;page=1" title="Student Forum Session 7 — Zirui Guo (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 6 Haohe Liu: Latent Diffusion Model as a Versatile Coarse-to-Fine Audio Decoder</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1JjmBYYEoW&amp;page=1" title="Student Forum Session 6 — Haohe Liu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 5 Tianbao Xie: OSWorld — Benchmarking Multimodal Agents for Open-Ended Tasks in a Real Computer Environment</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1CpyNYBE6o&amp;page=1" title="Student Forum Session 5 — Tianbao Xie (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 4 Yuchen Hu: Post-Training Alignment of Large Speech Models</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1uzxeevEb8&amp;page=1" title="Student Forum Session 4 — Yuchen Hu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 3 Junyi Ao: SD-Eval New Benchmark — Equipping Large Speech Interaction Models with Cognitive and Emotional Intelligence</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1hixeeqEkQ&amp;page=1" title="Student Forum Session 3 — Junyi Ao (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 2 Keqi Deng: Label-synchronous Neural Transducer</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1qihreEE6L&amp;page=1" title="Student Forum Session 2 — Keqi Deng (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 1 Dong Zhang: Building End-to-End Spoken Dialogue Large Models</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1FJ4m137ZB&amp;page=1" title="Student Forum Session 1 — Dong Zhang (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>
</div>
2256
<!-- Xmart Frontier Talks: one heading per talk followed by its Bilibili player, listed newest session first. -->
<div align="center">
  <h3>Xmart Frontier Talks</h3>

  <h4>Session 7 Kele Xu: Multimodal Machine Learning for Sound Understanding</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1zX9EYZELX&amp;page=1" title="Frontier Talks Session 7 — Kele Xu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 6 Cewu Lu: Embodied Intelligence Scaling Laws and Scalable Data</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1hc6JYLE11&amp;page=1" title="Frontier Talks Session 6 — Cewu Lu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 5 Wenwu Wang: Large Language-Audio Models and Their Applications</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1k7knYgEXT&amp;page=1" title="Frontier Talks Session 5 — Wenwu Wang (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 4 Xipeng Qiu: From Large Language Models to World Models</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1AJqGYuEZa&amp;page=1" title="Frontier Talks Session 4 — Xipeng Qiu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 3 Tianfan Fu: Applications of Deep Learning in Drug Discovery and Development</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV13XBiYdELy&amp;page=1" title="Frontier Talks Session 3 — Tianfan Fu (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 2 Hung-yi Lee: Challenges of Teaching New Skills to Foundation Models</h4>
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1FsUtY3EQV&amp;page=1" title="Frontier Talks Session 2 — Hung-yi Lee (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>

  <h4>Session 1 Haofen Wang: Knowledge Retrieval Augmentation — Paradigms and Key Technologies</h4>
  <!-- NOTE(review): bvid BV1FJ4m137ZB is the same id embedded for Xmart Student Forum Session 1 (Dong Zhang); this looks copy-pasted. Confirm and replace with the correct video id for this talk. -->
  <iframe src="https://player.bilibili.com/player.html?bvid=BV1FJ4m137ZB&amp;page=1" title="Frontier Talks Session 1 — Haofen Wang (Bilibili video)" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen loading="lazy" width="100%" height="500"></iframe>
</div>
81+
82+
2383
0 commit comments