|
22 | 22 |
|
23 | 23 | *\* Logo inspired by oracle bone character "eye".* |
24 | 24 |
|
| 25 | +## 🔥 Updates |
| 26 | +- [2025/11/10] We have released DeepEyesV2, an agentic multimodal model that combines code execution and search. See [DeepEyesV2](https://github.com/Visual-Agent/DeepEyesV2) for more information. |
| 27 | + |
| 28 | + |
25 | 29 | ## DeepEyes |
26 | 30 | Quote from [https://openai.com/index/thinking-with-images/](https://openai.com/index/thinking-with-images/) |
27 | 31 | > They don’t just see an image, they can integrate visual information directly into the reasoning chain. |
@@ -268,13 +272,18 @@ This project is released under [Apache licence](./LICENSE). |
268 | 272 | ## Citation |
269 | 273 |
|
270 | 274 | ``` |
271 | | -@article{zheng2025deepeyesincentivizingthinkingimages, |
272 | | - title={DeepEyes: Incentivizing "Thinking with Images" via Reinforcement Learning}, |
273 | | - author={Ziwei Zheng, Michael Yang, Jack Hong, Chenxiao Zhao, Guohai Xu, Le Yang, Chao Shen, Xing Yu}, |
274 | | - year={2025}, |
275 | | - eprint={2505.14362}, |
276 | | - archivePrefix={arXiv}, |
277 | | - primaryClass={cs.CV}, |
278 | | - url={https://arxiv.org/abs/2505.14362}, |
| 275 | +# DeepEyesV2 |
| 276 | +@article{hong2025deepeyesv2, |
| 277 | + title={{DeepEyesV2}: Toward Agentic Multimodal Model}, |
| 278 | + author={Hong, Jack and Zhao, Chenxiao and Zhu, ChengLin and Lu, Weiheng and Xu, Guohai and Yu, Xing}, |
| 279 | + journal={arXiv preprint arXiv:2511.05271}, |
| 280 | + year={2025} |
| 281 | +} |
| 282 | +# DeepEyes |
| 283 | +@article{zheng2025deepeyes, |
| 284 | + title={{DeepEyes}: Incentivizing ``Thinking with Images'' via Reinforcement Learning}, |
| 285 | + author={Zheng, Ziwei and Yang, Michael and Hong, Jack and Zhao, Chenxiao and Xu, Guohai and Yang, Le and Shen, Chao and Yu, Xing}, |
| 286 | + journal={arXiv preprint arXiv:2505.14362}, |
| 287 | + year={2025} |
279 | 288 | } |
280 | 289 | ``` |
0 commit comments