Commit ae002d2

Merge pull request #1 from yeshaokai/shaokai/dev
Shaokai/dev
2 parents 8b5970f + 819d73a commit ae002d2

42 files changed (+3508, -967 lines)

.gitignore

Lines changed: 2 additions & 1 deletion

@@ -73,4 +73,5 @@ data_processing/
 
 
 experiments/
-*.out
+*.out
+pretrained_models/

.vscode/launch.json

Lines changed: 8 additions & 8 deletions

@@ -7,7 +7,7 @@
 "request": "launch",
 "module": "torch.distributed.run",
 "env": {
-"CUDA_VISIBLE_DEVICES": "1,2",
+"CUDA_VISIBLE_DEVICES": "1,2,3",
 "OMP_NUM_THREADS": "8",
 "NCCL_IB_DISABLE": "0",
 "NCCL_IB_GID_INDEX": "3",
@@ -18,7 +18,7 @@
 "WANDB_API_KEY": "65aeda82a75f1eed29c8e9250b175fcc73dca0d7",
 },
 "args": [
-"--nproc_per_node=2",
+"--nproc_per_node=3",
 "--nnodes=1",
 "--node_rank=0",
 "--master_addr=127.0.0.1",
@@ -31,6 +31,7 @@
 // "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data",
 "--image_folder", "/mediaPFM/data/haozhe/onevision/llava_data/geo3k/",
 "--video_folder", "/mediaPFM/data/haozhe/onevision/llava_video",
+// "--video_folder", "/home/haozhe/kitchen/AVION/datasets",
 "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
 "--mm_vision_tower_lr", "2e-6",
 "--vision_tower", "google/siglip-so400m-patch14-384",
@@ -89,13 +90,12 @@
 // "request": "launch",
 // "program": "docs/LLaVA_OneVision_Tutorials.py",
 // "console": "integratedTerminal",
-// "env":{"CUDA_VISIBLE_DEVICES":"0",
-// "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"},
+// "env":{
+// "CUDA_VISIBLE_DEVICES":"0",
+// // "HF_HOME": "/mnt/SV_storage/VFM/huggingface",
+// // "LD_PRELOAD": "/usr/lib/x86_64-linux-gnu/libffi.so.7"
+// },
 // "justMyCode": false,
-// // "args": [
-// // "--run_dir_name", "test",
-// // // "--use_big_decoder"
-// // ]
 // }
 // ]
 // }
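
The launch.json change widens the debug launcher from two to three worker processes and GPUs; since the entry sets "module": "torch.distributed.run", it is the VS Code equivalent of invoking python -m torch.distributed.run with the listed env and args. Below is a minimal sketch (not part of this commit) of the consistency the edit restores, assuming --nproc_per_node is meant to match the number of devices in CUDA_VISIBLE_DEVICES:

```python
# Minimal sketch (not part of this commit): check that the updated debug config
# is self-consistent, i.e. --nproc_per_node matches the number of GPUs exposed
# through CUDA_VISIBLE_DEVICES. Values are copied from the new launch.json.
cuda_visible_devices = "1,2,3"
nproc_per_node = 3

num_visible_gpus = len([d for d in cuda_visible_devices.split(",") if d])
assert nproc_per_node == num_visible_gpus, (
    f"--nproc_per_node={nproc_per_node}, but {num_visible_gpus} GPUs are visible"
)
print(f"launcher spawns {nproc_per_node} processes, one per visible GPU")
```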

README.md

Lines changed: 17 additions & 2 deletions

@@ -3,21 +3,36 @@
 </p>
 
 # LLaVA-NeXT: Open Large Multimodal Models
+[![Static Badge](https://img.shields.io/badge/llava_video-paper-green)](http://arxiv.org/abs/2410.02713)
 [![Static Badge](https://img.shields.io/badge/llava_onevision-paper-green)](https://arxiv.org/abs/2408.03326)
 [![llava_next-blog](https://img.shields.io/badge/llava_next-blog-green)](https://llava-vl.github.io/blog/)
 
 [![llava_onevision-demo](https://img.shields.io/badge/llava_onevision-demo-red)](https://llava-onevision.lmms-lab.com/)
+[![llava_next-video_demo](https://img.shields.io/badge/llava_video-demo-red)](https://huggingface.co/spaces/WildVision/vision-arena)
 [![llava_next-interleave_demo](https://img.shields.io/badge/llava_next-interleave_demo-red)](https://huggingface.co/spaces/lmms-lab/LLaVA-NeXT-Interleave-Demo)
-[![llava_next-video_demo](https://img.shields.io/badge/llava_next-video_demo-red)](https://huggingface.co/spaces/WildVision/vision-arena)
 [![Openbayes Demo](https://img.shields.io/static/v1?label=Demo&message=OpenBayes%E8%B4%9D%E5%BC%8F%E8%AE%A1%E7%AE%97&color=green)](https://openbayes.com/console/public/tutorials/gW0ng9jKXfO)
 
+[![llava_video-checkpoints](https://img.shields.io/badge/llava_video-checkpoints-blue)](https://huggingface.co/collections/lmms-lab/llava-next-video-661e86f5e8dabc3ff793c944)
 [![llava_onevision-checkpoints](https://img.shields.io/badge/llava_onevision-checkpoints-blue)](https://huggingface.co/collections/lmms-lab/llava-onevision-66a259c3526e15166d6bba37)
 [![llava_next-interleave_checkpoints](https://img.shields.io/badge/llava_next-interleave_checkpoints-blue)](https://huggingface.co/collections/lmms-lab/llava-next-interleave-66763c55c411b340b35873d1)
-[![llava_next-video_checkpoints](https://img.shields.io/badge/llava_next-video_checkpoints-blue)](https://huggingface.co/collections/lmms-lab/llava-next-video-661e86f5e8dabc3ff793c944)
 [![llava_next-image_checkpoints](https://img.shields.io/badge/llava_next-image_checkpoints-blue)](https://huggingface.co/lmms-lab)
 
 ## Release Notes
 
+- **[2024/10/04] 🔥 LLaVA-Video** (formerly LLaVA-NeXT-Video) has undergone a major upgrade! We are excited to release **LLaVA-Video-178K**, a high-quality synthetic dataset for video instruction tuning. This dataset includes:
+
+  - 178,510 caption entries
+  - 960,792 open-ended Q&A pairs
+  - 196,198 multiple-choice Q&A items
+
+  Along with this, we're also releasing the **LLaVA-Video 7B/72B models**, which deliver competitive performance on the latest video benchmarks, including [Video-MME](https://video-mme.github.io/home_page.html#leaderboard), [LongVideoBench](https://longvideobench.github.io/), and [Dream-1K](https://tarsier-vlm.github.io/).
+
+  📄 **Explore more**:
+  - [LLaVA-Video-178K Dataset](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K): Download the dataset.
+  - [LLaVA-Video Models](https://huggingface.co/collections/lmms-lab/llava-video-661e86f5e8dabc3ff793c944): Access model checkpoints.
+  - [Paper](http://arxiv.org/abs/2410.02713): Detailed information about LLaVA-Video.
+  - [LLaVA-Video Documentation](https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/docs/LLaVA_Video_1003.md): Guidance on training, inference and evaluation.
+
 - [2024/09/13] 🔥 **🚀 [LLaVA-OneVision-Chat](docs/LLaVA_OneVision_Chat.md)**. The new LLaVA-OV-Chat (7B/72B) significantly improves the chat experience of LLaVA-OV. 📄
 
 ![](docs/ov_chat_images/chat_results.png)
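
The README changes headline the LLaVA-Video-178K release. As a minimal sketch (not part of this commit), the dataset linked in the new release note can be mirrored locally from the Hugging Face Hub; snapshot_download fetches the entire dataset repository, which is large, and the subset layout is documented on the dataset card rather than in this diff:

```python
# Minimal sketch (not part of this commit): download the LLaVA-Video-178K
# dataset repository announced in the README. Subset/configuration names are
# not listed in this diff, so the snapshot is mirrored as-is; see the dataset
# card for how the files are organized.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="lmms-lab/LLaVA-Video-178K",
    repo_type="dataset",
)
print("dataset files downloaded to", local_dir)
```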
