NVIDIA-NeMo · ethanhe42 · Jul 11, 2025 · Jul 11, 2025 · Jul 11, 2025 · Jul 11, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -32,20 +32,20 @@ repos:
       args: ["check", "--select", "I", "--fix"]
     - id: ruff-format
 
-  - repo: local
-    hooks:
-      - id: no-underscore-md
-        name: "Disallow '_' in Markdown filenames"
-        language: system
-        entry: |
-          bash -c '
-            # Report the offending files
-            echo "[pre-commit] ERROR: Found Markdown files with underscores:" >&2
-            for file in "$@"; do
-              echo "  - $file (use hyphens instead)" >&2
-            done
-            exit 1
-          '
-        files: '.*\/[^\/]*_[^\/]*\.md$'
-        exclude: '^\.github/'
-        types: [file]
+  # - repo: local
+  #   hooks:
+  #     - id: no-underscore-md
+  #       name: "Disallow '_' in Markdown filenames"
+  #       language: system
+  #       entry: |
+  #         bash -c '
+  #           # Report the offending files
+  #           echo "[pre-commit] ERROR: Found Markdown files with underscores:" >&2
+  #           for file in "$@"; do
+  #             echo "  - $file (use hyphens instead)" >&2
+  #           done
+  #           exit 1
+  #         '
+  #       files: '.*\/[^\/]*_[^\/]*\.md$'
+  #       exclude: '^\.github/'
+  #       types: [file]
diff --git a/README.md b/README.md
@@ -1 +1,30 @@
-# NeMo VFM
+# NeMo VFM: video foundation model collection
+
+NeMo VFM is a state-of-the-art framework for fast, large-scale training and inference of video world models. It unifies the latest diffusion-based and autoregressive techniques, prioritizing efficiency and performance from research prototyping to production deployment.
+
+## Projects
+
+This collection consists of 4 projects:
+1. [Scalable diffusion training framework](nemo_vfm/diffusion/readme.rst)
+2. [Accelerated diffusion world models](nemo_vfm/physicalai/Cosmos/cosmos1/models/diffusion/README.md)
+3. [Accelerated autoregressive world models](nemo_vfm/physicalai/Cosmos/cosmos1/models/autoregressive/README.md)
+4. [Sparse attention for efficient diffusion inference](nemo_vfm/sparse_attention/README.md)
+
+## Citations
+
+If you find our code useful, please consider citing the following papers:
+```bibtex
+@article{patel2025training,
+  title={Training Video Foundation Models with {NVIDIA} {NeMo}},
+  author={Patel, Zeeshan and He, Ethan and Mannan, Parth and Ren, Xiaowei and Wolf, Ryan and Agarwal, Niket and Huffman, Jacob and Wang, Zhuoyao and Wang, Carl and Chang, Jack and others},
+  journal={arXiv preprint arXiv:2503.12964},
+  year={2025}
+}
+
+@article{agarwal2025cosmos,
+  title={Cosmos World Foundation Model Platform for Physical {AI}},
+  author={Agarwal, Niket and Ali, Arslan and Bala, Maciej and Balaji, Yogesh and Barker, Erik and Cai, Tiffany and Chattopadhyay, Prithvijit and Chen, Yongxin and Cui, Yin and Ding, Yifan and others},
+  journal={arXiv preprint arXiv:2501.03575},
+  year={2025}
+}
+```
diff --git a/nemo_vfm/diffusion/Dockerfile b/nemo_vfm/diffusion/Dockerfile
@@ -0,0 +1,17 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM nvcr.io/nvidia/nemo:24.07
+# Extra Python packages layered on the NeMo base image. Only the headless OpenCV wheel is installed: opencv-python and opencv-python-headless both provide the cv2 package and must not share an environment.
+RUN pip install --no-cache-dir diffusers==0.30.3 git+https://github.com/NVIDIA/NeMo-Run.git av megatron-energon ffmpeg-python==0.2.0 imageio-ffmpeg==0.4.8 imageio==2.26.0 opencv-python-headless==4.8.0.74 mediapy
diff --git a/nemo_vfm/diffusion/__init__.py b/nemo_vfm/diffusion/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_vfm/diffusion/assets/mixed_training.png b/nemo_vfm/diffusion/assets/mixed_training.png
diff --git a/nemo_vfm/diffusion/assets/pipeline_conditioning.png b/nemo_vfm/diffusion/assets/pipeline_conditioning.png
diff --git a/nemo_vfm/diffusion/assets/st_dit_hybrid_parallel.png b/nemo_vfm/diffusion/assets/st_dit_hybrid_parallel.png
diff --git a/nemo_vfm/diffusion/data/__init__.py b/nemo_vfm/diffusion/data/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.