Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/Dockerfile.pytorch
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM nvcr.io/nvidia/pytorch:25.06-py3
FROM nvcr.io/nvidia/pytorch:25.10-py3
WORKDIR /workspace
COPY . .
ARG INFERENCE_FRAMEWORK
Expand Down
30 changes: 15 additions & 15 deletions docker/common/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ main() {
dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
apt-get update
apt-get install -y cuda-toolkit-12-8 cudnn-cuda-12 libcudnn9-cuda-12 libcutlass-dev
apt-get install -y cuda-toolkit-12-8 cudnn-cuda-12 libcudnn9-cuda-12 libcutlass-dev

# Install OpenMPI and nvinfer
apt-get install -y libopenmpi-dev libnvinfer-lean-dev
fi

# Clean up
apt-get clean

Expand All @@ -109,18 +109,18 @@ main() {
"--no-install-package" "torch"
"--no-install-package" "torchvision"
"--no-install-package" "triton"
"--no-install-package" "nvidia-cublas-cu12"
"--no-install-package" "nvidia-cuda-cupti-cu12"
"--no-install-package" "nvidia-cuda-nvrtc-cu12"
"--no-install-package" "nvidia-cuda-runtime-cu12"
"--no-install-package" "nvidia-cudnn-cu12"
"--no-install-package" "nvidia-cufft-cu12"
"--no-install-package" "nvidia-cufile-cu12"
"--no-install-package" "nvidia-curand-cu12"
"--no-install-package" "nvidia-cusolver-cu12"
"--no-install-package" "nvidia-cusparse-cu12"
"--no-install-package" "nvidia-cusparselt-cu12"
"--no-install-package" "nvidia-nccl-cu12"
"--no-install-package" "nvidia-cublas"
"--no-install-package" "nvidia-cuda-cupti"
"--no-install-package" "nvidia-cuda-nvrtc"
"--no-install-package" "nvidia-cuda-runtime"
"--no-install-package" "nvidia-cudnn-cu13"
"--no-install-package" "nvidia-cufft"
"--no-install-package" "nvidia-cufile"
"--no-install-package" "nvidia-curand"
"--no-install-package" "nvidia-cusolver"
"--no-install-package" "nvidia-cusparse"
"--no-install-package" "nvidia-cusparselt-cu13"
"--no-install-package" "nvidia-nccl-cu13"
)
else
UV_ARGS=()
Expand Down Expand Up @@ -152,7 +152,7 @@ main() {
pip install --pre --no-cache-dir --upgrade pip
pip install --pre --no-cache-dir 'torch>=2.7.0,<2.8.0' pybind11 wheel_stub ninja wheel packaging


pip install --pre --no-cache-dir --no-build-isolation .$EXTRA
fi

Expand Down
26 changes: 14 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ license = { text = "Apache 2.0" }
dependencies = [
"megatron-core>=0.14.0a0,<0.15.0",
"megatron-bridge>=0.1.0a0,<0.2.0",
"nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'",
"nvidia-modelopt[torch]>=0.37.0a0,<0.41.0; sys_platform != 'darwin'",
"nvidia-resiliency-ext>=0.4.0a0,<0.5.0; sys_platform != 'darwin'",
"transformer-engine[pytorch]>=2.6.0a0,<2.7.0; sys_platform != 'darwin'",
"accelerate",
Expand All @@ -61,7 +61,7 @@ dependencies = [
"lightning",
"omegaconf>=2.3.0",
"peft<0.14.0",
"torch==2.7.1",
"torch==2.9.0",
"torchvision",
"torchmetrics>=0.11.0",
"wandb",
Expand All @@ -80,13 +80,13 @@ dependencies = [

[project.optional-dependencies]
inframework = []
vllm = ["vllm~=0.10.0", "pandas", "timm"]
trtllm = ["tensorrt-llm>=1.0.0a0,<1.1.0,>=1.0.0rc6", "cuda-python~=12.8.0"]
trt-onnx = ["tensorrt==10.11.0.33", "onnx==1.18.0", "transformers==4.51.3"]
vllm = ["vllm~=0.13.0", "pandas", "timm"]
trtllm = ["tensorrt-llm>=1.1.0a0,<1.2.0", "cuda-python~=13.0.0"]
trt-onnx = ["tensorrt==10.14.1.48", "onnx==1.18.0", "transformers==4.51.3"]

[dependency-groups]
# This is a default group so that we install these even with bare `uv sync`
build = ["setuptools", "torch==2.7.1", "pybind11", "Cython>=3.0.0", "ninja"]
build = ["setuptools", "torch==2.9.0", "pybind11", "Cython>=3.0.0", "ninja"]
docs = [
"sphinx",
"sphinx-autobuild", # For live doc serving while editing docs
Expand All @@ -98,15 +98,15 @@ docs = [
linting = ["pre-commit>=3.6.0", "ruff~=0.9.0"]
test = ["pytest", "pytest-mock", "coverage", "click"]
nemo-toolkit = [
"nemo-toolkit[automodel,common-only,nlp-only,eval,multimodal-only]>=2.5.0a0,<2.6.0",
"nemo-toolkit[automodel,common-only,nlp-only,eval,multimodal-only,lightning]>=2.6.0a0,<2.8.0",
]
nemo-run = ["nemo-run"]

[tool.uv.sources]
xformers = [{ index = "pytorch-cu128" }]
torch = [{ index = "pytorch-cu128" }]
xformers = [{ index = "pytorch-cu130" }]
torch = [{ index = "pytorch-cu130" }]
vllm = [
{ index = "pytorch-cu128", marker = "python_version < '3.9' and platform_machine == 'x86_64'" },
{ index = "pytorch-cu130", marker = "python_version < '3.9' and platform_machine == 'x86_64'" },
{ index = "pypi", marker = "platform_machine == 'aarch64'" },
]
# megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "ecf05926b4765aada82c8eabab4a374e8e83a9c5" }
Expand All @@ -130,6 +130,8 @@ override-dependencies = [
"urllib3>1.27.0",
"tiktoken>=0.9.0", # because nemo-toolkit and megatron-bridge disagree on tiktoken, we need to pin it here,
"fsspec[http]>=2023.1.0,<=2024.9.0",
"patchelf; sys_platform == 'never'",
"transformers==4.51.3"
]
prerelease = "allow"

Expand All @@ -139,8 +141,8 @@ url = "https://pypi.org/simple"
explicit = true

[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
name = "pytorch-cu130"
url = "https://download.pytorch.org/whl/cu130"
explicit = true

[tool.pytest.ini_options]
Expand Down
Loading
Loading