Skip to content

Commit 06ee6e4

Browse files
Merge pull request #2709 from ROCm/release/2.9_IFU_2025-10-14
[AUTOGENERATED] release/2.9_IFU_2025-10-14
2 parents 426b2e8 + 31b3b8e commit 06ee6e4

File tree

11 files changed

+49
-26
lines changed

11 files changed

+49
-26
lines changed

.circleci/scripts/binary_populate_env.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,7 @@ export PYTORCH_BUILD_NUMBER=1
7575
: <<'BLOCK_COMMENT'
7676
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
7777
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
78-
79-
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
80-
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
81-
82-
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
83-
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
84-
TRITON_CONSTRAINT="platform_system == 'Linux'"
85-
fi
78+
TRITON_CONSTRAINT="platform_system == 'Linux'"
8679
8780
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
8881
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"

.github/templates/linux_binary_build_workflow.yml.j2

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ jobs:
7777
runs_on: linux.s390x
7878
ALPINE_IMAGE: "docker.io/s390x/alpine"
7979
timeout-minutes: 420
80+
{%- elif config["gpu_arch_type"] == "rocm" %}
81+
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
82+
timeout-minutes: 300
8083
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
8184
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
8285
runs_on: linux.24xlarge.ephemeral

.github/workflows/generated-linux-binary-libtorch-nightly.yml

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.github/workflows/generated-linux-binary-manywheel-nightly.yml

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.github/workflows/generated-linux-binary-manywheel-rocm-main.yml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.github/workflows/operator_benchmark.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ on:
1414
schedule:
1515
# Run at 07:00 UTC every Sunday
1616
- cron: 0 7 * * 0
17+
pull_request:
18+
paths:
19+
- benchmarks/operator_benchmark/**
20+
- .github/workflows/operator_benchmark.yml
1721

1822
concurrency:
1923
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ ARG CUDA_PATH=cu121
5353
ARG INSTALL_CHANNEL=whl/nightly
5454
# Automatically set by buildx
5555
# pinning version of conda here see: https://github.com/pytorch/pytorch/issues/164574
56-
RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y python=${PYTHON_VERSION} conda=25.7.0
56+
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda=25.7.0
5757

5858
ARG TARGETPLATFORM
5959

aten/src/ATen/mps/EmptyTensor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#define MPS_ERROR_NOT_COMPILED "PyTorch code is not compiled with MPS enabled"
1414
#define MPS_ERROR_RUNTIME_TOO_LOW \
15-
"The MPS backend is supported on MacOS 13.0+.", \
15+
"The MPS backend is supported on MacOS 14.0+. ", \
1616
"Current OS version can be queried using `sw_vers`"
1717
#define MPS_ERROR_DOUBLE_NOT_SUPPORTED "Cannot convert a MPS Tensor to float64 dtype " \
1818
"as the MPS framework doesn't support float64. Please use float32 instead."

benchmarks/operator_benchmark/expected_ci_operator_benchmark_eager_float32_cpu.csv

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time
2-
PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497
2+
PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,2.459
33
PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181
44
PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826
55
PyTorch,add,add_M1_N1_K1_cpu_bwdall_BACKWARD,short,TRUE,58.1449
@@ -376,10 +376,10 @@ PyTorch,relu6,"relu6_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",sho
376376
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,9.6588
377377
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,9.5969
378378
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,9.547
379-
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,68.739
379+
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.21375
380380
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,45.14133333
381381
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,52.6664
382-
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,69.1875
382+
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,51.49525
383383
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,48.3458
384384
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,62.0719
385385
PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,7.5728
@@ -388,10 +388,10 @@ PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplace
388388
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,8.1647
389389
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,8.1768
390390
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,8.0619
391-
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.118
391+
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,48.88475
392392
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,43.702
393393
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,50.3613
394-
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.436
394+
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.3995
395395
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,46.9813
396396
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,59.2295
397397
PyTorch,functional.hardsigmoid,"functional.hardsigmoid_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,6.5189
@@ -1316,4 +1316,4 @@ PyTorch,where,"where_cond_shape(8,16,1)_input_shape(1,)_other_shape(1,)_cpu_dtyp
13161316
PyTorch,where,"where_cond_shape(8,16,1)_input_shape(16,1)_other_shape(8,16,1)_cpu_dtypetorch.float32",short,FALSE,5.763
13171317
PyTorch,where,"where_cond_shape(8,16,1)_input_shape(8,1,1)_other_shape(1,)_cpu_dtypetorch.float32",short,FALSE,5.744666667
13181318
PyTorch,clamp,clamp_M512_N512_cpu,short,FALSE,15.26233333
1319-
PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667
1319+
PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667

torch/__init__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def _get_cuda_dep_paths(path: str, lib_folder: str, lib_name: str) -> list[str]:
302302
return nvidia_lib_paths + lib_paths
303303

304304

305-
def _preload_cuda_deps(lib_folder: str, lib_name: str) -> None:
305+
def _preload_cuda_deps(lib_folder: str, lib_name: str, required: bool = True) -> None: # type: ignore[valid-type]
306306
"""Preloads cuda deps if they could not be found otherwise."""
307307
# Should only be called on Linux if default path resolution have failed
308308
assert platform.system() == "Linux", "Should only be called on Linux"
@@ -313,9 +313,10 @@ def _preload_cuda_deps(lib_folder: str, lib_name: str) -> None:
313313
if candidate_lib_paths:
314314
lib_path = candidate_lib_paths[0]
315315
break
316-
if not lib_path:
316+
if not lib_path and required:
317317
raise ValueError(f"{lib_name} not found in the system path {sys.path}")
318-
ctypes.CDLL(lib_path)
318+
if lib_path:
319+
ctypes.CDLL(lib_path)
319320

320321

321322
# See Note [Global dependencies]
@@ -354,8 +355,6 @@ def _load_global_deps() -> None:
354355
except OSError as err:
355356
# Can only happen for wheel with cuda libs as PYPI deps
356357
# As PyTorch is not purelib, but nvidia-*-cu12 is
357-
from torch.version import cuda as cuda_version
358-
359358
cuda_libs: dict[str, str] = {
360359
"cublas": "libcublas.so.*[0-9]",
361360
"cudnn": "libcudnn.so.*[0-9]",
@@ -369,7 +368,6 @@ def _load_global_deps() -> None:
369368
"cusparselt": "libcusparseLt.so.*[0-9]",
370369
"cusolver": "libcusolver.so.*[0-9]",
371370
"nccl": "libnccl.so.*[0-9]",
372-
"nvtx": "libnvToolsExt.so.*[0-9]",
373371
"nvshmem": "libnvshmem_host.so.*[0-9]",
374372
"cufile": "libcufile.so.*[0-9]",
375373
}
@@ -381,6 +379,9 @@ def _load_global_deps() -> None:
381379
raise err
382380
for lib_folder, lib_name in cuda_libs.items():
383381
_preload_cuda_deps(lib_folder, lib_name)
382+
383+
# libnvToolsExt is Optional Dependency
384+
_preload_cuda_deps("nvtx", "libnvToolsExt.so.*[0-9]", required=False)
384385
ctypes.CDLL(global_deps_lib_path, mode=ctypes.RTLD_GLOBAL)
385386

386387

0 commit comments

Comments (0)