Skip to content

Commit 0feb31e

Browse files
Merge branch 'pytorch:main' into temp-gha-runner-v2
2 parents 67842a3 + ea5b9ec commit 0feb31e

File tree

908 files changed

+20031
-2383
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

908 files changed

+20031
-2383
lines changed

.ci/aarch64_linux/aarch64_ci_build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then
2727
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
2828
else
2929
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
30+
export USE_SYSTEM_NCCL=1
3031
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
3132
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
3233
fi

.ci/docker/build.sh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ case "$tag" in
110110
TRITON=yes
111111
;;
112112
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
113-
CUDA_VERSION=12.8
113+
CUDA_VERSION=12.8.1
114114
CUDNN_VERSION=9
115115
ANACONDA_PYTHON_VERSION=3.10
116116
GCC_VERSION=9
@@ -168,8 +168,8 @@ case "$tag" in
168168
TRITON=yes
169169
INDUCTOR_BENCHMARKS=yes
170170
;;
171-
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9)
172-
CUDA_VERSION=11.8.0
171+
pytorch-linux-focal-cuda12.8-cudnn9-py3-gcc9)
172+
CUDA_VERSION=12.8.1
173173
CUDNN_VERSION=9
174174
ANACONDA_PYTHON_VERSION=3.10
175175
GCC_VERSION=9
@@ -252,9 +252,9 @@ case "$tag" in
252252
DOCS=yes
253253
INDUCTOR_BENCHMARKS=yes
254254
;;
255-
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12)
255+
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
256256
ANACONDA_PYTHON_VERSION=3.9
257-
CUDA_VERSION=11.8
257+
CUDA_VERSION=12.8.1
258258
CUDNN_VERSION=9
259259
CLANG_VERSION=12
260260
VISION=yes
@@ -309,9 +309,9 @@ case "$tag" in
309309
# would be to upgrade mypy to 1.0.0 with Python 3.11
310310
PYTHON_VERSION=3.9
311311
;;
312-
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
312+
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
313313
PYTHON_VERSION=3.9
314-
CUDA_VERSION=11.8
314+
CUDA_VERSION=12.8.1
315315
;;
316316
pytorch-linux-jammy-aarch64-py3.10-gcc11)
317317
ANACONDA_PYTHON_VERSION=3.10

.ci/docker/common/install_cuda.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,9 @@ do
205205
;;
206206
12.4) install_124; prune_124
207207
;;
208-
12.6) install_126; prune_126
208+
12.6|12.6.*) install_126; prune_126
209209
;;
210-
12.8) install_128;
210+
12.8|12.8.*) install_128;
211211
;;
212212
*) echo "bad argument $1"; exit 1
213213
;;

.ci/docker/common/install_onnx.sh

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,6 @@ retry () {
88
"$@" || (sleep 10 && "$@") || (sleep 20 && "$@") || (sleep 40 && "$@")
99
}
1010

11-
# A bunch of custom pip dependencies for ONNX
12-
pip_install \
13-
beartype==0.15.0 \
14-
filelock==3.9.0 \
15-
flatbuffers==2.0 \
16-
mock==5.0.1 \
17-
ninja==1.10.2 \
18-
networkx==2.5 \
19-
numpy==1.24.2
20-
2111
# ONNXRuntime should be installed before installing
2212
# onnx-weekly. Otherwise, onnx-weekly could be
2313
# overwritten by onnx.
@@ -29,11 +19,8 @@ pip_install \
2919
transformers==4.36.2
3020

3121
pip_install coloredlogs packaging
32-
3322
pip_install onnxruntime==1.18.1
34-
pip_install onnxscript==0.2.6 --no-deps
35-
# required by onnxscript
36-
pip_install ml_dtypes
23+
pip_install onnxscript==0.3.0
3724

3825
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
3926
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/

.ci/docker/common/install_triton.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,12 @@ as_jenkins git clone --recursive ${TRITON_REPO} triton
5151
cd triton
5252
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
5353
as_jenkins git submodule update --init --recursive
54-
cd python
54+
55+
# Old versions of triton have setup.py in ./python; newer versions have it in ./
56+
if [ ! -f setup.py ]; then
57+
cd python
58+
fi
59+
5560
pip_install pybind11==2.13.6
5661

5762
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527

.ci/docker/requirements-ci.txt

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,11 @@ fbscribelogger==0.1.7
4141
#Pinned versions: 0.1.6
4242
#test that import:
4343

44-
flatbuffers==2.0 ; platform_machine != "s390x"
44+
flatbuffers==24.12.23
4545
#Description: cross platform serialization library
46-
#Pinned versions: 2.0
46+
#Pinned versions: 24.12.23
4747
#test that import:
4848

49-
flatbuffers ; platform_machine == "s390x"
50-
#Description: cross platform serialization library; Newer version is required on s390x for new python version
51-
5249
hypothesis==5.35.1
5350
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
5451
#Description: advanced library for generating parametrized tests

.ci/manywheel/build_cuda.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ export INSTALL_TEST=0 # dont install test binaries into site-packages
1515
export USE_CUPTI_SO=0
1616
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
1717
export USE_CUFILE=${USE_CUFILE:-1}
18+
export USE_SYSTEM_NCCL=1
19+
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
20+
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
1821

1922
# Keep an array of cmake variables to add to
2023
if [[ -z "$CMAKE_ARGS" ]]; then
@@ -172,12 +175,9 @@ if [[ $CUDA_VERSION == 12* ]]; then
172175
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
173176
export FORCE_RPATH="--force-rpath"
174177
export USE_STATIC_NCCL=0
175-
export USE_SYSTEM_NCCL=1
176178
export ATEN_STATIC_CUDA=0
177179
export USE_CUDA_STATIC_LINK=0
178180
export USE_CUPTI_SO=1
179-
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
180-
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
181181
fi
182182
elif [[ $CUDA_VERSION == "11.8" ]]; then
183183
export USE_STATIC_CUDNN=0
@@ -254,12 +254,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then
254254
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
255255
export FORCE_RPATH="--force-rpath"
256256
export USE_STATIC_NCCL=0
257-
export USE_SYSTEM_NCCL=1
258257
export ATEN_STATIC_CUDA=0
259258
export USE_CUDA_STATIC_LINK=0
260259
export USE_CUPTI_SO=1
261-
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
262-
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
263260
fi
264261
else
265262
echo "Unknown cuda version $CUDA_VERSION"

.ci/pytorch/test.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,12 @@ test_python_smoke() {
324324
assert_git_not_dirty
325325
}
326326

327+
test_h100_distributed() {
328+
# Distributed tests at H100
329+
time python test/run_test.py --include distributed/_composable/test_composability/test_pp_composability.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
330+
assert_git_not_dirty
331+
}
332+
327333
test_lazy_tensor_meta_reference_disabled() {
328334
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
329335
echo "Testing lazy tensor operations without meta reference"
@@ -1641,7 +1647,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
16411647
install_torchaudio cuda
16421648
fi
16431649
install_torchvision
1644-
TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git
1650+
TORCH_CUDA_ARCH_LIST="8.0;8.6" install_torchao
16451651
id=$((SHARD_NUMBER-1))
16461652
# https://github.com/opencv/opencv-python/issues/885
16471653
pip_install opencv-python==4.8.0.74
@@ -1726,6 +1732,8 @@ elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
17261732
test_xpu_bin
17271733
elif [[ "${TEST_CONFIG}" == smoke ]]; then
17281734
test_python_smoke
1735+
elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
1736+
test_h100_distributed
17291737
else
17301738
install_torchvision
17311739
install_monkeytype

.github/actions/reuse-old-whl/reuse_old_whl.py

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import subprocess
44
from functools import lru_cache
55
from pathlib import Path
6-
from typing import Any, cast, Optional
6+
from typing import Any, cast, Optional, Union
77

88
import requests
99

@@ -120,6 +120,23 @@ def ok_changed_file(file: str) -> bool:
120120
def check_changed_files(sha: str) -> bool:
121121
# Return true if all the changed files are in the list of allowed files to
122122
# be changed to reuse the old whl
123+
124+
# Removing any files is not allowed since rsync will not remove files
125+
removed_files = (
126+
subprocess.check_output(
127+
["git", "diff", "--name-only", sha, "HEAD", "--diff-filter=D"],
128+
text=True,
129+
stderr=subprocess.DEVNULL,
130+
)
131+
.strip()
132+
.split()
133+
)
134+
if removed_files:
135+
print(
136+
f"Removed files between {sha} and HEAD: {removed_files}, cannot reuse old whl"
137+
)
138+
return False
139+
123140
changed_files = (
124141
subprocess.check_output(
125142
["git", "diff", "--name-only", sha, "HEAD"],
@@ -179,38 +196,83 @@ def unzip_artifact_and_replace_files() -> None:
179196
)
180197
os.remove("artifacts.zip")
181198

199+
head_sha = get_head_sha()
200+
182201
# Rename wheel into zip
183202
wheel_path = Path("artifacts/dist").glob("*.whl")
184203
for path in wheel_path:
185-
new_path = path.with_suffix(".zip")
186-
os.rename(path, new_path)
187-
print(f"Renamed {path} to {new_path}")
188-
print(new_path.stem)
204+
# Should be of the form torch-2.0.0+git1234567-cp37-etc.whl
205+
# Should usually be the merge base sha but for the ones that didn't do
206+
# the replacement, it won't be. Can probably change it to just be merge
207+
# base later
208+
old_version = f"+git{path.stem.split('+')[1].split('-')[0][3:]}"
209+
new_version = f"+git{head_sha[:7]}"
210+
211+
def rename_to_new_version(file: Union[str, Path]) -> None:
212+
# Rename file with old_version to new_version
213+
subprocess.check_output(
214+
["mv", file, str(file).replace(old_version, new_version)]
215+
)
216+
217+
def change_content_to_new_version(file: Union[str, Path]) -> None:
218+
# Check if is a file
219+
if os.path.isdir(file):
220+
return
221+
# Replace the old version in the file with the new version
222+
with open(file) as f:
223+
content = f.read()
224+
content = content.replace(old_version, new_version)
225+
with open(file, "w") as f:
226+
f.write(content)
227+
228+
zip_path = path.with_suffix(".zip")
229+
os.rename(path, zip_path)
230+
old_stem = zip_path.stem
189231
# Unzip the wheel
190232
subprocess.check_output(
191-
["unzip", "-o", new_path, "-d", f"artifacts/dist/{new_path.stem}"],
233+
["unzip", "-o", zip_path, "-d", f"artifacts/dist/{old_stem}"],
192234
)
235+
236+
# Remove the old wheel (which is now a zip file)
237+
os.remove(zip_path)
238+
193239
# Copy python files into the artifact
194240
subprocess.check_output(
195-
["rsync", "-avz", "torch", f"artifacts/dist/{new_path.stem}"],
241+
["rsync", "-avz", "torch", f"artifacts/dist/{old_stem}"],
196242
)
197243

244+
change_content_to_new_version(f"artifacts/dist/{old_stem}/torch/version.py")
245+
246+
for file in Path(f"artifacts/dist/{old_stem}").glob(
247+
"*.dist-info/**",
248+
):
249+
change_content_to_new_version(file)
250+
251+
rename_to_new_version(f"artifacts/dist/{old_stem}")
252+
new_stem = old_stem.replace(old_version, new_version)
253+
254+
for file in Path(f"artifacts/dist/{new_stem}").glob(
255+
"*.dist-info",
256+
):
257+
rename_to_new_version(file)
258+
198259
# Zip the wheel back
199260
subprocess.check_output(
200-
["zip", "-r", f"{new_path.stem}.zip", "."],
201-
cwd=f"artifacts/dist/{new_path.stem}",
261+
["zip", "-r", f"{new_stem}.zip", "."],
262+
cwd=f"artifacts/dist/{new_stem}",
202263
)
264+
203265
subprocess.check_output(
204266
[
205267
"mv",
206-
f"artifacts/dist/{new_path.stem}/{new_path.stem}.zip",
207-
f"artifacts/dist/{new_path.stem}.whl",
268+
f"artifacts/dist/{new_stem}/{new_stem}.zip",
269+
f"artifacts/dist/{new_stem}.whl",
208270
],
209271
)
210272

211273
# Remove the extracted folder
212274
subprocess.check_output(
213-
["rm", "-rf", f"artifacts/dist/{new_path.stem}"],
275+
["rm", "-rf", f"artifacts/dist/{new_stem}"],
214276
)
215277

216278
# Rezip the artifact

.github/ci_commit_pins/audio.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1a8f6213b0b61efc6a4862bc45b853551a93dbb6
1+
4cb7f57d31b0b288696f09b89e890e5fac092eed

0 commit comments

Comments
 (0)