Skip to content

Commit 189aa88

Browse files
committed
Merge upstream branch 'release/2.7' into release/2.7
2 parents d17e222 + e2d141d commit 189aa88

File tree

77 files changed

+988
-298
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+988
-298
lines changed

.ci/aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,22 @@ def build_ArmComputeLibrary() -> None:
5555
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
5656

5757

58-
def update_wheel(wheel_path, desired_cuda) -> None:
58+
def replace_tag(filename) -> None:
59+
with open(filename) as f:
60+
lines = f.readlines()
61+
for i, line in enumerate(lines):
62+
if line.startswith("Tag:"):
63+
lines[i] = line.replace("-linux_", "-manylinux_2_28_")
64+
print(f"Updated tag from {line} to {lines[i]}")
65+
break
66+
67+
with open(filename, "w") as f:
68+
f.writelines(lines)
69+
70+
71+
def package_cuda_wheel(wheel_path, desired_cuda) -> None:
5972
"""
60-
Update the cuda wheel libraries
73+
Package the cuda wheel libraries
6174
"""
6275
folder = os.path.dirname(wheel_path)
6376
wheelname = os.path.basename(wheel_path)
@@ -88,30 +101,19 @@ def update_wheel(wheel_path, desired_cuda) -> None:
88101
"/usr/lib64/libgfortran.so.5",
89102
"/acl/build/libarm_compute.so",
90103
"/acl/build/libarm_compute_graph.so",
104+
"/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
105+
"/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
106+
"/usr/local/lib/libnvpl_lapack_core.so.0",
107+
"/usr/local/lib/libnvpl_blas_core.so.0",
91108
]
92-
if enable_cuda:
93-
libs_to_copy += [
94-
"/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
95-
"/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
96-
"/usr/local/lib/libnvpl_lapack_core.so.0",
97-
"/usr/local/lib/libnvpl_blas_core.so.0",
98-
]
99-
if "126" in desired_cuda:
100-
libs_to_copy += [
101-
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
102-
"/usr/local/cuda/lib64/libcufile.so.0",
103-
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
104-
]
105-
elif "128" in desired_cuda:
106-
libs_to_copy += [
107-
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
108-
"/usr/local/cuda/lib64/libcufile.so.0",
109-
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
110-
]
111-
else:
109+
110+
if "128" in desired_cuda:
112111
libs_to_copy += [
113-
"/opt/OpenBLAS/lib/libopenblas.so.0",
112+
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
113+
"/usr/local/cuda/lib64/libcufile.so.0",
114+
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
114115
]
116+
115117
# Copy libraries to unzipped_folder/a/lib
116118
for lib_path in libs_to_copy:
117119
lib_name = os.path.basename(lib_path)
@@ -120,6 +122,13 @@ def update_wheel(wheel_path, desired_cuda) -> None:
120122
f"cd {folder}/tmp/torch/lib/; "
121123
f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}"
122124
)
125+
126+
# Make sure the wheel is tagged with manylinux_2_28
127+
for f in os.scandir(f"{folder}/tmp/"):
128+
if f.is_dir() and f.name.endswith(".dist-info"):
129+
replace_tag(f"{f.path}/WHEEL")
130+
break
131+
123132
os.mkdir(f"{folder}/cuda_wheel")
124133
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
125134
shutil.move(
@@ -242,6 +251,6 @@ def parse_arguments():
242251
print("Updating Cuda Dependency")
243252
filename = os.listdir("/pytorch/dist/")
244253
wheel_path = f"/pytorch/dist/{filename[0]}"
245-
update_wheel(wheel_path, desired_cuda)
254+
package_cuda_wheel(wheel_path, desired_cuda)
246255
pytorch_wheel_name = complete_wheel("/pytorch/")
247256
print(f"Build Complete. Created {pytorch_wheel_name}..")
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
01a22b6f16d117454b7d21ebdc691b0785b84a7f
1+
ebe8522378c3f9944aaaef44868f5ececdd845fc

.ci/docker/common/install_executorch.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ setup_executorch() {
5050
pushd executorch
5151

5252
export PYTHON_EXECUTABLE=python
53-
export EXECUTORCH_BUILD_PYBIND=ON
54-
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
53+
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
5554

5655
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
5756
popd

.ci/docker/common/install_halide.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@ git clone https://github.com/halide/Halide.git
3535
pushd Halide
3636
git checkout ${COMMIT} && git submodule update --init --recursive
3737
pip_install -r requirements.txt
38-
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build
38+
# NOTE: pybind has a requirement for cmake > 3.5 so set the minimum cmake version here with a flag
39+
# Context: https://github.com/pytorch/pytorch/issues/150420
40+
cmake -G Ninja -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release -S . -B build
3941
cmake --build build
4042
test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3
4143
cmake --install build --prefix ${CONDA_PREFIX}

.ci/docker/common/install_inductor_benchmark_deps.sh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,6 @@ function install_timm() {
1414
local commit
1515
commit=$(get_pinned_commit timm)
1616

17-
# TODO (huydhn): There is no torchvision release on 3.13 when I write this, so
18-
# I'm using nightly here instead. We just need to package to be able to install
19-
# TIMM. Removing this once vision has a release on 3.13
20-
if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then
21-
pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
22-
fi
23-
2417
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
2518
# Clean up
2619
conda_run pip uninstall -y cmake torch torchvision triton

.ci/docker/triton_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.3.0
1+
3.3.1

.ci/manywheel/build_common.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,8 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
333333
# ROCm workaround for roctracer dlopens
334334
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
335335
patchedpath=$(fname_without_so_number $destpath)
336-
# Keep the so number for XPU dependencies
337-
elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then
336+
# Keep the so number for XPU dependencies and libgomp.so.1 to avoid twice load
337+
elif [[ "$DESIRED_CUDA" == *"xpu"* || "$filename" == "libgomp.so.1" ]]; then
338338
patchedpath=$destpath
339339
else
340340
patchedpath=$(fname_with_sha256 $destpath)

.ci/pytorch/check_binary.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,6 @@ else
5959
export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
6060
fi
6161

62-
###############################################################################
63-
# Setup XPU ENV
64-
###############################################################################
65-
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
66-
set +u
67-
# Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
68-
source /opt/intel/oneapi/compiler/latest/env/vars.sh
69-
source /opt/intel/oneapi/pti/latest/env/vars.sh
70-
fi
71-
7262
###############################################################################
7363
# Check GCC ABI
7464
###############################################################################

.ci/pytorch/install_cache_xla.sh

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,50 @@
11
#!/bin/bash
22

33
# Script for installing sccache on the xla build job, which uses xla's docker
4-
# image and doesn't have sccache installed on it. This is mostly copied from
5-
# .ci/docker/install_cache.sh. Changes are: removing checks that will always
6-
# return the same thing, ex checks for for rocm, CUDA, and changing the path
7-
# where sccache is installed, and not changing /etc/environment.
4+
# image, which has sccache installed but doesn't write the stubs. This is
5+
# mostly copied from .ci/docker/install_cache.sh. Changes are: removing checks
6+
# that will always return the same thing, ex checks for for rocm, CUDA, changing
7+
# the path where sccache is installed, not changing /etc/environment, and not
8+
# installing/downloading sccache as it is already in the docker image.
89

910
set -ex -o pipefail
1011

11-
install_binary() {
12-
echo "Downloading sccache binary from S3 repo"
13-
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /tmp/cache/bin/sccache
14-
}
15-
1612
mkdir -p /tmp/cache/bin
17-
mkdir -p /tmp/cache/lib
1813
export PATH="/tmp/cache/bin:$PATH"
1914

20-
install_binary
21-
chmod a+x /tmp/cache/bin/sccache
22-
2315
function write_sccache_stub() {
2416
# Unset LD_PRELOAD for ps because of asan + ps issues
2517
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
26-
# shellcheck disable=SC2086
27-
# shellcheck disable=SC2059
28-
printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n exec sccache $(which $1) \"\$@\"\nelse\n exec $(which $1) \"\$@\"\nfi" > "/tmp/cache/bin/$1"
18+
if [ "$1" == "gcc" ]; then
19+
# Do not call sccache recursively when dumping preprocessor argument
20+
# For some reason it's very important for the first cached nvcc invocation
21+
cat >"/tmp/cache/bin/$1" <<EOF
22+
#!/bin/sh
23+
24+
# sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
25+
for arg in "\$@"; do
26+
if [ "\$arg" = "-E" ]; then
27+
exec $(which "$1") "\$@"
28+
fi
29+
done
30+
31+
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
32+
exec sccache $(which "$1") "\$@"
33+
else
34+
exec $(which "$1") "\$@"
35+
fi
36+
EOF
37+
else
38+
cat >"/tmp/cache/bin/$1" <<EOF
39+
#!/bin/sh
40+
41+
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
42+
exec sccache $(which "$1") "\$@"
43+
else
44+
exec $(which "$1") "\$@"
45+
fi
46+
EOF
47+
fi
2948
chmod a+x "/tmp/cache/bin/$1"
3049
}
3150

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import ctypes
2+
import os
3+
import sys
4+
from pathlib import Path
5+
6+
7+
def get_gomp_thread():
    """
    Retrieves the maximum number of OpenMP threads after loading the `libgomp.so.1` library
    and the `libtorch_cpu.so` library. It then queries the
    maximum number of threads available for OpenMP parallel regions using the
    `omp_get_max_threads` function.

    Returns:
        int: The maximum number of OpenMP threads available.

    Raises:
        OSError: If either shared library cannot be loaded.

    Notes:
        - The function assumes the default path for `libgomp.so.1` on AlmaLinux OS.
        - The path to `libtorch_cpu.so` is constructed based on the Python executable's
          installation directory.
        - This function is specific to environments where PyTorch and OpenMP are used
          together and may require adjustments for other setups.
    """
    python_path = Path(sys.executable).resolve()
    python_prefix = (
        python_path.parent.parent
    )  # Typically goes to the Python installation root

    # Get the additional ABI flags (if any); it may be an empty string.
    abiflags = getattr(sys, "abiflags", "")

    # Construct the Python directory name correctly (e.g., "python3.13t").
    python_version = (
        f"python{sys.version_info.major}.{sys.version_info.minor}{abiflags}"
    )

    libtorch_cpu_path = (
        python_prefix
        / "lib"
        / python_version
        / "site-packages"
        / "torch"
        / "lib"
        / "libtorch_cpu.so"
    )

    # use the default gomp path of AlmaLinux OS
    libgomp_path = "/usr/lib64/libgomp.so.1"

    # NOTE(review): presumably set so that gomp initializes thread affinity
    # deterministically before the libraries are loaded — confirm with caller.
    os.environ["GOMP_CPU_AFFINITY"] = "0-3"

    # Load libgomp first, then libtorch_cpu (which links against libgomp).
    # The OpenMP query is resolved through the libtorch_cpu handle, matching
    # the library torch itself would use at runtime.
    ctypes.CDLL(libgomp_path)
    # str() for pre-3.12 compatibility: ctypes.CDLL only accepts path-like
    # objects since Python 3.12.
    libtorch_cpu = ctypes.CDLL(str(libtorch_cpu_path))

    libtorch_cpu.omp_get_max_threads.restype = ctypes.c_int
    libtorch_cpu.omp_get_max_threads.argtypes = []

    omp_max_threads = libtorch_cpu.omp_get_max_threads()
    return omp_max_threads
60+
61+
62+
def main():
    """Print the OpenMP thread count and fail when libgomp appears double-loaded."""
    omp_max_threads = get_gomp_thread()
    print(
        f"omp_max_threads after loading libgomp.so and libtorch_cpu.so: {omp_max_threads}"
    )
    # Guard clause: anything other than a single thread means OpenMP is healthy.
    if omp_max_threads != 1:
        return
    raise RuntimeError(
        "omp_max_threads is 1. Check whether libgomp.so is loaded twice."
    )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)