ROCm
diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh
Lines changed: 14 additions & 2 deletions
diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
Lines changed: 21 additions & 9 deletions
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
Lines changed: 7 additions & 6 deletions
diff --git a/.ci/docker/centos-rocm/Dockerfile b/.ci/docker/centos-rocm/Dockerfile
Lines changed: 5 additions & 1 deletion
diff --git a/.ci/docker/ci_commit_pins/executorch.txt b/.ci/docker/ci_commit_pins/executorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/docker/ci_commit_pins/huggingface-requirements.txt b/.ci/docker/ci_commit_pins/huggingface-requirements.txt
Lines changed: 4 additions & 0 deletions
diff --git a/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt b/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
Lines changed: 1 addition & 0 deletions
diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt
Lines changed: 4 additions & 0 deletions
diff --git a/.ci/docker/common/install_executorch.sh b/.ci/docker/common/install_executorch.sh
Lines changed: 14 additions & 9 deletions
diff --git a/.ci/docker/common/install_rocm.sh b/.ci/docker/common/install_rocm.sh
Lines changed: 11 additions & 0 deletions
@@ -5,9 +5,15 @@ GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
 
 # Set CUDA architecture lists to match x86 build_cuda.sh
 if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
+<<<<<<< HEAD
     export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;8.0;9.0"
 elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
     export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;10.0;12.0"
+=======
+    export TORCH_CUDA_ARCH_LIST="8.0;9.0"
+elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
+    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
+>>>>>>> upstream/main
 elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
 fi
@@ -31,8 +37,7 @@ pip install -r /pytorch/requirements.txt
 pip install auditwheel==6.2.0 wheel
 if [ "$DESIRED_CUDA" = "cpu" ]; then
     echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
-    #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
-    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
 else
     echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
     export USE_SYSTEM_NCCL=1
@@ -42,13 +47,20 @@ else
         echo "Bundling CUDA libraries with wheel for aarch64."
     else
         echo "Using nvidia libs from pypi for aarch64."
+<<<<<<< HEAD
         # Fix platform constraints in PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64
         # Replace 'platform_machine == "x86_64"' with 'platform_machine == "aarch64"'
         export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS//platform_machine == \'x86_64\'/platform_machine == \'aarch64\'}"
+=======
+>>>>>>> upstream/main
         echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
         export USE_NVIDIA_PYPI_LIBS=1
     fi
 
+<<<<<<< HEAD
     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
     USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+=======
+    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+>>>>>>> upstream/main
 fi
@@ -138,6 +138,11 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
     folder = os.path.dirname(wheel_path)
     os.mkdir(f"{folder}/tmp")
     os.system(f"unzip {wheel_path} -d {folder}/tmp")
+<<<<<<< HEAD
+=======
+    # Delete original wheel since it will be repackaged
+    os.system(f"rm {wheel_path}")
+>>>>>>> upstream/main
 
     # Check if we should use PyPI NVIDIA libraries or bundle system libraries
     use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
@@ -211,7 +216,12 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
         ]
 
         # CUDA version-specific libraries
+<<<<<<< HEAD
         if "130" in desired_cuda:
+=======
+        if "13" in desired_cuda:
+            minor_version = desired_cuda[-1]
+>>>>>>> upstream/main
             version_specific_libs = [
                 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
                 "/usr/local/cuda/lib64/libcublas.so.13",
@@ -221,7 +231,11 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
                 "/usr/local/cuda/lib64/libcusolver.so.12",
                 "/usr/local/cuda/lib64/libnvJitLink.so.13",
                 "/usr/local/cuda/lib64/libnvrtc.so.13",
+<<<<<<< HEAD
                 "/usr/local/cuda/lib64/libnvrtc-builtins.so.13.0",
+=======
+                f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
+>>>>>>> upstream/main
             ]
         elif "12" in desired_cuda:
             # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
@@ -237,6 +251,11 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
                 "/usr/local/cuda/lib64/libnvrtc.so.12",
                 f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
             ]
+<<<<<<< HEAD
+=======
+        else:
+            raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
+>>>>>>> upstream/main
 
         # Combine all libraries
         libs_to_copy = common_libs + version_specific_libs
@@ -275,14 +294,7 @@ def complete_wheel(folder: str) -> str:
             f"/{folder}/dist/{repaired_wheel_name}",
         )
     else:
-        repaired_wheel_name = wheel_name.replace(
-            "linux_aarch64", "manylinux_2_28_aarch64"
-        )
-        print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
-        os.rename(
-            f"/{folder}/dist/{wheel_name}",
-            f"/{folder}/dist/{repaired_wheel_name}",
-        )
+        repaired_wheel_name = list_dir(f"/{folder}/dist")[0]
 
     print(f"Copying {repaired_wheel_name} to artifacts")
     shutil.copy2(
@@ -319,7 +331,7 @@ def parse_arguments():
     ).decode()
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = ""
     # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
     if enable_cuda:
         build_vars += "MAX_JOBS=5 "
 
@@ -214,8 +214,12 @@ case "$tag" in
     TRITON=yes
     ;;
   pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
+<<<<<<< HEAD
     # TODO (huydhn): Upgrade this to Python >= 3.10
     ANACONDA_PYTHON_VERSION=3.9
+=======
+    ANACONDA_PYTHON_VERSION=3.10
+>>>>>>> upstream/main
     GCC_VERSION=11
     VISION=yes
     KATEX=yes
@@ -263,13 +267,10 @@ case "$tag" in
     TRITON_CPU=yes
     ;;
   pytorch-linux-jammy-linter)
-    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
-    # We will need to update mypy version eventually, but that's for another day. The task
-    # would be to upgrade mypy to 1.0.0 with Python 3.11
-    PYTHON_VERSION=3.9
+    PYTHON_VERSION=3.10
     ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
-    PYTHON_VERSION=3.9
+  pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
+    PYTHON_VERSION=3.10
     CUDA_VERSION=12.8.1
     ;;
   pytorch-linux-jammy-aarch64-py3.10-gcc11)
 
@@ -59,9 +59,13 @@ ENV INSTALLED_VISION ${VISION}
 
 # Install rocm
 ARG ROCM_VERSION
+RUN mkdir ci_commit_pins
+COPY ./common/common_utils.sh common_utils.sh
+COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
 COPY ./common/install_rocm.sh install_rocm.sh
 RUN bash ./install_rocm.sh
-RUN rm install_rocm.sh
+RUN rm install_rocm.sh common_utils.sh
+RUN rm -r ci_commit_pins
 COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
 RUN rm install_rocm_magma.sh
 
@@ -1 +1 @@
-56392aa978594cc155fa8af48cd949f5b5f1823a
+e0dda9059d082537cee36be6c5e4fe3b18c880c0
@@ -1,2 +1,6 @@
+<<<<<<< HEAD
 transformers==4.54.0
+=======
+transformers==4.56.0
+>>>>>>> upstream/main
 soxr==0.5.0
@@ -0,0 +1 @@
+7fe50dc3da2069d6645d9deb8c017a876472a977
@@ -1 +1,5 @@
+<<<<<<< HEAD
 6193b30becb1ac7be704cf87b8cb9bf13e7f9689
+=======
+bbb06c0334a6772b92d24bde54956e675c8c6604
+>>>>>>> upstream/main
@@ -42,22 +42,27 @@ install_pip_dependencies() {
   # A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
   # numba and scipy version used in PyTorch CI
   conda_run pip uninstall -y numba scipy
+  # Yaspin is needed for running CI test (get_benchmark_analysis_data.py)
+  pip_install yaspin==3.1.0
 
   popd
 }
 
 setup_executorch() {
-  pushd executorch
-
   export PYTHON_EXECUTABLE=python
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
+  export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON"
 
   as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
-  popd
 }
 
-clone_executorch
-install_buck2
-install_conda_dependencies
-install_pip_dependencies
-setup_executorch
+if [ $# -eq 0 ]; then
+  clone_executorch
+  install_buck2
+  install_conda_dependencies
+  install_pip_dependencies
+  pushd executorch
+  setup_executorch
+  popd
+else
+  "$@"
+fi
@@ -2,6 +2,11 @@
 
 set -ex
 
+# for pip_install function
+source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+
+ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)"
+
 # Print the dot-separated fields of $1 packed into one fixed-width number:
 # the first field is space-padded to width 3 and each of the next three is
 # zero-padded to width 3. Fields that are absent are printed as 0 by printf.
 # Example: `ver 6.2.1` prints "  6002001000".
 ver() {
     local dotted_version="$1"
     local -a fields
     # Turn the dots into blanks and let `read` split the result into fields.
     read -r -a fields <<<"${dotted_version//./ }"
     printf "%3d%03d%03d%03d" "${fields[@]}"
 }
@@ -109,8 +114,12 @@ EOF
         rm -rf HIP clr
     fi
 
+<<<<<<< HEAD
     # temporary hipblasLT dependency install
     apt install libmsgpackc2
+=======
+    pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
+>>>>>>> upstream/main
 
     # Cleanup
     apt-get autoclean && apt-get clean
@@ -195,6 +204,8 @@ install_centos() {
       sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
   done
 
+  pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
+
   # Cleanup
   yum clean all
   rm -rf /var/cache/yum
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-56392aa978594cc155fa8af48cd949f5b5f1823a`
	`1`	`+e0dda9059d082537cee36be6c5e4fe3b18c880c0`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+7fe50dc3da2069d6645d9deb8c017a876472a977`