From bc20ad6bb9a6db23ea83d2481da7d5d81dfddf24 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 12:48:49 +0000 Subject: [PATCH 01/12] Fix up workflow permissions We have picked up the image signing and scanning that needs some more permissions --- .github/workflows/publish-benchmark-images.yaml | 5 +++++ .github/workflows/publish-operator.yaml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/publish-benchmark-images.yaml b/.github/workflows/publish-benchmark-images.yaml index 20fbede..fda478d 100644 --- a/.github/workflows/publish-benchmark-images.yaml +++ b/.github/workflows/publish-benchmark-images.yaml @@ -12,6 +12,11 @@ jobs: build_push_images: name: Build and push benchmark images runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files strategy: matrix: include: diff --git a/.github/workflows/publish-operator.yaml b/.github/workflows/publish-operator.yaml index 2b4a41e..d9a30b2 100644 --- a/.github/workflows/publish-operator.yaml +++ b/.github/workflows/publish-operator.yaml @@ -12,6 +12,11 @@ jobs: build_push_operator_image: name: Build and push operator image runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files steps: - name: Check out the repository uses: actions/checkout@v2 From c2574fcd2c82adf424656bc4e1f895683fc73b05 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 13:12:56 +0000 Subject: [PATCH 02/12] Try to unblock the benchmark containers --- .github/workflows/publish-benchmark-images.yaml | 1 + images/iperf/Dockerfile | 5 ++--- images/mpi-benchmarks/Dockerfile | 7 +++---- images/perftest/Dockerfile | 5 ++--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish-benchmark-images.yaml b/.github/workflows/publish-benchmark-images.yaml index fda478d..4a45108 100644 --- a/.github/workflows/publish-benchmark-images.yaml +++ b/.github/workflows/publish-benchmark-images.yaml @@ -18,6 +18,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files strategy: + fail-fast: false matrix: include: - component: discovery diff --git a/images/iperf/Dockerfile b/images/iperf/Dockerfile index 737afd2..10cb23d 100644 --- a/images/iperf/Dockerfile +++ b/images/iperf/Dockerfile @@ -2,12 +2,11 @@ # Dockerfile for the iperf benchmarks ##### - FROM debian:bookworm-slim -ARG IPERF_VERSION=2.1.8+dfsg-1 RUN apt-get update && \ - apt-get install -y "iperf=$IPERF_VERSION" && \ + apt-get upgrade && \ + apt-get install -y "iperf" && \ rm -rf /var/lib/apt/lists/* EXPOSE 5001 diff --git a/images/mpi-benchmarks/Dockerfile b/images/mpi-benchmarks/Dockerfile index 95764fa..b17696d 100644 --- a/images/mpi-benchmarks/Dockerfile +++ b/images/mpi-benchmarks/Dockerfile @@ -3,20 +3,19 @@ # https://www.intel.com/content/www/us/en/develop/documentation/imb-user-guide/top.html ##### -FROM rockylinux:9.2 +FROM rockylinux:9.5 -ARG MPITESTS_VERSION=5.8 RUN yum install -y \ openssh-clients openssh-server \ rdma-core ucx-ib ucx-rdmacm \ - "mpitests-openmpi-${MPITESTS_VERSION}" && \ + mpitests-openmpi && \ yum clean all -y && \ rm -rf /var/cache # Make sure the MPI binaries are on the PATH ENV OPENMPI_ROOT=/usr/lib64/openmpi ENV PATH=$OPENMPI_ROOT/bin:$PATH -ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib # Install helper scripts COPY ./scripts/* /usr/local/bin diff --git a/images/perftest/Dockerfile b/images/perftest/Dockerfile index 92e677e..b87db63 100644 --- a/images/perftest/Dockerfile +++ b/images/perftest/Dockerfile @@ -2,9 +2,8 @@ # Dockerfile for the RDMA bandwidth and latency benchmarks ##### -FROM rockylinux:9.2 +FROM rockylinux:9.5 -ARG PERFTEST_VERSION=4.5.0.20 -RUN yum install -y "perftest-${PERFTEST_VERSION}" && \ +RUN yum install -y perftest && \ yum clean all -y && \ rm -rf /var/cache From 0628c0ff731b6fde8bf975703e806dad9b1c8db5 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 13:36:34 +0000 Subject: [PATCH 03/12] Fix up the rocky image reference --- images/mpi-benchmarks/Dockerfile | 2 +- images/perftest/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/images/mpi-benchmarks/Dockerfile b/images/mpi-benchmarks/Dockerfile index b17696d..093c319 100644 --- a/images/mpi-benchmarks/Dockerfile +++ b/images/mpi-benchmarks/Dockerfile @@ -3,7 +3,7 @@ # https://www.intel.com/content/www/us/en/develop/documentation/imb-user-guide/top.html ##### -FROM rockylinux:9.5 +FROM quay.io/rockylinux/rockylinux:9.5 RUN yum install -y \ openssh-clients openssh-server \ diff --git a/images/perftest/Dockerfile b/images/perftest/Dockerfile index b87db63..974a2c1 100644 --- a/images/perftest/Dockerfile +++ b/images/perftest/Dockerfile @@ -2,7 +2,7 @@ # Dockerfile for the RDMA bandwidth and latency benchmarks ##### -FROM rockylinux:9.5 +FROM quay.io/rockylinux/rockylinux:9.5 RUN yum install -y perftest && \ yum clean all -y && \ From 5fd5d3d5c1c560debe824eef60d8d43c368f436e Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 13:40:19 +0000 Subject: [PATCH 04/12] Bump the pytorch base image --- images/pytorch-benchmarks/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/images/pytorch-benchmarks/Dockerfile b/images/pytorch-benchmarks/Dockerfile index 7454a19..aba03ea 100644 --- a/images/pytorch-benchmarks/Dockerfile +++ b/images/pytorch-benchmarks/Dockerfile @@ -1,14 +1,14 @@ -FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime +FROM pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime RUN apt update && apt install -y git time RUN git clone https://github.com/pytorch/benchmark WORKDIR /workspace/benchmark # Pin pytorch-benchmark repo version -RUN git reset --hard 6fef32ddaf93a63088b97eb27620fb57ef247521 +RUN git reset --hard a22a2a8309d513c66df995ae27ee48c954b49f66 # List of models here should match PytorchModel enum # in python/perftest/pytorch.py RUN python install.py alexnet resnet50 llama # PyTorch install.py pins numpy=1.21.2 but # this breaks numba so update both here -RUN pip install -U numpy numba \ No newline at end of file +RUN pip install -U numpy numba From c70ab5838f43bf2c2c50d2dc13a886ba00494579 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 13:48:41 +0000 Subject: [PATCH 05/12] Configure dependabot --- .github/dependabot.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f0d8f76 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,28 @@ +--- + +version: 2 + +updates: + # Automatically propose PRs for out-of-date GitHub actions + - package-ecosystem: github-actions + directory: "/" + schedule: + # Check for new versions weekly + interval: weekly + # Update all actions in a single PR + groups: + github-actions: + patterns: ["*"] + labels: + - automation + - gha-update + + # Automatically propose PRs for Python dependencies + - package-ecosystem: pip + directory: "/python" + schedule: + # Check for new versions daily + interval: daily + labels: + - automation + - pip-update From 443dfb2c10a516d33218d7d17495ec966dae7cf8 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 13:55:03 +0000 Subject: [PATCH 06/12] Skip pytouch builds for the moment --- .github/workflows/publish-benchmark-images.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish-benchmark-images.yaml b/.github/workflows/publish-benchmark-images.yaml index 4a45108..550ff56 100644 --- a/.github/workflows/publish-benchmark-images.yaml +++ b/.github/workflows/publish-benchmark-images.yaml @@ -27,7 +27,8 @@ jobs: - component: mpi-benchmarks - component: openfoam - component: perftest - - component: pytorch-benchmarks + # TODO - need to fix this build + # - component: pytorch-benchmarks steps: - name: Check out the repository uses: actions/checkout@v2 From b593523b32cf087cfbd3765e2e0c95ab44c35c2d Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 6 Feb 2025 15:32:23 +0000 Subject: [PATCH 07/12] Fix up requirements to pre pydantic2.0 --- python/requirements.txt | 50 ++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/python/requirements.txt b/python/requirements.txt index 8b0c5cb..6252b98 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,28 +1,32 @@ -aiohttp==3.10.11 -aiosignal==1.2.0 -anyio==3.6.1 -async-timeout==4.0.2 -attrs==22.1.0 -certifi==2024.7.4 -charset-normalizer==2.1.1 -click==8.1.3 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.12 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.8.0 +async-timeout==5.0.1 +attrs==25.1.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2 -frozenlist==1.3.1 -h11==0.12.0 -httpcore==0.15.0 -httpx==0.23.0 +frozenlist==1.5.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 idna==3.10 -iso8601==1.0.2 +iso8601==2.1.0 Jinja2==3.1.5 -kopf==1.35.6 +kopf==1.37.4 kube-custom-resource @ git+https://github.com/stackhpc/kube-custom-resource.git@851b1bf25fecdbc180e73494eb77c7899274ee15 -MarkupSafe==2.1.1 -multidict==6.0.2 -pydantic==1.10.13 -python-json-logger==2.0.4 -PyYAML==6.0 -rfc3986==1.5.0 -sniffio==1.3.0 -typing-extensions==4.3.0 -yarl==1.8.1 +MarkupSafe==3.0.2 +multidict==6.1.0 +propcache==0.2.1 +pydantic==1.10.21 +pydantic_core==2.27.2 +python-json-logger==3.2.1 +PyYAML==6.0.2 +rfc3986==2.0.0 +sniffio==1.3.1 +typing_extensions==4.12.2 +yarl==1.18.3 From 3e5b835fdfe1d3bbcdf948bcf668512f71dd5fde Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 10 Feb 2025 10:38:07 +0000 Subject: [PATCH 08/12] Fix up python requirements to older pydantic --- python/Dockerfile | 43 ++++++++++++++++++++++++++++++++--------- python/requirements.txt | 2 +- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/python/Dockerfile b/python/Dockerfile index f48caac..6b4ebf1 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -1,4 +1,34 @@ -FROM python:3.9 +FROM ubuntu:jammy as build-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3.10-venv git -y && \ + rm -rf /var/lib/apt/lists/* + +# build into a venv we can copy across +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY ./requirements.txt /perftest/ +RUN pip install -U pip setuptools +RUN pip install --no-deps --requirement /perftect/requirements.txt + +COPY . /perftest +RUN pip install -e /perftest + +# +# Now the image we run with +# +FROM ubuntu:jammy as run-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3 tini ca-certificates -y && \ + rm -rf /var/lib/apt/lists/* + +# Copy accross the venv +COPY --from=build-image /opt/venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" # Create the user that will be used to run the app ENV APP_UID 1001 @@ -22,14 +52,9 @@ RUN apt-get update && \ # Don't buffer stdout and stderr as it breaks realtime logging ENV PYTHONUNBUFFERED 1 -# Install dependencies -# Doing this separately by copying only the requirements file enables better use of the build cache -COPY ./requirements.txt /perftest/ -RUN pip install --no-deps --requirement /perftest/requirements.txt - -# Install the perftest package -COPY . /perftest -RUN pip install --no-deps -e /perftest +# Make httpx use the system trust roots +# By default, this means we use the CAs from the ca-certificates package +ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt # By default, run the operator using kopf USER $APP_UID diff --git a/python/requirements.txt b/python/requirements.txt index 6252b98..aa1fa46 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,7 +9,7 @@ certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d -easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2 +easykube==0.5.0 frozenlist==1.5.0 h11==0.14.0 httpcore==1.0.7 From acd07f7b9c71aa89a1e3535bd528ef25cb7faef8 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 10 Feb 2025 10:50:50 +0000 Subject: [PATCH 09/12] Revert "Fix up python requirements to older pydantic" This reverts commit 3e5b835fdfe1d3bbcdf948bcf668512f71dd5fde. --- python/Dockerfile | 43 +++++++++-------------------------------- python/requirements.txt | 2 +- 2 files changed, 10 insertions(+), 35 deletions(-) diff --git a/python/Dockerfile b/python/Dockerfile index 6b4ebf1..f48caac 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -1,34 +1,4 @@ -FROM ubuntu:jammy as build-image - -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install --no-install-recommends python3.10-venv git -y && \ - rm -rf /var/lib/apt/lists/* - -# build into a venv we can copy across -RUN python3 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" - -COPY ./requirements.txt /perftest/ -RUN pip install -U pip setuptools -RUN pip install --no-deps --requirement /perftect/requirements.txt - -COPY . /perftest -RUN pip install -e /perftest - -# -# Now the image we run with -# -FROM ubuntu:jammy as run-image - -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install --no-install-recommends python3 tini ca-certificates -y && \ - rm -rf /var/lib/apt/lists/* - -# Copy accross the venv -COPY --from=build-image /opt/venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +FROM python:3.9 # Create the user that will be used to run the app ENV APP_UID 1001 @@ -52,9 +22,14 @@ RUN apt-get update && \ # Don't buffer stdout and stderr as it breaks realtime logging ENV PYTHONUNBUFFERED 1 -# Make httpx use the system trust roots -# By default, this means we use the CAs from the ca-certificates package -ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt +# Install dependencies +# Doing this separately by copying only the requirements file enables better use of the build cache +COPY ./requirements.txt /perftest/ +RUN pip install --no-deps --requirement /perftest/requirements.txt + +# Install the perftest package +COPY . /perftest +RUN pip install --no-deps -e /perftest # By default, run the operator using kopf USER $APP_UID diff --git a/python/requirements.txt b/python/requirements.txt index aa1fa46..6252b98 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,7 +9,7 @@ certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d -easykube==0.5.0 +easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2 frozenlist==1.5.0 h11==0.14.0 httpcore==1.0.7 From e7820f0b0f3cf01dd54b9224e8f3955b3145cae7 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 10 Feb 2025 10:52:03 +0000 Subject: [PATCH 10/12] unpin easykube --- python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/requirements.txt b/python/requirements.txt index 6252b98..aa1fa46 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,7 +9,7 @@ certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d -easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2 +easykube==0.5.0 frozenlist==1.5.0 h11==0.14.0 httpcore==1.0.7 From 590130e2258a48ec17d718a16960c039979f071f Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 10 Feb 2025 10:58:24 +0000 Subject: [PATCH 11/12] Move to two stage docker build for the operator --- python/Dockerfile | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/python/Dockerfile b/python/Dockerfile index f48caac..971e57c 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -1,4 +1,34 @@ -FROM python:3.9 +FROM ubuntu:jammy as build-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3.10-venv git -y && \ + rm -rf /var/lib/apt/lists/* + +# build into a venv we can copy across +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY ./requirements.txt /perftest/ +RUN pip install -U pip setuptools +RUN pip install --no-deps --requirement /perftest/requirements.txt + +COPY . /perftest +RUN pip install -e /perftest + +# +# Now the image we run with +# +FROM ubuntu:jammy as run-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3 tini ca-certificates -y && \ + rm -rf /var/lib/apt/lists/* + +# Copy accross the venv +COPY --from=build-image /opt/venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" # Create the user that will be used to run the app ENV APP_UID 1001 @@ -22,14 +52,9 @@ RUN apt-get update && \ # Don't buffer stdout and stderr as it breaks realtime logging ENV PYTHONUNBUFFERED 1 -# Install dependencies -# Doing this separately by copying only the requirements file enables better use of the build cache -COPY ./requirements.txt /perftest/ -RUN pip install --no-deps --requirement /perftest/requirements.txt - -# Install the perftest package -COPY . /perftest -RUN pip install --no-deps -e /perftest +# Make httpx use the system trust roots +# By default, this means we use the CAs from the ca-certificates package +ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt # By default, run the operator using kopf USER $APP_UID From 33e8f72956d5ddd5f53f6f1fbd724329e55fd364 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 10 Feb 2025 22:06:37 +0000 Subject: [PATCH 12/12] copy source into container --- python/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/Dockerfile b/python/Dockerfile index 971e57c..a051455 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -9,11 +9,9 @@ RUN apt-get update && \ RUN python3 -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" -COPY ./requirements.txt /perftest/ +COPY . /perftest RUN pip install -U pip setuptools RUN pip install --no-deps --requirement /perftest/requirements.txt - -COPY . /perftest RUN pip install -e /perftest # @@ -28,6 +26,8 @@ RUN apt-get update && \ # Copy accross the venv COPY --from=build-image /opt/venv /opt/venv +# Copy code to keep editable install working +COPY . /perftest ENV PATH="/opt/venv/bin:$PATH" # Create the user that will be used to run the app