diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f0d8f76 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,28 @@ +--- + +version: 2 + +updates: + # Automatically propose PRs for out-of-date GitHub actions + - package-ecosystem: github-actions + directory: "/" + schedule: + # Check for new versions weekly + interval: weekly + # Update all actions in a single PR + groups: + github-actions: + patterns: ["*"] + labels: + - automation + - gha-update + + # Automatically propose PRs for Python dependencies + - package-ecosystem: pip + directory: "/python" + schedule: + # Check for new versions daily + interval: daily + labels: + - automation + - pip-update diff --git a/.github/workflows/publish-benchmark-images.yaml b/.github/workflows/publish-benchmark-images.yaml index 20fbede..550ff56 100644 --- a/.github/workflows/publish-benchmark-images.yaml +++ b/.github/workflows/publish-benchmark-images.yaml @@ -12,7 +12,13 @@ jobs: build_push_images: name: Build and push benchmark images runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files strategy: + fail-fast: false matrix: include: - component: discovery @@ -21,7 +27,8 @@ jobs: - component: mpi-benchmarks - component: openfoam - component: perftest - - component: pytorch-benchmarks + # TODO - need to fix this build + # - component: pytorch-benchmarks steps: - name: Check out the repository uses: actions/checkout@v2 diff --git a/.github/workflows/publish-operator.yaml b/.github/workflows/publish-operator.yaml index 2b4a41e..d9a30b2 100644 --- a/.github/workflows/publish-operator.yaml +++ b/.github/workflows/publish-operator.yaml @@ -12,6 +12,11 @@ jobs: build_push_operator_image: name: Build and push operator image runs-on: ubuntu-latest + permissions: + contents: read + id-token: write 
# needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files steps: - name: Check out the repository uses: actions/checkout@v2 diff --git a/images/iperf/Dockerfile b/images/iperf/Dockerfile index 737afd2..10cb23d 100644 --- a/images/iperf/Dockerfile +++ b/images/iperf/Dockerfile @@ -2,12 +2,11 @@ # Dockerfile for the iperf benchmarks ##### - FROM debian:bookworm-slim -ARG IPERF_VERSION=2.1.8+dfsg-1 RUN apt-get update && \ - apt-get install -y "iperf=$IPERF_VERSION" && \ + apt-get upgrade -y && \ + apt-get install -y "iperf" && \ rm -rf /var/lib/apt/lists/* EXPOSE 5001 diff --git a/images/mpi-benchmarks/Dockerfile b/images/mpi-benchmarks/Dockerfile index 95764fa..093c319 100644 --- a/images/mpi-benchmarks/Dockerfile +++ b/images/mpi-benchmarks/Dockerfile @@ -3,20 +3,19 @@ # https://www.intel.com/content/www/us/en/develop/documentation/imb-user-guide/top.html ##### -FROM rockylinux:9.2 +FROM quay.io/rockylinux/rockylinux:9.5 -ARG MPITESTS_VERSION=5.8 RUN yum install -y \ openssh-clients openssh-server \ rdma-core ucx-ib ucx-rdmacm \ - "mpitests-openmpi-${MPITESTS_VERSION}" && \ + mpitests-openmpi && \ yum clean all -y && \ rm -rf /var/cache # Make sure the MPI binaries are on the PATH ENV OPENMPI_ROOT=/usr/lib64/openmpi ENV PATH=$OPENMPI_ROOT/bin:$PATH -ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib # Install helper scripts COPY ./scripts/* /usr/local/bin diff --git a/images/perftest/Dockerfile b/images/perftest/Dockerfile index 92e677e..974a2c1 100644 --- a/images/perftest/Dockerfile +++ b/images/perftest/Dockerfile @@ -2,9 +2,8 @@ # Dockerfile for the RDMA bandwidth and latency benchmarks ##### -FROM rockylinux:9.2 +FROM quay.io/rockylinux/rockylinux:9.5 -ARG PERFTEST_VERSION=4.5.0.20 -RUN yum install -y "perftest-${PERFTEST_VERSION}" && \ +RUN yum install -y perftest && \ yum clean all -y && \ rm -rf /var/cache 
diff --git a/images/pytorch-benchmarks/Dockerfile b/images/pytorch-benchmarks/Dockerfile index 7454a19..aba03ea 100644 --- a/images/pytorch-benchmarks/Dockerfile +++ b/images/pytorch-benchmarks/Dockerfile @@ -1,14 +1,14 @@ -FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime +FROM pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime RUN apt update && apt install -y git time RUN git clone https://github.com/pytorch/benchmark WORKDIR /workspace/benchmark # Pin pytorch-benchmark repo version -RUN git reset --hard 6fef32ddaf93a63088b97eb27620fb57ef247521 +RUN git reset --hard a22a2a8309d513c66df995ae27ee48c954b49f66 # List of models here should match PytorchModel enum # in python/perftest/pytorch.py RUN python install.py alexnet resnet50 llama # PyTorch install.py pins numpy=1.21.2 but # this breaks numba so update both here -RUN pip install -U numpy numba \ No newline at end of file +RUN pip install -U numpy numba diff --git a/python/Dockerfile b/python/Dockerfile index f48caac..a051455 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -1,4 +1,34 @@ -FROM python:3.9 +FROM ubuntu:jammy AS build-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3.10-venv git -y && \ + rm -rf /var/lib/apt/lists/* + +# build into a venv we can copy across +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY . /perftest +RUN pip install -U pip setuptools +RUN pip install --no-deps --requirement /perftest/requirements.txt +RUN pip install -e /perftest + +# +# Now the image we run with +# +FROM ubuntu:jammy AS run-image + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install --no-install-recommends python3 tini ca-certificates -y && \ + rm -rf /var/lib/apt/lists/* + +# Copy across the venv +COPY --from=build-image /opt/venv /opt/venv +# Copy code to keep editable install working +COPY . 
/perftest +ENV PATH="/opt/venv/bin:$PATH" # Create the user that will be used to run the app ENV APP_UID 1001 @@ -22,14 +52,9 @@ RUN apt-get update && \ # Don't buffer stdout and stderr as it breaks realtime logging ENV PYTHONUNBUFFERED 1 -# Install dependencies -# Doing this separately by copying only the requirements file enables better use of the build cache -COPY ./requirements.txt /perftest/ -RUN pip install --no-deps --requirement /perftest/requirements.txt - -# Install the perftest package -COPY . /perftest -RUN pip install --no-deps -e /perftest +# Make httpx use the system trust roots +# By default, this means we use the CAs from the ca-certificates package +ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt # By default, run the operator using kopf USER $APP_UID diff --git a/python/requirements.txt b/python/requirements.txt index 8b0c5cb..aa1fa46 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,28 +1,32 @@ -aiohttp==3.10.11 -aiosignal==1.2.0 -anyio==3.6.1 -async-timeout==4.0.2 -attrs==22.1.0 -certifi==2024.7.4 -charset-normalizer==2.1.1 -click==8.1.3 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.12 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.8.0 +async-timeout==5.0.1 +attrs==25.1.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d -easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2 -frozenlist==1.3.1 -h11==0.12.0 -httpcore==0.15.0 -httpx==0.23.0 +easykube==0.5.0 +frozenlist==1.5.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 idna==3.10 -iso8601==1.0.2 +iso8601==2.1.0 Jinja2==3.1.5 -kopf==1.35.6 +kopf==1.37.4 kube-custom-resource @ git+https://github.com/stackhpc/kube-custom-resource.git@851b1bf25fecdbc180e73494eb77c7899274ee15 -MarkupSafe==2.1.1 -multidict==6.0.2 -pydantic==1.10.13 -python-json-logger==2.0.4 -PyYAML==6.0 -rfc3986==1.5.0 -sniffio==1.3.0 
-typing-extensions==4.3.0 -yarl==1.8.1 +MarkupSafe==3.0.2 +multidict==6.1.0 +propcache==0.2.1 +pydantic==1.10.21 +pydantic_core==2.27.2 +python-json-logger==3.2.1 +PyYAML==6.0.2 +rfc3986==2.0.0 +sniffio==1.3.1 +typing_extensions==4.12.2 +yarl==1.18.3