Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# Dependabot configuration (schema version 2).
# Each item under `updates` configures one package ecosystem; every key that
# belongs to an entry must be nested beneath its `- package-ecosystem` item.

version: 2

updates:
  # Automatically propose PRs for out-of-date GitHub Actions
  - package-ecosystem: github-actions
    directory: "/"
    schedule:
      # Check for new versions weekly
      interval: weekly
    # Update all actions in a single PR
    groups:
      github-actions:
        patterns: ["*"]
    labels:
      - automation
      - gha-update

  # Automatically propose PRs for Python dependencies
  - package-ecosystem: pip
    directory: "/python"
    schedule:
      # Check for new versions daily
      interval: daily
    labels:
      - automation
      - pip-update
9 changes: 8 additions & 1 deletion .github/workflows/publish-benchmark-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ jobs:
build_push_images:
name: Build and push benchmark images
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write # needed for signing the images with GitHub OIDC Token
packages: write # required for pushing container images
security-events: write # required for pushing SARIF files
strategy:
fail-fast: false
matrix:
include:
- component: discovery
Expand All @@ -21,7 +27,8 @@ jobs:
- component: mpi-benchmarks
- component: openfoam
- component: perftest
- component: pytorch-benchmarks
# TODO - need to fix this build
# - component: pytorch-benchmarks
steps:
- name: Check out the repository
uses: actions/checkout@v2
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/publish-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ jobs:
build_push_operator_image:
name: Build and push operator image
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write # needed for signing the images with GitHub OIDC Token
packages: write # required for pushing container images
security-events: write # required for pushing SARIF files
steps:
- name: Check out the repository
uses: actions/checkout@v2
Expand Down
5 changes: 2 additions & 3 deletions images/iperf/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
# Dockerfile for the iperf benchmarks
#####


FROM debian:bookworm-slim

ARG IPERF_VERSION=2.1.8+dfsg-1
# Install iperf and apply pending package upgrades in a single layer.
# Every apt-get step that can prompt must pass -y: a Docker build has no TTY,
# so an unanswered "Do you want to continue?" aborts the build whenever
# upgradable packages exist.
RUN apt-get update && \
    apt-get install -y "iperf=$IPERF_VERSION" && \
    apt-get upgrade -y && \
    apt-get install -y "iperf" && \
    rm -rf /var/lib/apt/lists/*

EXPOSE 5001
7 changes: 3 additions & 4 deletions images/mpi-benchmarks/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@
# https://www.intel.com/content/www/us/en/develop/documentation/imb-user-guide/top.html
#####

FROM rockylinux:9.2
FROM quay.io/rockylinux/rockylinux:9.5

ARG MPITESTS_VERSION=5.8
RUN yum install -y \
openssh-clients openssh-server \
rdma-core ucx-ib ucx-rdmacm \
"mpitests-openmpi-${MPITESTS_VERSION}" && \
mpitests-openmpi && \
yum clean all -y && \
rm -rf /var/cache

# Make sure the MPI binaries are on the PATH
ENV OPENMPI_ROOT=/usr/lib64/openmpi
ENV PATH=$OPENMPI_ROOT/bin:$PATH
ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib:$LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib

# Install helper scripts
COPY ./scripts/* /usr/local/bin
5 changes: 2 additions & 3 deletions images/perftest/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
# Dockerfile for the RDMA bandwidth and latency benchmarks
#####

FROM rockylinux:9.2
FROM quay.io/rockylinux/rockylinux:9.5

ARG PERFTEST_VERSION=4.5.0.20
RUN yum install -y "perftest-${PERFTEST_VERSION}" && \
RUN yum install -y perftest && \
yum clean all -y && \
rm -rf /var/cache
6 changes: 3 additions & 3 deletions images/pytorch-benchmarks/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
FROM pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime

RUN apt update && apt install -y git time
RUN git clone https://github.com/pytorch/benchmark
WORKDIR /workspace/benchmark
# Pin pytorch-benchmark repo version
RUN git reset --hard 6fef32ddaf93a63088b97eb27620fb57ef247521
RUN git reset --hard a22a2a8309d513c66df995ae27ee48c954b49f66
# List of models here should match PytorchModel enum
# in python/perftest/pytorch.py
RUN python install.py alexnet resnet50 llama

# PyTorch install.py pins numpy=1.21.2 but
# this breaks numba so update both here
RUN pip install -U numpy numba
RUN pip install -U numpy numba
43 changes: 34 additions & 9 deletions python/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,34 @@
FROM python:3.9
# Build stage: assembles a virtualenv in /opt/venv that a later stage copies
# out via COPY --from=build-image, so the stage name must stay "build-image".
FROM ubuntu:jammy AS build-image

# python3.10-venv provides the venv module; git is installed because
# requirements.txt contains git+https:// dependencies.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install --no-install-recommends python3.10-venv git -y && \
    rm -rf /var/lib/apt/lists/*

# build into a venv we can copy across
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

RUN pip install --no-cache-dir -U pip setuptools

# Install the pinned dependencies from the requirements file alone, so this
# layer is reused from the build cache until requirements.txt itself changes.
COPY ./requirements.txt /perftest/requirements.txt
RUN pip install --no-cache-dir --no-deps --requirement /perftest/requirements.txt

# Copy the full source last; the editable install points the venv at /perftest.
COPY . /perftest
RUN pip install --no-cache-dir -e /perftest

#
# Now the image we run with
#
FROM ubuntu:jammy as run-image

RUN apt-get update && \
apt-get upgrade -y && \
apt-get install --no-install-recommends python3 tini ca-certificates -y && \
rm -rf /var/lib/apt/lists/*

# Copy across the venv
COPY --from=build-image /opt/venv /opt/venv
# Copy code to keep editable install working
COPY . /perftest
ENV PATH="/opt/venv/bin:$PATH"

# Create the user that will be used to run the app
ENV APP_UID 1001
Expand All @@ -22,14 +52,9 @@ RUN apt-get update && \
# Don't buffer stdout and stderr as it breaks realtime logging
ENV PYTHONUNBUFFERED 1

# Install dependencies
# Doing this separately by copying only the requirements file enables better use of the build cache
COPY ./requirements.txt /perftest/
RUN pip install --no-deps --requirement /perftest/requirements.txt

# Install the perftest package
COPY . /perftest
RUN pip install --no-deps -e /perftest
# Make httpx use the system trust roots
# By default, this means we use the CAs from the ca-certificates package
ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt

# By default, run the operator using kopf
USER $APP_UID
Expand Down
52 changes: 28 additions & 24 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,32 @@
aiohttp==3.10.11
aiosignal==1.2.0
anyio==3.6.1
async-timeout==4.0.2
attrs==22.1.0
certifi==2024.7.4
charset-normalizer==2.1.1
click==8.1.3
aiohappyeyeballs==2.4.4
aiohttp==3.11.12
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.8.0
async-timeout==5.0.1
attrs==25.1.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d
easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2
frozenlist==1.3.1
h11==0.12.0
httpcore==0.15.0
httpx==0.23.0
easykube==0.5.0
frozenlist==1.5.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
idna==3.10
iso8601==1.0.2
iso8601==2.1.0
Jinja2==3.1.5
kopf==1.35.6
kopf==1.37.4
kube-custom-resource @ git+https://github.com/stackhpc/kube-custom-resource.git@851b1bf25fecdbc180e73494eb77c7899274ee15
MarkupSafe==2.1.1
multidict==6.0.2
pydantic==1.10.13
python-json-logger==2.0.4
PyYAML==6.0
rfc3986==1.5.0
sniffio==1.3.0
typing-extensions==4.3.0
yarl==1.8.1
MarkupSafe==3.0.2
multidict==6.1.0
propcache==0.2.1
pydantic==1.10.21
pydantic_core==2.27.2
python-json-logger==3.2.1
PyYAML==6.0.2
rfc3986==2.0.0
sniffio==1.3.1
typing_extensions==4.12.2
yarl==1.18.3
Loading