Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e8c630a
Make container runnable under Apptainer/Singularity
t0mdavid-m May 13, 2026
e6e6e50
fix(docker): make entrypoint apptainer/singularity-compatible
claude May 13, 2026
90d6f28
fix(apptainer): traverse /root, bound redis wait, drop chmod 0777
claude May 13, 2026
02bf8db
fix(apptainer): cd /app before exec to survive WORKDIR drop
claude May 13, 2026
2398ff6
fix(entrypoint): warn when nginx-backed multi-instance is downgraded
claude May 13, 2026
189a94b
ci(apptainer): self-diagnose by streaming instance logs
claude May 13, 2026
dd17f25
fix(apptainer): use `instance run` so the entrypoint actually executes
claude May 15, 2026
b36fb90
Merge pull request #387 from OpenMS/claude/fix-opendiakiosk-apptainer…
t0mdavid-m May 15, 2026
628abc2
fix(singularity): pre-create /workspaces and /mounted-data so :rw bin…
claude May 15, 2026
f8d0106
fix(upload): gate mounted-drive browser on os.path.ismount, not exist…
claude May 15, 2026
2765119
Merge pull request #388 from OpenMS/claude/singularity-bind-mountpoints
t0mdavid-m May 15, 2026
d85c6d8
Merge branch 'main' into claude/fix-opendiakiosk-apptainer-dHZXJ
t0mdavid-m May 15, 2026
9128698
Merge pull request #386 from OpenMS/claude/fix-opendiakiosk-apptainer…
t0mdavid-m May 15, 2026
a40e0c6
fix(apptainer): use unix socket for Redis so host:6379 can't shadow us
claude May 15, 2026
bce2e27
Merge pull request #389 from OpenMS/claude/singularity-bind-mountpoints
t0mdavid-m May 15, 2026
bb86534
Merge remote-tracking branch 'template/main' into singularity_support
t0mdavid-m May 15, 2026
1a17170
ci(apptainer): publish prebuilt SIFs to GHCR via ORAS
claude May 15, 2026
6ca8e97
Merge pull request #390 from OpenMS/claude/singularity-github-hosting…
t0mdavid-m May 15, 2026
f01c16b
Merge remote-tracking branch 'template/main' into singularity_support
t0mdavid-m May 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
319 changes: 300 additions & 19 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,258 @@ jobs:
FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1)
docker tag "$FIRST_TAG" openms-streamlit:test

- name: Save image as tar
  # Serialise the locally tagged test image so it can be shipped to other
  # jobs as a workflow artifact.
  run: |
    docker save --output /tmp/image.tar openms-streamlit:test

- name: Upload image artifact
  # Publishes the image tarball under a per-variant name; the test jobs in
  # this workflow download it by that same name. Kept for one day only.
  uses: actions/upload-artifact@v4
  with:
    path: /tmp/image.tar
    name: openms-streamlit-${{ matrix.variant }}-image
    retention-days: 1

test-apptainer:
  # Apptainer/Singularity is the dominant container runtime on HPC clusters.
  # It mounts the root filesystem read-only and runs as the host user's UID
  # (not root inside the image). The entrypoint must tolerate both: this job
  # exercises that contract by running the built image under apptainer and
  # waiting for the streamlit /_stcore/health endpoint to come up.
  #
  # Flow: download the docker image tarball -> convert to SIF -> start an
  # instance with host binds -> probe health, Redis and bind mounts -> on
  # push events, upload the validated SIF for the publish job.
  needs: build
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      variant: [full]
  steps:
    - uses: actions/checkout@v4

    - name: Download image artifact
      uses: actions/download-artifact@v4
      with:
        name: openms-streamlit-${{ matrix.variant }}-image
        path: /tmp

    - name: Install apptainer
      uses: eWaterCycle/setup-apptainer@v2
      with:
        apptainer-version: 1.3.4

    - name: Build SIF from docker-archive
      run: |
        sudo apptainer build /tmp/openms.sif docker-archive:///tmp/image.tar
        # Built with sudo, so make the SIF readable for the unprivileged
        # runner user that starts the instance below.
        sudo chmod a+r /tmp/openms.sif

    - name: Prepare host bind dirs (mountpoint contract)
      run: |
        # Host paths we'll bind into the SIF. Asserting writability through
        # singularity's bind machinery requires that the destination paths
        # exist as real directories in the squashfs (otherwise singularity
        # silently degrades the bind to read-only via underlay).
        mkdir -p /tmp/host-workspaces /tmp/host-mounted-data
        echo "from-host-pretest" > /tmp/host-mounted-data/sentinel.txt

    - name: Start apptainer instance (read-only root, host UID, with binds)
      run: |
        # Default apptainer semantics: read-only root, no --writable-tmpfs.
        # This matches how users on HPC clusters run the SIF.
        # Use `instance run` (apptainer 1.1+), not `instance start`: the SIF
        # was built from docker-archive, which populates %runscript with the
        # Docker ENTRYPOINT but leaves %startscript as the default no-op
        # `exec "$@"`. `instance start` would launch an empty instance and
        # streamlit would never bind 8501.
        apptainer instance run \
          --bind /tmp/host-workspaces:/workspaces-streamlit-template:rw \
          --bind /tmp/host-mounted-data:/mounted-data:ro \
          /tmp/openms.sif openms-test
        apptainer instance list
        # Record where this run's logs will land so subsequent steps can tail
        # them deterministically (path depends on hostname/user).
        LOG_DIR=$(find "$HOME/.apptainer/instances/logs" -type d -name "$(whoami)" 2>/dev/null | head -n 1)
        echo "APPTAINER_LOG_DIR=${LOG_DIR}" >> "$GITHUB_ENV"
        if [ -n "$LOG_DIR" ]; then
          ls -la "$LOG_DIR" || true
        else
          # Do not fail here: the health probe is the real gate. But make it
          # obvious why the log tails in later steps come up empty.
          echo "::warning::No apptainer instance log directory found under $HOME/.apptainer/instances/logs; log tails in later steps will be empty"
        fi

    - name: Wait for streamlit /_stcore/health
      run: |
        # Tail the entrypoint's stdout/stderr alongside the health probe so
        # any startup failure surfaces directly in the CI log (the dedicated
        # "Dump entrypoint logs on failure" step is post-mortem only and
        # easy to miss in the GH Actions UI).
        OUT="${APPTAINER_LOG_DIR}/openms-test.out"
        ERR="${APPTAINER_LOG_DIR}/openms-test.err"
        # 90 attempts x 2 s sleep: roughly three minutes before giving up.
        for i in $(seq 1 90); do
          if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8501/_stcore/health; then
            echo "Streamlit is ready after $i attempts"
            exit 0
          fi
          # Every fifth attempt, show what the instance has logged so far.
          if [ $((i % 5)) -eq 0 ]; then
            echo "--- attempt $i: instance log tail ---"
            tail -n 20 "$OUT" 2>/dev/null || echo "(no $OUT yet)"
            tail -n 10 "$ERR" 2>/dev/null || echo "(no $ERR yet)"
            apptainer instance list || true
          fi
          sleep 2
        done
        echo "TIMED OUT waiting for streamlit health endpoint"
        echo "--- full entrypoint stdout ---"
        cat "$OUT" 2>/dev/null || echo "(missing)"
        echo "--- full entrypoint stderr ---"
        cat "$ERR" 2>/dev/null || echo "(missing)"
        exit 1

    - name: Verify health endpoint returns 200
      run: curl -fsS http://127.0.0.1:8501/_stcore/health

    - name: Verify Redis is reachable inside container (full variant)
      if: matrix.variant == 'full'
      run: |
        # In apptainer mode the entrypoint uses a unix socket (TCP 6379 on
        # localhost is the host's, since net namespace is shared). The
        # entrypoint writes the resolved URL to /tmp/openms-redis-url for
        # out-of-band discovery, since `apptainer exec` spawns a fresh
        # shell that doesn't inherit the daemon's exported env.
        URL=$(apptainer exec instance://openms-test cat /tmp/openms-redis-url 2>/dev/null || true)
        case "$URL" in
          unix://*)
            SOCK="${URL#unix://}"
            echo "Redis URL is unix socket: $SOCK"
            apptainer exec instance://openms-test redis-cli -s "$SOCK" ping | grep -i pong
            ;;
          *)
            echo "Redis URL is TCP (or unset): ${URL:-default}"
            apptainer exec instance://openms-test redis-cli ping | grep -i pong
            ;;
        esac

    - name: Verify bind mount is writable (workspaces) and readable (data)
      run: |
        # The whole point of pre-creating /workspaces-streamlit-template
        # and /mounted-data in the image: singularity now has a real
        # attach point and `:rw` actually sticks. Without the mkdir,
        # `apptainer exec ... touch` here would fail with EROFS.
        apptainer exec instance://openms-test sh -c \
          'echo from-container > /workspaces-streamlit-template/probe.txt'
        test -f /tmp/host-workspaces/probe.txt
        grep -q from-container /tmp/host-workspaces/probe.txt
        # Read-only data mount should also be visible inside the container.
        apptainer exec instance://openms-test grep -q from-host-pretest /mounted-data/sentinel.txt
        # The mounted-drive browser uses os.path.ismount() to gate
        # rendering (existence is no longer enough now that the image
        # pre-creates the dir). Assert the kernel reports both paths as
        # real mount points so the detection function returns truthy.
        apptainer exec instance://openms-test python3 -c "
        import os
        for p in ('/mounted-data', '/workspaces-streamlit-template'):
            assert os.path.ismount(p), f'{p} not reported as mount point'
            print(f'ismount({p}) = True')
        "

    - name: Dump entrypoint logs on failure
      if: failure()
      run: |
        echo "--- apptainer instance list ---"
        apptainer instance list || true
        echo "--- apptainer instance logs ---"
        # -type f keeps directories out of `cat`; IFS= preserves leading or
        # trailing whitespace in paths.
        find "$HOME/.apptainer" -type f \( -name '*.out' -o -name '*.err' \) 2>/dev/null \
          | while IFS= read -r f; do echo "=== $f ==="; cat "$f"; done || true

    - name: Stop apptainer instance
      if: always()
      run: apptainer instance stop openms-test || true

    - name: Upload validated SIF artifact (push events only)
      if: success() && github.event_name != 'pull_request'
      uses: actions/upload-artifact@v4
      with:
        name: openms-streamlit-${{ matrix.variant }}-sif
        path: /tmp/openms.sif
        retention-days: 1
        if-no-files-found: error

publish-apptainer:
  # Publish the validated SIF (already health-checked above) to GHCR as an
  # OCI artifact via ORAS, in a sibling package: ghcr.io/<owner>/<repo>/sif.
  # Keeping it separate from the docker image package keeps tag lists clean
  # and lets HPC users `apptainer pull oras://...` without the 5-15 min
  # on-the-fly OCI->SIF conversion the docker:// path requires.
  needs: test-apptainer
  if: github.event_name != 'pull_request'
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      # Must list only variants that test-apptainer validates and uploads
      # as `openms-streamlit-<variant>-sif`. That job currently runs for
      # `full` only, so a `simple` leg here would have no artifact to
      # download and would fail on every push.
      variant: [full]
  steps:
    - name: Download validated SIF artifact
      uses: actions/download-artifact@v4
      with:
        name: openms-streamlit-${{ matrix.variant }}-sif
        path: /tmp

    - name: Install apptainer
      uses: eWaterCycle/setup-apptainer@v2
      with:
        apptainer-version: 1.3.4

    - name: Compute SIF tags
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/sif
        tags: |
          type=ref,event=branch,suffix=-${{ matrix.variant }}
          type=ref,event=tag,suffix=-${{ matrix.variant }}
          type=sha,prefix=,suffix=-${{ matrix.variant }}
          type=raw,value=latest,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }}

    - name: Log in to GHCR for ORAS push
      env:
        GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Passed through the environment rather than interpolated into the
        # script text, so the value is never parsed as shell code.
        GHCR_USER: ${{ github.actor }}
      run: |
        # apptainer reads its auth from ~/.apptainer/remote.yaml, NOT from
        # ~/.docker/config.json — so docker/login-action won't work here.
        # Login and push must both run as the runner user (no sudo) so they
        # share the same $HOME and therefore the same auth file.
        printf '%s\n' "$GHCR_TOKEN" | apptainer registry login \
          --username "$GHCR_USER" \
          --password-stdin \
          oras://ghcr.io

    - name: Push SIF to each computed tag
      env:
        # Newline-separated tag list from docker/metadata-action, handed
        # over via env so it is treated as data, not script text.
        SIF_TAGS: ${{ steps.meta.outputs.tags }}
      run: |
        # `apptainer push` accepts ONE destination per invocation; iterate
        # over the newline-separated tag list from docker/metadata-action.
        # Lowercasing is belt-and-braces — metadata-action already
        # lowercases, but GHCR is strict about case in OCI refs.
        set -euo pipefail
        pushed=0
        while IFS= read -r tag; do
          if [[ -z "$tag" ]]; then
            continue
          fi
          tag_lc="${tag,,}"
          echo "Pushing SIF to oras://${tag_lc}"
          apptainer push /tmp/openms.sif "oras://${tag_lc}"
          pushed=$((pushed + 1))
        done <<< "$SIF_TAGS"
        # An empty tag list would otherwise leave this job green while
        # publishing nothing.
        if (( pushed == 0 )); then
          echo "::error::metadata-action produced no tags; nothing was published"
          exit 1
        fi

test-nginx:
needs: build
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
variant: [full]
steps:
- uses: actions/checkout@v4

- name: Download image artifact
uses: actions/download-artifact@v4
with:
name: openms-streamlit-${{ matrix.variant }}-image
path: /tmp

- name: Load image into local docker
run: docker load -i /tmp/image.tar

- name: Create kind cluster
uses: helm/kind-action@v1
with:
Expand Down Expand Up @@ -129,47 +381,62 @@ jobs:
sleep "${i}0"
done

- name: Discover overlay identity
  run: |
    # Read the app label from the prod overlay so the label selectors and
    # service names in later steps follow the overlay instead of being
    # hard-coded.
    SLUG=$(yq '.commonLabels.app' k8s/overlays/prod/kustomization.yaml)
    # yq prints the literal string "null" for a missing key. Without this
    # guard the later steps would query `app=null`.
    if [ -z "$SLUG" ] || [ "$SLUG" = "null" ]; then
      echo "::error::.commonLabels.app is missing in k8s/overlays/prod/kustomization.yaml"
      exit 1
    fi
    echo "SLUG=$SLUG" >> "$GITHUB_ENV"

- name: Wait for Redis to be ready
run: |
kubectl wait -n openms --for=condition=ready pod -l app=opendiakiosk,component=redis --timeout=60s
kubectl wait -n openms --for=condition=ready pod -l app=${SLUG},component=redis --timeout=60s

- name: Verify Redis Service is reachable
run: |
kubectl run redis-test -n openms --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h opendiakiosk-redis.openms.svc.cluster.local ping
kubectl run redis-test -n openms --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h ${SLUG}-redis.openms.svc.cluster.local ping

- name: Verify all deployments are available
run: |
kubectl wait -n openms --for=condition=available deployment -l app=opendiakiosk --timeout=120s || true
kubectl get pods -n openms -l app=opendiakiosk
kubectl get services -n openms -l app=opendiakiosk
kubectl wait -n openms --for=condition=available deployment -l app=${SLUG} --timeout=180s || true
kubectl get pods -n openms -l app=${SLUG}
kubectl get services -n openms -l app=${SLUG}

- name: Curl both hostnames via nginx ingress
run: |
NGINX_POD=$(kubectl -n ingress-nginx get pod -l app.kubernetes.io/component=controller -o name | head -n 1)
kubectl -n ingress-nginx port-forward "$NGINX_POD" 8080:80 &
PF_PID=$!
trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
for i in 1 2 3 4 5; do
for i in $(seq 1 30); do
sleep 2
if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8080/_stcore/health -H "Host: streamlit.openms.example.de"; then
break
fi
echo "port-forward not ready yet, retry $i"
echo "port-forward / app not ready yet, retry $i"
done
for host in streamlit.openms.example.de streamlit.openms.example.org; do
curl -fsS --resolve "$host:8080:127.0.0.1" "http://$host:8080/_stcore/health"
echo ""
echo "$host -> 200 OK"
done

traefik-integration:
needs: lint-manifests
test-traefik:
needs: build
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
variant: [full]
steps:
- uses: actions/checkout@v4

- name: Build image (simple variant; routing is image-agnostic)
run: docker build -t openms-streamlit:test -f Dockerfile .
- name: Download image artifact
uses: actions/download-artifact@v4
with:
name: openms-streamlit-${{ matrix.variant }}-image
path: /tmp

- name: Load image into local docker
run: docker load -i /tmp/image.tar

- name: Create kind cluster
uses: helm/kind-action@v1
Expand Down Expand Up @@ -206,25 +473,39 @@ jobs:
sleep "${i}0"
done

- name: Wait for Redis and deployments to be ready
- name: Discover overlay identity
  run: |
    # Derive the app label and the Traefik hostnames from the rendered prod
    # overlay so later steps do not hard-code either.
    SLUG=$(yq '.commonLabels.app' k8s/overlays/prod/kustomization.yaml)
    # yq prints the literal string "null" for a missing key.
    if [ -z "$SLUG" ] || [ "$SLUG" = "null" ]; then
      echo "::error::.commonLabels.app is missing in k8s/overlays/prod/kustomization.yaml"
      exit 1
    fi
    # Pull every Host(`...`) value out of the first route's match rule and
    # join them with spaces.
    TRAEFIK_HOSTS=$(kubectl kustomize k8s/overlays/prod/ \
      | yq 'select(.kind == "IngressRoute") | .spec.routes[0].match' \
      | grep -oP 'Host\(`\K[^`]+' | tr '\n' ' ')
    # The pipeline's exit status is tr's, so a grep that matches nothing
    # goes unnoticed. An empty list would make the per-host curl loop run
    # zero times and pass without testing anything.
    if [[ -z "${TRAEFIK_HOSTS//[[:space:]]/}" ]]; then
      echo "::error::No Host(...) rule found in the IngressRoute rendered from k8s/overlays/prod/"
      exit 1
    fi
    echo "SLUG=$SLUG" >> "$GITHUB_ENV"
    echo "TRAEFIK_HOSTS=$TRAEFIK_HOSTS" >> "$GITHUB_ENV"

- name: Wait for Redis to be ready
run: |
kubectl wait -n openms --for=condition=ready pod -l app=opendiakiosk,component=redis --timeout=60s
kubectl wait -n openms --for=condition=available deployment -l app=opendiakiosk --timeout=120s
kubectl get pods -n openms -l app=opendiakiosk
kubectl wait -n openms --for=condition=ready pod -l app=${SLUG},component=redis --timeout=60s

- name: Verify all deployments are available
run: |
kubectl wait -n openms --for=condition=available deployment -l app=${SLUG} --timeout=180s || true
kubectl get pods -n openms -l app=${SLUG}
kubectl get services -n openms -l app=${SLUG}

- name: Curl both hostnames via Traefik
run: |
kubectl -n traefik port-forward svc/traefik 8080:80 &
PF_PID=$!
trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
for i in 1 2 3 4 5; do
FIRST_HOST=$(echo ${TRAEFIK_HOSTS} | awk '{print $1}')
for i in $(seq 1 30); do
sleep 2
if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8080/_stcore/health -H "Host: template.webapps.openms.de"; then
if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8080/_stcore/health -H "Host: ${FIRST_HOST}"; then
break
fi
echo "port-forward not ready yet, retry $i"
echo "port-forward / app not ready yet, retry $i"
done
for host in template.webapps.openms.de template.webapps.openms.org; do
for host in ${TRAEFIK_HOSTS}; do
curl -fsS --resolve "$host:8080:127.0.0.1" "http://$host:8080/_stcore/health"
echo ""
echo "$host -> 200 OK"
Expand Down
Loading
Loading