Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 6 additions & 42 deletions .github/workflows/nightly-cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,53 +40,17 @@ jobs:
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
shell: bash

- name: Find CI clusters
- name: Delete all CI clusters
run: |
. venv/bin/activate
CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true)
echo "DEBUG: Raw CI clusters: $CI_CLUSTERS"

if [[ -z "$CI_CLUSTERS" ]]; then
echo "No matching CI clusters found."
else
# Flatten multiline value so can be passed as env var
CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//')
echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED"
echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> $GITHUB_ENV
fi
./dev/delete-cluster.py slurmci-RL --force
shell: bash
- name: Delete CI clusters

- name: Delete all CI build VMs
run: |
. venv/bin/activate
if [[ -z ${ci_clusters} ]]; then
echo "No clusters to delete."
exit 0
fi

for cluster_prefix in ${ci_clusters}
for build_vm in $(openstack server list -c Name -f value | grep openhpc-extra-RL)
do
echo "Processing cluster: $cluster_prefix"

# Get all servers with the matching name for control node
CONTROL_SERVERS=$(openstack server list --name ${cluster_prefix}-control --format json)

# Get unique server names to avoid duplicate cleanup
UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq)
for name in $UNIQUE_NAMES; do
echo "Deleting cluster with control node: $name"

# Get the first matching server ID by name
server=$(echo "$CONTROL_SERVERS" | jq -r '.[] | select(.Name=="'"$name"'") | .ID' | head -n1)

# Make sure server still exists (wasn't deleted earlier)
if ! openstack server show "$server" &>/dev/null; then
echo "Server $server no longer exists, skipping $name."
continue
fi

echo "Deleting cluster $cluster_prefix (server $server)..."
./dev/delete-cluster.py $cluster_prefix --force
done
openstack server delete $build_vm
done
shell: bash
Loading