Nightly CI cleanup: schedule moved from 20:00 to 21:00, the ci_cloud dispatch
input is removed, an empty cluster list is handled explicitly, and clusters are
deleted non-interactively through a new --force option of dev/delete-cluster.py.

Review notes:
  * NOTE(review): this patch was recovered from a whitespace-collapsed copy.
    Line breaks were placed using the hunk line counts; indentation and the
    position of the doubled blank line in delete-cluster.py are reconstructed.
    Verify the context lines against the target tree before applying.
  * Redirect target is quoted ("$GITHUB_ENV").
  * The module docstring keeps the sentence about listing and prompting, which
    is still the default behaviour without --force.
  * Argument parsing requires exactly one PREFIX, so '--force' can no longer be
    taken as the prefix and stray extra arguments are rejected, not ignored.
  * No blank line is appended at the end of delete-cluster.py.
  * The post-image blob ids on the index lines are carried over unchanged from
    the patch as received and no longer describe the post-image exactly.

diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml
index 9bfc5230a..f76bd51a9 100644
--- a/.github/workflows/nightly-cleanup.yml
+++ b/.github/workflows/nightly-cleanup.yml
@@ -1,17 +1,8 @@
 name: Cleanup CI clusters
 on:
   workflow_dispatch:
-    inputs:
-      ci_cloud:
-        description: 'Select the CI_CLOUD'
-        required: true
-        type: choice
-        options:
-          - LEAFCLOUD
-          - SMS
-          - ARCUS
   schedule:
-    - cron: '0 20 * * *' # Run at 8PM - image sync runs at midnight
+    - cron: '0 21 * * *' # Run at 9PM - image sync runs at midnight
 
 jobs:
   ci_cleanup:
@@ -52,20 +43,35 @@ jobs:
       - name: Find CI clusters
         run: |
           . venv/bin/activate
-          CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq)
-          echo "ci_clusters=${CI_CLUSTERS}" >> GITHUB_ENV
+          CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true)
+          echo "DEBUG: Raw CI clusters: $CI_CLUSTERS"
+
+          if [[ -z "$CI_CLUSTERS" ]]; then
+            echo "No matching CI clusters found."
+          else
+            # Flatten multiline value so it can be passed as an env var
+            CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//')
+            echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED"
+            echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> "$GITHUB_ENV"
+          fi
         shell: bash
 
       - name: Delete clusters if control node not tagged with keep
         run: |
           . venv/bin/activate
-          for cluster_prefix in ${CI_CLUSTERS}
+          if [[ -z ${ci_clusters} ]]; then
+            echo "No clusters to delete."
+            exit 0
+          fi
+
+          for cluster_prefix in ${ci_clusters}
           do
+            echo "Processing cluster: $cluster_prefix"
             TAGS=$(openstack server show ${cluster_prefix}-control --column tags --format value)
             if [[ $TAGS =~ "keep" ]]; then
               echo "Skipping ${cluster_prefix} - control instance is tagged as keep"
             else
-              yes | ./dev/delete-cluster.py ${cluster_prefix}
+              ./dev/delete-cluster.py ${cluster_prefix} --force
             fi
           done
         shell: bash
diff --git a/dev/delete-cluster.py b/dev/delete-cluster.py
index 861396efd..05f53fbfa 100755
--- a/dev/delete-cluster.py
+++ b/dev/delete-cluster.py
@@ -4,18 +4,19 @@
 Delete infrastructure for a cluster without using Terraform. Useful for CI clusters.
 
 Usage:
-    delete-cluster.py PREFIX
+    delete-cluster.py PREFIX [--force]
 
 Where PREFIX is the string at the start of the resource's names.
 It will list matching resources and prompt to confirm deletion.
+If --force is provided, matching resources are deleted without prompting.
 """
 
-import sys, json, subprocess, pprint
+import sys, json, subprocess
 
 
 CLUSTER_RESOURCES = ['server', 'port', 'volume']
 
-def delete_cluster(cluster_prefix):
+def delete_cluster(cluster_prefix, force=False):
     to_delete = {}
     for resource_type in CLUSTER_RESOURCES:
         to_delete[resource_type] = []
@@ -29,7 +30,8 @@ def delete_cluster(cluster_prefix):
             except:
                 print(resource_type, item)
                 raise
-    if input('Delete these (y/n)?:') == 'y':
+
+    if force or input('Delete these (y/n)?:') == 'y':
         for resource_type in CLUSTER_RESOURCES:
             items = [v['ID'] for v in to_delete[resource_type]]
             if items:
@@ -40,7 +42,12 @@ def delete_cluster(cluster_prefix):
         print('Cancelled - no resources deleted')
 
 if __name__ == '__main__':
-    if len(sys.argv) != 2:
+    args = sys.argv[1:]
+    force_flag = '--force' in args
+    prefixes = [arg for arg in args if arg != '--force']
+    # Require exactly one PREFIX, so '--force' (or a mistyped option) is never
+    # taken as the prefix and stray extra arguments are rejected, not ignored.
+    if len(prefixes) != 1 or prefixes[0].startswith('-'):
         print('ERROR: Incorrect argument(s).\n' + __doc__)
         exit(1)
-    delete_cluster(sys.argv[1])
+    delete_cluster(prefixes[0], force_flag)