---
# .github/workflows/test-baseline-clusters.yml
name: Test Baseline Clusters

# Triggers:
#   - push to main touching sources, manifests, tests, this workflow, or pyproject.toml
#   - pull requests touching sources, manifests, tests, or pyproject.toml
#   - manual dispatch with two optional inputs
on:
  push:
    branches: [main]
    paths:
      - "src/**"
      - "manifests/*.yaml"
      - "tests/**"
      - ".github/workflows/test-baseline-clusters.yml"
      - "pyproject.toml"
  pull_request:
    paths:
      - "src/**"
      - "manifests/*.yaml"
      - "tests/**"
      - "pyproject.toml"
  workflow_dispatch:
    inputs:
      k8s-version:
        description: "Specific K8s version to test (leave empty for all)"
        required: false
      test-suite:
        description: "Test suite to run"
        required: false
        default: "functional-basic"

# Runs are grouped per workflow and ref; a newer run does not cancel one
# that is already in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false

jobs:
# Discover manifests dynamically based on trigger and ci-skip flags
  discover-manifests:
    runs-on: ubuntu-latest
    outputs:
      # JSON array of {"manifest-file": "<path>"} objects, consumed as a matrix `include` list.
      manifests: ${{ steps.discover.outputs.manifests }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history; the git diff below compares the PR base and head commits.
          fetch-depth: 0
      - name: Discover manifest files
        id: discover
        shell: bash
        # Context values are passed through the environment instead of being
        # spliced into the script text: github.head_ref is chosen by the PR
        # author and must never be interpreted as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          HEAD_REF: ${{ github.head_ref }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Filter manifests based on event type and the ci-skip-on-pr flag.
          # PR context (pull_request event, or a non-empty head ref): skip manifests
          # containing ci-skip-on-pr: "true", but always keep manifests the PR changes.
          # Any other event: include all manifests.

          # Turn a newline-separated list of paths on stdin into the JSON matrix
          # [{"manifest-file": "<path>"}, ...]; empty input yields [].
          to_matrix() {
            jq -R -s -c 'split("\n")[:-1] | map({"manifest-file": .})'
          }

          # Check if this is a PR-related event
          is_pr=false
          if [ "$EVENT_NAME" = "pull_request" ]; then
            is_pr=true
            echo "Detected pull_request event"
          elif [ -n "$HEAD_REF" ]; then
            is_pr=true
            echo "Detected push to PR branch: $HEAD_REF"
          fi

          if [ "$is_pr" = "true" ]; then
            echo "PR context: Including manifests without ci-skip-on-pr: true plus any changed manifests"

            # Base set: manifests that don't have ci-skip-on-pr set to "true" or 'true'
            base_manifests=$(find manifests/ -name "*.yaml" -type f | while IFS= read -r file; do
              if ! grep -q -e 'ci-skip-on-pr.*:.*"true"' -e "ci-skip-on-pr.*:.*'true'" "$file"; then
                echo "$file"
              fi
            done | sort)

            # Changed manifests in this PR (always included).
            # --diff-filter=d leaves out manifests the PR deletes: they are absent
            # from the checkout and would only produce failing matrix entries.
            changed_manifests=""
            if [ "$EVENT_NAME" = "pull_request" ]; then
              changed_manifests=$(git diff --name-only --diff-filter=d "$BASE_SHA" "$HEAD_SHA" -- 'manifests/*.yaml' | sort)
            fi

            # Merge both lists, drop blank lines and duplicates, then convert to JSON
            manifests=$(printf "%s\n%s\n" "$base_manifests" "$changed_manifests" \
              | sed '/^$/d' \
              | sort -u \
              | to_matrix)
          else
            echo "Non-PR context: Including all manifests"
            manifests=$(find manifests/ -name "*.yaml" -type f | sort | to_matrix)
          fi

          echo "manifests=$manifests" >> "$GITHUB_OUTPUT"
          echo "Discovered manifests: $manifests"
# Matrix configuration for baseline cluster testing
  test-matrix:
    needs: discover-manifests
    runs-on: ubuntu-latest
    timeout-minutes: 90
    # Explicit token scopes: checkout needs to read the repository and the
    # "Create Issue on Failure" step needs to open issues.
    permissions:
      contents: read
      issues: write
    strategy:
      fail-fast: false
      matrix:
        # One job per manifest discovered by the discover-manifests job
        include: ${{ fromJson(needs.discover-manifests.outputs.manifests) }}
    # Passed via the environment so shell steps can reference them as quoted
    # variables; manifest paths come from the repository tree (and, on PRs,
    # from the contributor), so they must not be spliced into script text.
    env:
      MANIFEST_FILE: ${{ matrix.manifest-file }}
      TEST_SUITE: ${{ inputs.test-suite || 'functional-basic' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Astral UV
        uses: astral-sh/setup-uv@v7.2.1
        with:
          python-version: "3.12"
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: ">=1.22"
      - name: Install Spread
        run: go install github.com/snapcore/spread/cmd/spread@latest
      - name: Setup LXD
        uses: canonical/setup-lxd@main
      - name: Install kube-galaxy
        shell: bash
        run: |
          # Editable install of this repository as a uv tool, then a smoke check
          uv tool install -e .
          kube-galaxy --version
      - name: Display Cluster Configuration
        shell: bash
        run: |
          echo "## Test Configuration"
          echo "- **Manifest**: $MANIFEST_FILE"
          echo "- **Runner**: ubuntu-latest"
          echo "- **Timeout**: 90 minutes"
      - name: Setup Cluster
        id: setup-cluster
        shell: bash
        run: |
          # Create sanitized artifact name (replace / with -) before setup,
          # so the output exists for the upload steps even if setup fails
          ARTIFACT_NAME="${MANIFEST_FILE//\//-}"
          echo "artifact-name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"
          kube-galaxy setup "$MANIFEST_FILE"
      - name: Verify Cluster Health
        shell: bash
        run: |
          kube-galaxy status --wait --timeout 300
      - name: Run Spread Tests
        id: run-tests
        shell: bash
        run: |
          kube-galaxy test "$MANIFEST_FILE"
      - name: Upload Test Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ steps.setup-cluster.outputs.artifact-name }}
          path: logs/
          retention-days: 30
          if-no-files-found: warn
      - name: Collect Kubernetes Logs
        if: failure()
        shell: bash
        run: |
          mkdir -p debug-logs
          echo "Collecting debug information..."
          # Every kubectl call is best-effort (|| true): a broken cluster must
          # not stop the remaining diagnostics from being gathered.
          # Pods and their logs
          kubectl get pods -A -o wide > debug-logs/pods.txt 2>&1 || true
          for ns in $(kubectl get ns -o jsonpath='{.items[*].metadata.name}'); do
            for pod in $(kubectl get pods -n "$ns" -o jsonpath='{.items[*].metadata.name}'); do
              mkdir -p "debug-logs/$ns/$pod"
              kubectl logs -n "$ns" "$pod" --all-containers=true > "debug-logs/$ns/$pod/logs.txt" 2>&1 || true
              kubectl describe pod -n "$ns" "$pod" > "debug-logs/$ns/$pod/describe.txt" 2>&1 || true
            done
          done
          # Node information
          kubectl get nodes -o wide > debug-logs/nodes.txt 2>&1 || true
          kubectl describe nodes > debug-logs/nodes-describe.txt 2>&1 || true
          # Cluster events
          kubectl get events -A --sort-by='.lastTimestamp' > debug-logs/events.txt 2>&1 || true
          # System pod status
          kubectl get pods -n kube-system -o wide > debug-logs/system-pods.txt 2>&1 || true
          # API server logs
          kubectl logs -n kube-system -l component=kube-apiserver --tail=500 > debug-logs/apiserver.txt 2>&1 || true
          echo "✅ Debug logs collected"
      - name: Upload Debug Logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # Falls back to a hash of the manifest when the setup step produced no artifact name
          name: debug-logs-${{ steps.setup-cluster.outputs.artifact-name || hashFiles(matrix.manifest-file) }}
          path: debug-logs/
          retention-days: 30
      # Interactive debugging is offered on failed pull request runs only
      - name: Setup upterm session for debugging
        if: failure() && github.event_name == 'pull_request'
        uses: owenthereal/action-upterm@v1
        timeout-minutes: 5
        with:
          limit-access-to-actor: true
      # Failures outside pull requests are reported as a repository issue instead
      - name: Create Issue on Failure
        if: failure() && github.event_name != 'pull_request'
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Create a simple issue on failure (without labels to avoid errors if labels don't exist).
          # Best-effort: a failure to file the issue is logged and does not fail the step.
          gh issue create \
            --title "[TEST FAILURE] Manifest $MANIFEST_FILE - Test #${{ github.run_number }}" \
            --body "Cluster test failed for manifest: \`$MANIFEST_FILE\`
          Test Suite: $TEST_SUITE
          Workflow Run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
          Check test results and debug logs in the action artifacts." || echo "Failed to create issue, continuing..."
      # Runs regardless of earlier failures so the cluster is always torn down
      - name: Cleanup Cluster
        if: always()
        shell: bash
        run: |
          kube-galaxy cleanup all --manifest "$MANIFEST_FILE"
# Summary job that reports overall test status
  test-summary:
    needs: test-matrix
    runs-on: ubuntu-latest
    # Run even when the matrix failed, was cancelled, or was skipped
    if: always()
    steps:
      - name: Check Test Results
        shell: bash
        env:
          # Aggregate result of the matrix job: success, failure, cancelled, or skipped
          MATRIX_RESULT: ${{ needs.test-matrix.result }}
        run: |
          echo "## Test Summary"
          echo "- **Result**: $MATRIX_RESULT"
          echo ""
          # Only an explicit success counts as a pass; a cancelled or skipped
          # matrix means the tests did not run to completion and must not be
          # reported as green.
          case "$MATRIX_RESULT" in
            success)
              echo "✅ **All tests passed**"
              ;;
            failure)
              echo "⚠️ **Some tests failed**"
              echo "Check individual job results above for details"
              exit 1
              ;;
            *)
              echo "⚠️ **Tests did not complete (result: $MATRIX_RESULT)**"
              echo "Check individual job results above for details"
              exit 1
              ;;
          esac