Skip to content

Commit 260db40

Browse files
committed
Added observability tests.
1 parent 2d79755 commit 260db40

File tree

11 files changed

+2094
-88
lines changed

11 files changed

+2094
-88
lines changed

.github/workflows/ci.yml

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343

4444
integration-tests:
4545
name: Integration tests
46-
needs: fast-checks
46+
# needs: fast-checks
4747
if: github.event.pull_request.head.repo.full_name == github.repository
4848
runs-on: ubuntu-latest
4949
steps:
@@ -89,6 +89,7 @@ jobs:
8989
echo "=== eoAPI Deployment ==="
9090
export RELEASE_NAME="${RELEASE_NAME}"
9191
export PGO_VERSION="${{ env.PGO_VERSION }}"
92+
export GITHUB_SHA="${{ github.sha }}"
9293
export CI_MODE=true
9394
9495
# Deploy using consolidated script with CI mode
@@ -109,7 +110,7 @@ jobs:
109110
run: |
110111
./scripts/debug-deployment.sh
111112
112-
- name: Cleanup
113+
- name: Cleanup integration test
113114
if: always()
114115
run: |
115116
helm uninstall "$RELEASE_NAME" -n eoapi || true
@@ -158,3 +159,122 @@ jobs:
158159
head -1 "$file" | grep -q "^---$" || { echo "❌ Missing frontmatter: $file"; missing=1; }
159160
done
160161
exit $missing
162+
163+
# Observability job: starts a throwaway K3s cluster, deploys eoAPI with
# OBSERVABILITY_MODE enabled, waits for the monitoring components, then runs
# the observability and autoscaling pytest suites against the deployment.
observability-tests:
  name: Observability tests
  # Run only when the PR head branch lives in this repository.
  if: github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    contents: 'read'
    # NOTE(review): no step visible in this job requests an OIDC token;
    # confirm that id-token write access is actually required.
    id-token: 'write'
  # Dependency on integration-tests is currently disabled, so this job is
  # not gated on that job.
  # needs: integration-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Start K3s cluster
      uses: jupyterhub/action-k3s-helm@v4
      with:
        k3s-channel: latest
        helm-version: ${{ env.HELM_VERSION }}
        metrics-enabled: false
        docker-enabled: true

    # Release name is "eoapi-" plus the first 8 characters of the commit
    # SHA; it is written to GITHUB_ENV so later steps can read it.
    - name: Set release name
      run: echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV"

    - name: Wait for K3s readiness
      run: |
        echo "=== Waiting for K3s cluster to be ready ==="

        # The action already sets up kubectl context, just verify it works
        kubectl cluster-info
        kubectl get nodes

        # Wait for core components
        kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s

        # Verify Traefik CRDs.
        # Each CRD gets its own polling budget (the counter is reset per
        # CRD), and failure is decided by a final existence check rather
        # than by the counter value, so a CRD that appears during the last
        # sleep is not misreported as a timeout.
        timeout=300
        for crd in "middlewares.traefik.io" "ingressroutes.traefik.io"; do
          counter=0
          while [ "$counter" -lt "$timeout" ] && ! kubectl get crd "$crd" &>/dev/null; do
            sleep 3; counter=$((counter + 3))
          done
          if ! kubectl get crd "$crd" &>/dev/null; then
            echo "❌ Timeout waiting for $crd"
            exit 1
          fi
        done

        echo "✅ K3s cluster ready"

    - name: Deploy eoAPI with monitoring
      run: |
        echo "=== Deploying eoAPI with monitoring stack ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export PGO_VERSION="${{ env.PGO_VERSION }}"
        export GITHUB_SHA="${{ github.sha }}"
        export CI_MODE=true
        export OBSERVABILITY_MODE=true

        # Deploy using consolidated script with observability mode enabled
        ./scripts/deploy.sh --ci

    # Every wait below is best-effort: a component that is not Ready only
    # logs a message and does not fail this step.
    - name: Wait for monitoring stack to be ready
      run: |
        echo "=== Waiting for monitoring components ==="

        # Wait for metrics-server first (required for HPA)
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=metrics-server -n eoapi --timeout=300s || echo "metrics-server not ready"

        # Wait for Prometheus server
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/component=server,app.kubernetes.io/name=prometheus -n eoapi --timeout=300s || echo "Prometheus server not ready"

        # Wait for Grafana
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n eoapi --timeout=300s || echo "Grafana not ready"

        # Wait for prometheus-adapter
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus-adapter -n eoapi --timeout=300s || echo "prometheus-adapter not ready"

        # Give time for HPA to be created and configured
        echo "=== Waiting for HPA creation ==="
        sleep 60

        echo "=== Final monitoring stack status ==="
        kubectl get pods -n eoapi | grep -E "(prometheus|grafana|metrics-server)" || true
        kubectl get hpa -n eoapi || echo "No HPA resources found yet"

    # Unlike the monitoring waits, these waits are not guarded with "||",
    # so a service that does not become Ready fails the step.
    - name: Validate core eoAPI services
      run: |
        echo "=== Validating core eoAPI services ==="

        # Wait for core application pods to be ready
        kubectl wait --for=condition=Ready pod -l app="$RELEASE_NAME"-stac -n eoapi --timeout=300s
        kubectl wait --for=condition=Ready pod -l app="$RELEASE_NAME"-raster -n eoapi --timeout=300s
        kubectl wait --for=condition=Ready pod -l app="$RELEASE_NAME"-vector -n eoapi --timeout=300s

        echo "✅ Core eoAPI services are ready"

    - name: Run observability tests
      run: |
        echo "=== Running observability test suite ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export NAMESPACE="eoapi"

        # Install python dependencies for testing
        python -m pip install --upgrade pip
        pip install pytest requests psycopg2-binary

        # Run observability tests
        python -m pytest .github/workflows/tests/test_observability.py -v --tb=short

        # Run autoscaling tests (tests marked "slow" are deselected)
        python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow"

    # Runs only when an earlier step in this job has failed.
    - name: Debug observability stack on failure
      if: failure()
      run: |
        ./scripts/debug-deployment.sh

    # Runs regardless of earlier step results; both commands tolerate
    # failure so cleanup never fails the job.
    - name: Cleanup observability test
      if: always()
      run: |
        helm uninstall "$RELEASE_NAME" -n eoapi || true
        kubectl delete namespace eoapi || true

0 commit comments

Comments
 (0)