@@ -158,3 +158,122 @@ jobs:
158158 head -1 "$file" | grep -q "^---$" || { echo "❌ Missing frontmatter: $file"; missing=1; }
159159 done
160160 exit $missing
161+
# Observability test job: starts a K3s cluster, deploys eoAPI, enables the
# monitoring/autoscaling chart options via `helm upgrade`, then runs the
# observability and autoscaling pytest suites against the deployment.
observability-tests:
  name: Observability Tests
  # Run only when the pull request's head repository is this repository
  # (the condition is false for pull requests opened from forks).
  if: github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    # Access levels must be the exact scalars `read` / `write`; a quoted value
    # with leading whitespace (' read') is not a valid permission level.
    contents: read
    id-token: write
  needs: integration-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Start K3s cluster
      uses: jupyterhub/action-k3s-helm@v4
      with:
        k3s-channel: latest
        # NOTE(review): HELM_VERSION is read from `env`; presumably defined at
        # workflow level outside this job - confirm.
        helm-version: ${{ env.HELM_VERSION }}
        metrics-enabled: false
        docker-enabled: true

    # Release name is "eoapi-" plus the first 8 characters of the commit SHA;
    # writing it to GITHUB_ENV makes it available to all later steps.
    - name: Set release name
      run: echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV"

    - name: Wait for K3s to be fully ready
      run: |
        echo "=== Waiting for K3s to be fully ready ==="
        kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s
        kubectl get nodes
        kubectl get pods --all-namespaces
        sleep 10
        echo "✅ K3s is ready"

    - name: Deploy eoAPI with monitoring
      run: |
        echo "=== Deploying eoAPI with monitoring stack ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export PGO_VERSION="${{ env.PGO_VERSION }}"
        export GITHUB_SHA="${{ github.sha }}"
        ./scripts/deploy.sh --ci

        # Enable monitoring components
        helm upgrade "$RELEASE_NAME" ./charts/eoapi \
          --set monitoring.prometheus.enabled=true \
          --set monitoring.prometheusAdapter.enabled=true \
          --set monitoring.kube-state-metrics.enabled=true \
          --set monitoring.prometheus-node-exporter.enabled=true \
          --set observability.grafana.enabled=true \
          --set stac.autoscaling.enabled=true \
          --set raster.autoscaling.enabled=true \
          --set vector.autoscaling.enabled=true \
          --namespace eoapi \
          --wait --timeout=10m

    # Best-effort waits: each `kubectl wait` failure is logged via `echo`
    # instead of failing the step, so the test step below still runs.
    - name: Wait for monitoring stack to be ready
      run: |
        echo "=== Waiting for monitoring components ==="

        # Wait for Prometheus
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n eoapi --timeout=300s || echo "Prometheus not ready"

        # Wait for Grafana
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n eoapi --timeout=300s || echo "Grafana not ready"

        # Wait for prometheus-adapter
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus-adapter -n eoapi --timeout=300s || echo "prometheus-adapter not ready"

        # Wait for HPA to be created
        sleep 30

        echo "=== Final monitoring stack status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/component in (server,grafana,prometheus-adapter)' || true
        kubectl get hpa -n eoapi || true

    - name: Run observability tests
      run: |
        echo "=== Running observability test suite ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export NAMESPACE="eoapi"

        # Install python dependencies for testing
        python -m pip install --upgrade pip
        pip install pytest requests

        # Run observability tests
        python -m pytest .github/workflows/tests/test_observability.py -v --tb=short

        # Run autoscaling tests (tests marked "slow" are deselected)
        python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow"

    # Diagnostics only: every command is followed by `|| true` so a failing
    # query does not mask the original failure.
    - name: Debug observability stack on failure
      if: failure()
      run: |
        echo "=== Observability Debug Information ==="

        echo "=== Monitoring Pods Status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/name in (prometheus,grafana,prometheus-adapter)' -o wide || true

        echo "=== HPA Status ==="
        kubectl get hpa -n eoapi -o wide || true
        kubectl describe hpa -n eoapi || true

        echo "=== Custom Metrics API ==="
        kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" || true

        echo "=== Pod Metrics ==="
        kubectl top pods -n eoapi || true

        echo "=== Recent Events ==="
        kubectl get events -n eoapi --sort-by='.lastTimestamp' | tail -20 || true

        echo "=== Component Logs ==="
        kubectl logs -l app.kubernetes.io/name=prometheus-adapter -n eoapi --tail=50 || true
        kubectl logs -l app.kubernetes.io/name=grafana -n eoapi --tail=30 || true

    - name: Cleanup observability test
      if: always()
      run: |
        # Uninstall from the same namespace the release was upgraded in
        # (`--namespace eoapi` above); without it Helm looks in the
        # current-context namespace and `|| true` would hide the miss.
        helm uninstall "$RELEASE_NAME" --namespace eoapi || true
0 commit comments