diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 2af833fc..3637cfcb 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -22,11 +22,16 @@ jobs: with: version: ${{ env.HELM_VERSION }} - - name: Run Helm unit tests - run: make tests + - name: Install helm unittest plugin + run: helm plugin install https://github.com/helm-unittest/helm-unittest - integration: - name: Integration Tests (K3s) + - run: | + cd charts + helm unittest eoapi -f 'tests/*.yaml' -v eoapi/test-helm-values.yaml + # Run autoscaling-specific unit tests + helm unittest eoapi -f 'tests/autoscaling_tests.yaml' + + k3s-integration-tests: if: github.event.pull_request.head.repo.full_name == github.repository permissions: contents: 'read' @@ -47,106 +52,171 @@ jobs: - name: Set release name run: echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV" - - name: Deploy eoAPI - id: deploy - continue-on-error: true + - name: Wait for K3s to be fully ready run: | - echo "=== Starting eoAPI deployment ===" - export RELEASE_NAME="$RELEASE_NAME" - export PGO_VERSION="${{ env.PGO_VERSION }}" - export GITHUB_SHA="${{ github.sha }}" - ./scripts/deploy.sh --ci + echo "=== Waiting for K3s to be fully ready ===" - - name: Check deployment status - id: check - if: steps.deploy.outcome == 'success' - run: | - echo "=== Checking deployment status ===" - export RELEASE_NAME="$RELEASE_NAME" - ./scripts/test.sh check-deployment --debug + # Wait for core K3s components to be ready + echo "Waiting for kube-system pods to be ready..." + kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s + kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s + + # Wait for API server to be fully responsive + echo "Checking API server responsiveness..." 
+ kubectl get nodes + kubectl get pods --all-namespaces - - name: Debug pgstac jobs if deployment failed - if: steps.deploy.outcome == 'failure' - continue-on-error: true + # Give K3s a moment to initialize all CRDs + echo "Waiting for K3s initialization to complete..." + sleep 10 + + echo "✅ K3s is ready" + + - name: Install Knative Serving run: | - echo "=== Debugging pgstac job failures ===" - - # Check pgstac-migrate job - echo "===== pgstac-migrate Job Status =====" - kubectl get jobs -l app.kubernetes.io/name=pgstac-migrate -o wide || echo "No pgstac-migrate jobs found" - - MIGRATE_PODS=$(kubectl get pods -l app.kubernetes.io/name=pgstac-migrate -o jsonpath='{.items[*].metadata.name}' 2>/dev/null) - if [ -n "$MIGRATE_PODS" ]; then - for POD in $MIGRATE_PODS; do - echo "--- Logs from migrate pod $POD ---" - kubectl logs "$POD" --tail=100 || true - echo "--- Description of migrate pod $POD ---" - kubectl describe pod "$POD" - done - fi - - # Check pgstac-load-samples job - echo "===== pgstac-load-samples Job Status =====" - kubectl get jobs -l app.kubernetes.io/name=pgstac-load-samples -o wide || echo "No pgstac-load-samples jobs found" - - SAMPLES_PODS=$(kubectl get pods -l app.kubernetes.io/name=pgstac-load-samples -o jsonpath='{.items[*].metadata.name}' 2>/dev/null) - if [ -n "$SAMPLES_PODS" ]; then - for POD in $SAMPLES_PODS; do - echo "--- Logs from samples pod $POD ---" - kubectl logs "$POD" --tail=100 || true - echo "--- Description of samples pod $POD ---" - kubectl describe pod "$POD" + echo "=== Installing Knative Serving ===" + # Install Knative Serving CRDs + kubectl apply -f https://github.com/knative/serving/releases/download/knative-v1.17.0/serving-crds.yaml + # Install Knative Serving core components + kubectl apply -f https://github.com/knative/serving/releases/download/knative-v1.17.0/serving-core.yaml + # Install Kourier networking layer for Knative + kubectl apply -f 
https://github.com/knative/net-kourier/releases/download/knative-v1.17.0/kourier.yaml + # Configure Knative to use Kourier + kubectl patch configmap/config-network \ + --namespace knative-serving \ + --type merge \ + --patch '{"data":{"ingress-class":"kourier.ingress.networking.knative.dev"}}' + # Wait for Knative Serving to be ready + echo "Waiting for Knative Serving to be ready..." + kubectl wait --for=condition=Ready pod -l app=controller -n knative-serving --timeout=300s + kubectl wait --for=condition=Ready pod -l app=webhook -n knative-serving --timeout=300s + kubectl wait --for=condition=Ready pod -l app=3scale-kourier-gateway -n kourier-system --timeout=300s + + - name: Install Knative Eventing + run: | + echo "=== Installing Knative Eventing ===" + # Install Knative Eventing CRDs (includes SinkBinding) + kubectl apply -f https://github.com/knative/eventing/releases/download/knative-v1.17.0/eventing-crds.yaml + # Install Knative Eventing core components + kubectl apply -f https://github.com/knative/eventing/releases/download/knative-v1.17.0/eventing-core.yaml + # Wait for Knative Eventing to be ready + echo "Waiting for Knative Eventing to be ready..." + kubectl wait --for=condition=Ready pod -l app=eventing-controller -n knative-eventing --timeout=300s + kubectl wait --for=condition=Ready pod -l app=eventing-webhook -n knative-eventing --timeout=300s + + - name: Deploy CloudEvents sink for eoapi-notifier + run: | + echo "=== Deploying CloudEvents sink ===" + # Create the namespace first + kubectl create namespace eoapi || true + # Deploy the CloudEvents sink service + kubectl apply -f charts/eoapi/samples/cloudevents-sink.yaml + # Wait for the Knative service to be ready + echo "Waiting for CloudEvents sink to be ready..." 
+ kubectl wait --for=condition=Ready ksvc/eoapi-cloudevents-sink -n eoapi --timeout=300s + + - name: Wait for Traefik to be ready + run: | + echo "=== Waiting for Traefik to be ready ===" + + # Wait for Traefik pods to be ready first + echo "Waiting for Traefik controller to be ready..." + kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s + + # Wait for essential Traefik CRDs to be available + echo "Checking for Traefik CRDs..." + timeout=300 + counter=0 + required_crds=("middlewares.traefik.io" "ingressroutes.traefik.io") + + for crd in "${required_crds[@]}"; do + echo "Checking for CRD: $crd" + counter=0 + while [ $counter -lt $timeout ]; do + if kubectl get crd "$crd" &>/dev/null; then + echo "✅ $crd is available" + break + fi + echo "⏳ Waiting for $crd... ($counter/$timeout)" + sleep 3 + counter=$((counter + 3)) done - fi - - # Check database status - echo "===== Database Pod Status =====" - kubectl get pods -l postgres-operator.crunchydata.com/cluster -o wide - kubectl get postgrescluster -o wide - # Check ConfigMaps - echo "===== Relevant ConfigMaps =====" - kubectl get configmaps | grep -E "initdb|pgstac" || echo "No relevant configmaps found" + if [ $counter -ge $timeout ]; then + echo "❌ Timeout waiting for $crd" + echo "Available Traefik CRDs:" + kubectl get crd | grep traefik || echo "No Traefik CRDs found" + echo "All CRDs:" + kubectl get crd + exit 1 + fi + done - # Check for any related events - echo "===== Related Kubernetes Events =====" - kubectl get events | grep -E "pgstac|initdb" || echo "No relevant events found" + echo "✅ All required Traefik CRDs are ready" - # Check notification system status - echo "===== Notification System Status =====" - kubectl get deployments -l app.kubernetes.io/name=eoapi-notifier -o wide || echo "No eoapi-notifier deployment found" - kubectl get ksvc -l app.kubernetes.io/component=cloudevents-sink -o wide || echo "No Knative CloudEvents sink found" + - name: 
Deploy eoAPI + id: deploy + run: | + echo "=== Starting eoAPI deployment ===" + export RELEASE_NAME="$RELEASE_NAME" + export PGO_VERSION="${{ env.PGO_VERSION }}" + export GITHUB_SHA="${{ github.sha }}" + ./scripts/deploy.sh --ci - exit 1 - name: Run integration tests - if: steps.deploy.outcome == 'success' run: | echo "=== Running integration tests ===" export RELEASE_NAME="$RELEASE_NAME" ./scripts/test.sh integration --debug - - name: Debug deployment status - if: always() + - name: Debug failed deployment + if: failure() run: | - echo "=== Final Deployment Status ===" + echo "=== Deployment failed - collecting debug information ===" kubectl get pods -o wide kubectl get jobs -o wide kubectl get services -o wide - kubectl get ingress + kubectl get events --sort-by='.lastTimestamp' | tail -20 || true + + # Check Knative installation status + echo "=== Knative Installation Status ===" + kubectl get pods -n knative-serving -o wide || echo "Knative Serving not installed" + kubectl get pods -n knative-eventing -o wide || echo "Knative Eventing not installed" + kubectl get pods -n kourier-system -o wide || echo "Kourier not installed" + # Check Knative CRDs + echo "=== Knative CRDs Status ===" + kubectl get crd | grep knative || echo "No Knative CRDs found" + kubectl get crd sinkbindings.sources.knative.dev || echo "SinkBinding CRD not found" + + # Check Traefik status + echo "=== Traefik Status ===" + kubectl get pods -n kube-system -l app.kubernetes.io/name=traefik -o wide || echo "No Traefik pods found" + kubectl get crd | grep traefik || echo "No Traefik CRDs found" + kubectl get crd middlewares.traefik.io || echo "Middleware CRD not found" + kubectl get crd ingressroutes.traefik.io || echo "IngressRoute CRD not found" # Check notification system final status echo "=== Notification System Final Status ===" kubectl get deployments -l app.kubernetes.io/name=eoapi-notifier -o wide || echo "No eoapi-notifier deployment" kubectl get pods -l 
app.kubernetes.io/name=eoapi-notifier -o wide || echo "No eoapi-notifier pods" - kubectl get ksvc -l app.kubernetes.io/component=cloudevents-sink -o wide || echo "No Knative CloudEvents sink" - kubectl get pods -l serving.knative.dev/service -o wide || echo "No Knative CloudEvents sink pods" + kubectl get ksvc -n eoapi -o wide || echo "No Knative services in eoapi namespace" + kubectl get ksvc eoapi-cloudevents-sink -n eoapi -o wide || echo "No eoapi-cloudevents-sink Knative service" + kubectl get pods -l serving.knative.dev/service=eoapi-cloudevents-sink -n eoapi -o wide || echo "No CloudEvents sink pods" + # Check SinkBinding resources + echo "=== SinkBinding Resources ===" + kubectl get sinkbindings -A -o wide || echo "No SinkBinding resources found" # Show notification logs if they exist echo "=== eoapi-notifier Logs ===" kubectl logs -l app.kubernetes.io/name=eoapi-notifier --tail=20 || echo "No eoapi-notifier logs" echo "=== Knative CloudEvents Sink Logs ===" - kubectl logs -l serving.knative.dev/service --tail=20 || echo "No Knative CloudEvents sink logs" + kubectl logs -l serving.knative.dev/service=eoapi-cloudevents-sink -n eoapi --tail=20 || echo "No CloudEvents sink logs" + # Show Knative system logs if there are issues + echo "=== Knative Serving Controller Logs ===" + kubectl logs -n knative-serving -l app=controller --tail=20 || echo "No Knative Serving controller logs" + echo "=== Knative Eventing Controller Logs ===" + kubectl logs -n knative-eventing -l app=eventing-controller --tail=20 || echo "No Knative Eventing controller logs" - name: Cleanup diff --git a/.gitignore b/.gitignore index 35f7b4e8..469ec3dc 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ charts/config.yaml charts/eoapi/charts/*.tgz config_ingress.yaml __pycache__ + +CLAUDE.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a09ab58..c20b3be4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- CI/CD pipeline: improved job waiting, error handling, and Helm unit tests +- Added resource discovery and error context in test scripts + ## [0.7.12] - 2025-10-17 - Bumped eoapi-notifier dependency version to 0.0.7 diff --git a/charts/eoapi/test-k3s-unittest-values.yaml b/charts/eoapi/test-k3s-unittest-values.yaml index 33a4ec56..a1e83a16 100644 --- a/charts/eoapi/test-k3s-unittest-values.yaml +++ b/charts/eoapi/test-k3s-unittest-values.yaml @@ -62,7 +62,7 @@ eoapi-notifier: channel: pgstac_items_change connection: existingSecret: - name: "eoapi-test-pguser-eoapi" + name: "" keys: username: "user" password: "password" diff --git a/charts/eoapi/test-local-values.yaml b/charts/eoapi/test-local-values.yaml new file mode 100644 index 00000000..59e139dd --- /dev/null +++ b/charts/eoapi/test-local-values.yaml @@ -0,0 +1,109 @@ +# Local test configuration for minikube/local development +# Based on test-k3s-unittest-values.yaml with minimal changes for local environment + +testing: true +ingress: + enabled: true + className: "nginx" # Changed from "traefik" for minikube + pathType: "Prefix" + host: "eoapi.local" + +pgstacBootstrap: + enabled: true + settings: + resources: + requests: + cpu: "256m" + memory: "1024Mi" + limits: + cpu: "512m" + memory: "1024Mi" + +raster: + enabled: true + settings: + resources: + limits: + cpu: "768m" + memory: "2048Mi" # Reduced from 4096Mi for local + requests: + cpu: "256m" + memory: "1024Mi" + +stac: + enabled: true + settings: + resources: + limits: + cpu: "1280m" + memory: "1536Mi" + requests: + cpu: "512m" + memory: "1024Mi" + +vector: + enabled: true + settings: + resources: + limits: + cpu: "768m" + memory: "1536Mi" + requests: + cpu: "256m" + memory: "1024Mi" + envVars: + TIPG_DEBUG: "True" + +eoapi-notifier: + enabled: true + config: + logLevel: DEBUG + sources: + - type: pgstac + config: + channel: pgstac_items_change + connection: + existingSecret: +
name: "" # Set dynamically by deploy script + keys: + username: "user" + password: "password" + host: "host" + port: "port" + database: "dbname" + outputs: + - type: cloudevents + config: + source: /eoapi/pgstac + event_type: org.eoapi.stac.item + destination: + ref: + apiVersion: serving.knative.dev/v1 + kind: Service + name: eoapi-cloudevents-sink + resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "200m" + memory: "128Mi" + +# Reduce PostgreSQL resources for local development +postgrescluster: + instances: + - name: "postgres" + replicas: 1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "1Gi" # Reduced for local + resources: + requests: + cpu: "100m" # Reduced for local + memory: "512Mi" # Reduced for local + limits: + cpu: "500m" # Reduced for local + memory: "1Gi" # Reduced for local diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 0b356109..3a6873ce 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -140,10 +140,17 @@ deploy_eoapi() { HELM_CMD="$HELM_CMD -f ./eoapi/values.yaml" fi - # CI-specific configuration + # Environment-specific configuration if [ "$CI_MODE" = true ] && [ -f "./eoapi/test-k3s-unittest-values.yaml" ]; then log_info "Using CI test configuration..." HELM_CMD="$HELM_CMD -f ./eoapi/test-k3s-unittest-values.yaml" + # Fix eoapi-notifier secret name dynamically + HELM_CMD="$HELM_CMD --set eoapi-notifier.config.sources[0].config.connection.existingSecret.name=$RELEASE_NAME-pguser-eoapi" + elif [ -f "./eoapi/test-local-values.yaml" ]; then + log_info "Using local test configuration..." + HELM_CMD="$HELM_CMD -f ./eoapi/test-local-values.yaml" + # Fix eoapi-notifier secret name dynamically for local mode too + HELM_CMD="$HELM_CMD --set eoapi-notifier.config.sources[0].config.connection.existingSecret.name=$RELEASE_NAME-pguser-eoapi" fi # Set git SHA if available @@ -164,8 +171,67 @@ deploy_eoapi() { log_info "Verifying deployment..." 
kubectl get pods -n "$NAMESPACE" -o wide + # Wait for pgstac jobs to complete first + if kubectl get job -n "$NAMESPACE" -l "app=$RELEASE_NAME-pgstac-migrate" >/dev/null 2>&1; then + log_info "Waiting for pgstac-migrate job to complete..." + if ! kubectl wait --for=condition=complete job -l "app=$RELEASE_NAME-pgstac-migrate" -n "$NAMESPACE" --timeout=600s; then + log_error "pgstac-migrate job failed to complete" + kubectl describe job -l "app=$RELEASE_NAME-pgstac-migrate" -n "$NAMESPACE" + kubectl logs -l "app=$RELEASE_NAME-pgstac-migrate" -n "$NAMESPACE" --tail=50 || true + exit 1 + fi + fi + + if kubectl get job -n "$NAMESPACE" -l "app=$RELEASE_NAME-pgstac-load-samples" >/dev/null 2>&1; then + log_info "Waiting for pgstac-load-samples job to complete..." + if ! kubectl wait --for=condition=complete job -l "app=$RELEASE_NAME-pgstac-load-samples" -n "$NAMESPACE" --timeout=300s; then + log_error "pgstac-load-samples job failed to complete" + kubectl describe job -l "app=$RELEASE_NAME-pgstac-load-samples" -n "$NAMESPACE" + kubectl logs -l "app=$RELEASE_NAME-pgstac-load-samples" -n "$NAMESPACE" --tail=50 || true + exit 1 + fi + fi + + # Wait for service pods to be ready + log_info "Waiting for eoAPI services to be ready..." + local services=("stac" "raster" "vector") + local failed_services=() + + for service in "${services[@]}"; do + # Try different label patterns to find pods + local found=false + local patterns=( + "app.kubernetes.io/instance=$RELEASE_NAME,app.kubernetes.io/name=$service" + "app=$RELEASE_NAME-$service" + ) + + for pattern in "${patterns[@]}"; do + if kubectl get pods -n "$NAMESPACE" -l "$pattern" >/dev/null 2>&1; then + log_info "Waiting for $service service pods to be ready..." 
+ if kubectl wait --for=condition=Ready pod -l "$pattern" -n "$NAMESPACE" --timeout=300s; then + found=true + break + else + log_warn "$service service pods found but failed readiness check" + kubectl describe pods -n "$NAMESPACE" -l "$pattern" || true + fi + fi + done + + if [ "$found" = false ]; then + failed_services+=("$service") + fi + done + + if [ ${#failed_services[@]} -ne 0 ]; then + log_error "Failed to start services: ${failed_services[*]}" + kubectl get pods -n "$NAMESPACE" -o wide + kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' | tail -20 || true + exit 1 + fi + log_info "eoAPI deployment completed successfully!" - log_info "Services available in namespace: $NAMESPACE" + log_info "All services are ready in namespace: $NAMESPACE" if [ "$CI_MODE" != true ]; then log_info "To run integration tests: make integration" diff --git a/scripts/test.sh b/scripts/test.sh index 9fea0df8..e7fda4fe 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -11,7 +11,7 @@ source "$SCRIPT_DIR/lib/common.sh" # Global variables DEBUG_MODE=false -NAMESPACE="" +NAMESPACE="${NAMESPACE:-}" COMMAND="" # Auto-detect CI environment @@ -127,9 +127,9 @@ install_test_deps() { python_cmd="python3" fi - if ! $python_cmd -m pip install --quiet pytest httpx >/dev/null 2>&1; then - log_error "Failed to install test dependencies (pytest, httpx)" - log_error "Please install manually: pip install pytest httpx" + if ! 
$python_cmd -m pip install --quiet pytest httpx psycopg2-binary >/dev/null 2>&1; then + log_error "Failed to install test dependencies (pytest, httpx, psycopg2-binary)" + log_error "Please install manually: pip install pytest httpx psycopg2-binary" exit 1 fi @@ -226,27 +226,50 @@ detect_deployment() { show_debug_info() { log_info "=== Enhanced Debug Information ===" - log_info "=== Current Pod Status ===" + log_info "=== Environment Variables ===" + log_info "RELEASE_NAME: ${RELEASE_NAME:-}" + log_info "NAMESPACE: ${NAMESPACE:-}" + log_info "CI: ${CI:-}" + + log_info "=== All Namespaces ===" + kubectl get namespaces || true + + log_info "=== All Pods Across All Namespaces ===" + kubectl get pods -A -o wide || true + + log_info "=== Release-Specific Resources ===" + if [ -n "${RELEASE_NAME:-}" ]; then + log_info "Looking for resources with release name: $RELEASE_NAME" + kubectl get pods -A -l "app.kubernetes.io/instance=$RELEASE_NAME" -o wide || true + kubectl get pods -A | grep "$RELEASE_NAME" || echo "No pods found with release name $RELEASE_NAME" + kubectl get jobs -A -l "app=$RELEASE_NAME-pgstac-migrate" -o wide || true + kubectl get jobs -A -l "app=$RELEASE_NAME-pgstac-load-samples" -o wide || true + fi + + log_info "=== Current Namespace ($NAMESPACE) Pod Status ===" kubectl get pods -n "$NAMESPACE" -o wide || true - log_info "=== Pod Phase Summary ===" + log_info "=== Pod Phase Summary in $NAMESPACE ===" kubectl get pods -n "$NAMESPACE" --no-headers | awk '{print $3}' | sort | uniq -c || true log_info "=== Services Status ===" - kubectl get services -n "$NAMESPACE" || true + kubectl get services -n "$NAMESPACE" -o wide || true log_info "=== Ingress Status ===" - kubectl get ingress -n "$NAMESPACE" || true + kubectl get ingress -n "$NAMESPACE" -o wide || true log_info "=== Jobs Status ===" kubectl get jobs -n "$NAMESPACE" -o wide || true log_info "=== PostgreSQL Status ===" - kubectl get postgrescluster -o wide || true - kubectl get pods -l 
postgres-operator.crunchydata.com/cluster -o wide || true + kubectl get postgrescluster -A + kubectl get pods -l postgres-operator.crunchydata.com/cluster -A -o wide + + log_info "=== Recent Events in $NAMESPACE ===" + kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' | tail -10 || true - log_info "=== Recent Events ===" - kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' | tail -30 || true + log_info "=== Recent Events Across All Namespaces ===" + kubectl get events -A --sort-by='.lastTimestamp' | tail -20 || true } # Check if eoapi is deployed