diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b9420e..5cd285af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,12 +49,20 @@ jobs: steps: - uses: actions/checkout@v5 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + pip install pytest pytest-timeout requests types-requests psycopg2-binary + - name: Start K3s cluster uses: jupyterhub/action-k3s-helm@v4 with: k3s-channel: latest helm-version: ${{ env.HELM_VERSION }} - metrics-enabled: false + metrics-enabled: true docker-enabled: true - name: Set release name @@ -81,6 +89,13 @@ jobs: [ $counter -ge $timeout ] && { echo "❌ Timeout waiting for $crd"; exit 1; } done + # Wait for metrics-server to be ready (required for HPA) + echo "Waiting for metrics-server..." + kubectl wait --for=condition=Ready pod -l k8s-app=metrics-server -n kube-system --timeout=300s || { + echo "⚠️ Metrics-server not ready, checking status..." + kubectl get pods -n kube-system -l k8s-app=metrics-server + } + echo "✅ K3s cluster ready" - name: Deploy eoAPI @@ -104,6 +119,11 @@ jobs: export RELEASE_NAME="$RELEASE_NAME" ./scripts/test.sh integration --debug + - name: Run autoscaling tests + run: | + export RELEASE_NAME="$RELEASE_NAME" + python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow" + - name: Debug failed deployment if: failure() run: | diff --git a/.github/workflows/tests/test_autoscaling.py b/.github/workflows/tests/test_autoscaling.py new file mode 100644 index 00000000..463bc14c --- /dev/null +++ b/.github/workflows/tests/test_autoscaling.py @@ -0,0 +1,649 @@ +"""Test autoscaling behavior and HPA functionality.""" + +import json +import os +import subprocess +import threading +import time +from typing import Any, Dict, List, Optional, cast + +import pytest +import requests + + +def get_namespace() -> str: + return os.environ.get("NAMESPACE", "eoapi") + + +def get_release_name() -> str: + return os.environ.get("RELEASE_NAME", "eoapi") + + +def get_base_url() -> str: + namespace = get_namespace() + + # Check if we have an ingress + result = subprocess.run( + ["kubectl", "get", "ingress", "-n", namespace, "-o", "json"], + capture_output=True, + text=True, + ) + + if result.returncode == 0: + ingress_data = json.loads(result.stdout) + if ingress_data["items"]: + ingress = ingress_data["items"][0] + rules = ingress.get("spec", {}).get("rules", []) + if rules: + host = rules[0].get("host", "localhost") + # Check if host is accessible + try: + response = requests.get( + f"http://{host}/stac/collections", timeout=5 + ) + if response.status_code == 200: + return f"http://{host}" + except requests.RequestException: + pass + + return "http://localhost:8080" + + +def kubectl_get( + resource: str, + namespace: Optional[str] = None, + label_selector: Optional[str] = None, + output: str = "json", +) -> subprocess.CompletedProcess[str]: + cmd = ["kubectl", "get", resource] + + if namespace: + cmd.extend(["-n", namespace]) + + if label_selector: + cmd.extend(["-l", label_selector]) + + if output: + cmd.extend(["-o", output]) + + result = subprocess.run(cmd, capture_output=True, text=True) + return result + + +def get_pod_metrics(namespace: str, service_name: str) -> List[Dict[str, str]]: + release_name = get_release_name() + result = subprocess.run( + [ + "kubectl", + "top", + "pods", + "-n", + namespace, + "-l", + f"app={release_name}-{service_name}", + "--no-headers", + ], + capture_output=True, + text=True, + ) + + if 
result.returncode != 0:
+        return []
+
+    metrics: List[Dict[str, str]] = []
+    for line in result.stdout.strip().split("\n"):
+        if line.strip():
+            parts = line.split()
+            if len(parts) >= 3:
+                pod_name = parts[0]
+                cpu = parts[1]  # e.g., "25m"
+                memory = parts[2]  # e.g., "128Mi"
+                metrics.append({"pod": pod_name, "cpu": cpu, "memory": memory})
+
+    return metrics
+
+
+def get_hpa_status(namespace: str, hpa_name: str) -> Optional[Dict[str, Any]]:
+    """Get HPA status for a specific HPA."""
+    result = kubectl_get("hpa", namespace=namespace, output="json")
+    if result.returncode != 0:
+        return None
+
+    hpas = json.loads(result.stdout)
+    for hpa in hpas["items"]:
+        if hpa["metadata"]["name"] == hpa_name:
+            return cast(Dict[str, Any], hpa)
+
+    return None
+
+
+def get_pod_count(namespace: str, service_name: str) -> int:
+    release_name = get_release_name()
+    result = kubectl_get(
+        "pods",
+        namespace=namespace,
+        label_selector=f"app={release_name}-{service_name}",
+    )
+
+    if result.returncode != 0:
+        return 0
+
+    pods = json.loads(result.stdout)
+    running_pods = [
+        pod for pod in pods["items"] if pod["status"]["phase"] == "Running"
+    ]
+
+    return len(running_pods)
+
+
+def make_request(url: str, timeout: int = 10) -> bool:
+    """Make a single HTTP request and return success status."""
+    try:
+        response = requests.get(url, timeout=timeout)
+        return response.status_code == 200
+    except requests.RequestException:
+        return False
+
+
+def generate_load(
+    base_url: str,
+    endpoints: List[str],
+    duration: int = 60,
+    concurrent_requests: int = 5,
+    delay: float = 0.1,
+) -> Dict[str, Any]:
+    """Generate HTTP load against specified endpoints."""
+    end_time = time.time() + duration
+    success_count = 0
+    error_count = 0
+    # Guard the shared counters: "+=" on an int is a read-modify-write, so
+    # concurrent workers could otherwise drop updates.
+    lock = threading.Lock()
+
+    def worker() -> None:
+        nonlocal success_count, error_count
+        while time.time() < end_time:
+            for endpoint in endpoints:
+                url = f"{base_url}{endpoint}"
+                ok = make_request(url)
+                with lock:
+                    if ok:
+                        success_count += 1
+                    else:
+                        error_count += 1
+            time.sleep(delay)
+
+    # Start concurrent workers
+    threads = []
+    for _ in range(concurrent_requests):
+        thread = threading.Thread(target=worker)
+        thread.start()
+        threads.append(thread)
+
+    # Wait for all threads to complete
+    for thread in threads:
+        thread.join()
+
+    return {
+        "total_requests": success_count + error_count,
+        "successful_requests": success_count,
+        "failed_requests": error_count,
+        "success_rate": success_count / (success_count + error_count)
+        if (success_count + error_count) > 0
+        else 0,
+    }
+
+
+class TestHPAConfiguration:
+    def test_hpa_resources_properly_configured(self) -> None:
+        namespace = get_namespace()
+        result = kubectl_get("hpa", namespace=namespace)
+
+        if result.returncode != 0:
+            pytest.skip("No HPA resources found - autoscaling not enabled")
+
+        hpas = json.loads(result.stdout)
+        assert len(hpas["items"]) > 0, "No HPA resources configured"
+
+        for hpa in hpas["items"]:
+            spec = hpa["spec"]
+            hpa_name = hpa["metadata"]["name"]
+
+            assert "scaleTargetRef" in spec, (
+                f"HPA {hpa_name} missing scaleTargetRef"
+            )
+            assert "minReplicas" in spec, f"HPA {hpa_name} missing minReplicas"
+            assert "maxReplicas" in spec, f"HPA {hpa_name} missing maxReplicas"
+            assert "metrics" in spec, (
+                f"HPA {hpa_name} missing metrics configuration"
+            )
+
+            min_replicas = spec["minReplicas"]
+            max_replicas = spec["maxReplicas"]
+            assert min_replicas > 0, f"HPA {hpa_name} minReplicas must be > 0"
+            assert max_replicas > min_replicas, (
+                f"HPA {hpa_name} maxReplicas must be > minReplicas"
+            )
+
+            metrics = spec["metrics"]
+            assert len(metrics) > 0, 
f"HPA {hpa_name} has no metrics configured" + + cpu_metrics = [ + m + for m in metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + assert len(cpu_metrics) > 0, ( + f"HPA {hpa_name} must have CPU metric configured" + ) + + print( + f"✅ HPA {hpa_name}: {min_replicas}-{max_replicas} replicas, {len(metrics)} metrics" + ) + + def test_target_deployments_exist(self) -> None: + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + target_ref = hpa["spec"]["scaleTargetRef"] + target_name = target_ref["name"] + hpa_name = hpa["metadata"]["name"] + + # Check target deployment exists + deploy_result = kubectl_get( + "deployment", namespace=namespace, output="json" + ) + assert deploy_result.returncode == 0, "Cannot list deployments" + + deployments = json.loads(deploy_result.stdout) + target_deployment = next( + ( + d + for d in deployments["items"] + if d["metadata"]["name"] == target_name + ), + None, + ) + + assert target_deployment is not None, ( + f"HPA {hpa_name} target deployment {target_name} not found" + ) + + # Check deployment has ready replicas + status = target_deployment.get("status", {}) + ready_replicas = status.get("readyReplicas", 0) + assert ready_replicas > 0, ( + f"Target deployment {target_name} has no ready replicas" + ) + + print( + f"✅ HPA {hpa_name} target deployment {target_name} is ready ({ready_replicas} replicas)" + ) + + +class TestCPUScaling: + def test_cpu_metrics_collection(self) -> None: + """Verify CPU metrics are being collected for HPA targets.""" + namespace = get_namespace() + services = ["stac", "raster", "vector"] + + metrics_available = [] + + for service in services: + try: + pod_metrics = get_pod_metrics(namespace, service) + if pod_metrics: + metrics_available.append(service) + for metric in pod_metrics: + print( + f"✅ {service} pod {metric['pod']}: CPU={metric['cpu']}, Memory={metric['memory']}" + ) + except Exception as e: + print(f"⚠️ Cannot get metrics for {service}: {e}") + + assert len(metrics_available) > 0, ( + "No CPU metrics available for any service" + ) + + def test_hpa_cpu_utilization_calculation(self) -> None: + """Verify HPA calculates CPU utilization correctly.""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + + # Check if HPA has current metrics + current_metrics = status.get("currentMetrics", []) + cpu_metrics = [ + m + for m in current_metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + + if cpu_metrics: + cpu_utilization = cpu_metrics[0]["resource"]["current"].get( + "averageUtilization" + ) + if cpu_utilization is not None: + assert 0 <= cpu_utilization <= 1000, ( + f"Invalid CPU utilization: {cpu_utilization}%" + ) + print( + f"✅ HPA {hpa_name} CPU utilization: {cpu_utilization}%" + ) + else: + print( + f"⚠️ HPA {hpa_name} CPU metric exists but no utilization value" + ) + else: + # Check conditions for why metrics might not be available + conditions = status.get("conditions", []) + for condition in conditions: + if ( + condition["type"] == "ScalingActive" + and condition["status"] == "False" + ): + print( + f"⚠️ HPA {hpa_name} scaling not active: 
{condition.get('message', 'Unknown reason')}" + ) + break + else: + print(f"⚠️ HPA {hpa_name} no CPU metrics available yet") + + def test_cpu_resource_requests_alignment(self) -> None: + """Verify CPU resource requests are properly set for percentage calculations.""" + namespace = get_namespace() + services = ["stac", "raster", "vector"] + + for service in services: + release_name = get_release_name() + result = kubectl_get( + "pods", + namespace=namespace, + label_selector=f"app={release_name}-{service}", + ) + + if result.returncode != 0: + continue + + pods = json.loads(result.stdout) + running_pods = [ + p for p in pods["items"] if p["status"]["phase"] == "Running" + ] + + if not running_pods: + continue + + pod = running_pods[0] # Check first running pod + containers = pod["spec"]["containers"] + + main_container = next( + (c for c in containers if c["name"] == service), None + ) + if not main_container: + continue + + resources = main_container.get("resources", {}) + requests = resources.get("requests", {}) + + if "cpu" not in requests: + print( + f"⚠️ Service {service} missing CPU requests - HPA percentage calculation may be inaccurate" + ) + continue + + cpu_request = requests["cpu"] + print(f"✅ Service {service} CPU request: {cpu_request}") + + # Parse CPU request to verify it's reasonable + if cpu_request.endswith("m"): + cpu_millicores = int(cpu_request[:-1]) + assert cpu_millicores > 0, ( + f"Service {service} has zero CPU request" + ) + assert cpu_millicores <= 2000, ( + f"Service {service} has very high CPU request: {cpu_millicores}m" + ) + + +class TestScalingBehavior: + """Test actual scaling behavior under load.""" + + @pytest.mark.slow + def test_load_response_scaling(self) -> None: + """Generate load and verify scaling response (when possible).""" + namespace = get_namespace() + base_url = get_base_url() + + # Test endpoints that should generate CPU load + load_endpoints = [ + "/stac/collections", + "/stac/search?collections=noaa-emergency-response&limit=50", + "/raster/collections", + "/vector/collections", + ] + + # Check initial state + initial_pod_counts: Dict[str, int] = {} + services = ["stac", "raster", "vector"] + + for service in services: + initial_pod_counts[service] = get_pod_count(namespace, service) + + print(f"Initial pod counts: {initial_pod_counts}") + + # Skip test if we can't connect to services + try: + response = requests.get(f"{base_url}/stac/collections", timeout=5) + if response.status_code != 200: + pytest.skip("Cannot access API endpoints for load testing") + except requests.RequestException: + pytest.skip("API endpoints not accessible for load testing") + + # Generate moderate load for limited time (suitable for CI) + load_duration = 90 # 1.5 minutes + concurrent_requests = 8 + + print( + f"Generating load: {concurrent_requests} concurrent requests for {load_duration}s" + ) + + # Start load generation + load_stats = generate_load( + base_url=base_url, + endpoints=load_endpoints, + duration=load_duration, + concurrent_requests=concurrent_requests, + delay=0.05, # 20 requests/second per worker + ) + + print(f"Load test completed: {load_stats}") + + # Wait a bit for metrics to propagate and scaling to potentially occur + print("Waiting for metrics to propagate and potential scaling...") + time.sleep(30) + + # Check final state + final_pod_counts: Dict[str, int] = {} + for service in services: + final_pod_counts[service] = get_pod_count(namespace, service) + + print(f"Final pod counts: {final_pod_counts}") + + # Check HPA metrics after load + 
result = kubectl_get("hpa", namespace=namespace) + if result.returncode == 0: + hpas = json.loads(result.stdout) + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + current_metrics = status.get("currentMetrics", []) + + cpu_metrics = [ + m + for m in current_metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + + if cpu_metrics: + cpu_utilization = cpu_metrics[0]["resource"]["current"].get( + "averageUtilization" + ) + print(f"Post-load HPA {hpa_name} CPU: {cpu_utilization}%") + + assert load_stats["success_rate"] > 0.8, ( + f"Load test had low success rate: {load_stats['success_rate']:.2%}" + ) + assert load_stats["total_requests"] > 100, ( + "Load test generated insufficient requests" + ) + + # Note: In CI environments with limited resources, actual scaling may not occur + # The important thing is that the system handled the load successfully + scaling_occurred = any( + final_pod_counts[svc] > initial_pod_counts[svc] + for svc in services + if svc in initial_pod_counts and svc in final_pod_counts + ) + + if scaling_occurred: + print("✅ Scaling occurred during load test") + else: + print( + "⚠️ No scaling occurred - may be due to CI resource constraints or low load thresholds" + ) + + def test_scaling_stabilization_windows(self) -> None: + """Verify HPA respects stabilization windows in configuration.""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + spec = hpa["spec"] + + behavior = spec.get("behavior", {}) + if not behavior: + print(f"⚠️ HPA {hpa_name} has no scaling behavior configured") + continue + + # Check scale up behavior + scale_up = behavior.get("scaleUp", {}) + if scale_up: + stabilization = scale_up.get("stabilizationWindowSeconds", 0) + policies = scale_up.get("policies", []) + print( + f"✅ HPA {hpa_name} scale-up: {stabilization}s stabilization, {len(policies)} policies" + ) + + # Check scale down behavior + scale_down = behavior.get("scaleDown", {}) + if scale_down: + stabilization = scale_down.get("stabilizationWindowSeconds", 0) + policies = scale_down.get("policies", []) + print( + f"✅ HPA {hpa_name} scale-down: {stabilization}s stabilization, {len(policies)} policies" + ) + + +class TestRequestRateScaling: + """Test request rate-based autoscaling (when available).""" + + def test_custom_metrics_for_request_rate(self) -> None: + """Check if custom metrics for request rate scaling are available.""" + namespace = get_namespace() + + # Check if custom metrics API has request rate metrics + result = subprocess.run( + ["kubectl", "get", "--raw", "/apis/custom.metrics.k8s.io/v1beta1"], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + pytest.skip("Custom metrics API not available") + + api_response = json.loads(result.stdout) + resources = api_response.get("resources", []) + + # Look for nginx ingress controller metrics + request_rate_metrics = [ + r + for r in resources + if "nginx_ingress_controller" in r.get("name", "") + and "requests" in r.get("name", "") + ] + + if request_rate_metrics: + print(f"✅ Found {len(request_rate_metrics)} request rate metrics") + for metric in request_rate_metrics: + print(f" - {metric['name']}") + else: + print( + "⚠️ No request rate metrics available - may require ingress controller metrics configuration" + ) + + def 
test_hpa_request_rate_metrics(self) -> None: + """Verify HPA can access request rate metrics (when configured).""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + current_metrics = status.get("currentMetrics", []) + + # Look for custom metrics (request rate) + custom_metrics = [ + m + for m in current_metrics + if m.get("type") in ["Pods", "Object"] + and "nginx_ingress_controller" in str(m) + ] + + if custom_metrics: + print(f"✅ HPA {hpa_name} has custom metrics available") + for metric in custom_metrics: + print(f" - {metric}") + else: + # Check if it's configured but not yet available + spec_metrics = hpa["spec"]["metrics"] + configured_custom = [ + m + for m in spec_metrics + if m.get("type") in ["Pods", "Object"] + ] + + if configured_custom: + print( + f"⚠️ HPA {hpa_name} has custom metrics configured but not available yet" + ) + else: + print( + f"ℹ️ HPA {hpa_name} uses only CPU metrics (no request rate scaling)" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8898da3c..a47c8a24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Automatic queue processor CronJob created when `use_queue` is "true" (configurable schedule via `queueProcessor.schedule`) - Automatic extent updater CronJob created when `update_collection_extent` is "false" (configurable schedule via `extentUpdater.schedule`) - Added ConfigMap checksum annotations to automatically restart pods when configuration changes [#344](https://github.com/developmentseed/eoapi-k8s/pull/344) +- Tests for autoscaling ### Changed diff --git a/charts/eoapi/tests/autoscaling_tests.yaml b/charts/eoapi/tests/autoscaling_tests.yaml new file mode 100644 index 00000000..18cd9452 --- /dev/null +++ b/charts/eoapi/tests/autoscaling_tests.yaml @@ -0,0 +1,241 @@ +suite: autoscaling tests +templates: + - templates/services/stac/hpa.yaml + - templates/services/raster/hpa.yaml + - templates/services/vector/hpa.yaml + - templates/services/multidim/hpa.yaml +tests: + - it: "autoscaling disabled by default" + set: + stac.autoscaling.enabled: false + raster.autoscaling.enabled: false + vector.autoscaling.enabled: false + multidim.autoscaling.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: "stac hpa not created when autoscaling disabled" + set: + stac.enabled: true + stac.autoscaling.enabled: false + template: templates/services/stac/hpa.yaml + asserts: + - hasDocuments: + count: 0 + + - it: "stac hpa created with cpu autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + stac.autoscaling.targets.cpu: 70 + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: metadata.name + value: "RELEASE-NAME-stac-hpa" + - equal: + path: spec.minReplicas + value: 1 + - isNotEmpty: + path: spec.maxReplicas + - equal: + path: spec.metrics[0].type + value: "Resource" + - equal: + path: spec.metrics[0].resource.name + value: "cpu" + - equal: + path: spec.metrics[0].resource.target.averageUtilization + value: 70 + + - it: "stac hpa created with request rate autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + 
stac.autoscaling.type: "requestRate" + stac.autoscaling.targets.requestRate: "50000m" + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.minReplicas + value: 1 + - isNotEmpty: + path: spec.maxReplicas + - equal: + path: spec.metrics[0].type + value: "Pods" + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "50000m" + + - it: "stac hpa created with both cpu and request rate autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "both" + stac.autoscaling.targets.cpu: 70 + stac.autoscaling.targets.requestRate: "50000m" + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].type + value: "Resource" + - equal: + path: spec.metrics[0].resource.name + value: "cpu" + - equal: + path: spec.metrics[1].type + value: "Pods" + - equal: + path: spec.metrics[1].pods.metric.name + value: "nginx_ingress_controller_requests" + + - it: "raster hpa created with request rate autoscaling" + set: + raster.enabled: true + raster.autoscaling.enabled: true + raster.autoscaling.type: "requestRate" + raster.autoscaling.targets.requestRate: "30000m" + template: templates/services/raster/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "30000m" + + - it: "vector hpa created with request rate autoscaling" + set: + vector.enabled: true + vector.autoscaling.enabled: true + vector.autoscaling.type: "requestRate" + vector.autoscaling.targets.requestRate: "40000m" + template: templates/services/vector/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "40000m" + + - it: "multidim hpa not created when service disabled" + set: + multidim.enabled: false + multidim.autoscaling.enabled: true + template: templates/services/multidim/hpa.yaml + asserts: + - hasDocuments: + count: 0 + + - it: "multidim hpa created when enabled" + set: + multidim.enabled: true + multidim.autoscaling.enabled: true + multidim.autoscaling.type: "cpu" + multidim.autoscaling.targets.cpu: 80 + template: templates/services/multidim/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].resource.target.averageUtilization + value: 80 + + - it: "hpa scaleTargetRef points to correct deployment" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + template: templates/services/stac/hpa.yaml + asserts: + - equal: + path: spec.scaleTargetRef.name + value: "RELEASE-NAME-stac" + - equal: + path: spec.scaleTargetRef.kind + value: "Deployment" + + - it: "hpa custom replica configuration" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + stac.autoscaling.minReplicas: 2 + stac.autoscaling.maxReplicas: 20 + template: templates/services/stac/hpa.yaml + asserts: + - equal: + path: spec.minReplicas + value: 2 + - equal: + path: spec.maxReplicas + value: 20 + + - it: "hpa includes proper labels" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + template: 
templates/services/stac/hpa.yaml
+    asserts:
+      - equal:
+          path: metadata.labels.app
+          value: "RELEASE-NAME-stac"
+
+  - it: "hpa behavior configuration applied when set"
+    set:
+      stac.enabled: true
+      stac.autoscaling.enabled: true
+      stac.autoscaling.type: "cpu"
+      stac.autoscaling.behavior.scaleUp.stabilizationWindowSeconds: 120
+      stac.autoscaling.behavior.scaleDown.stabilizationWindowSeconds: 300
+    template: templates/services/stac/hpa.yaml
+    asserts:
+      - equal:
+          path: spec.behavior.scaleUp.stabilizationWindowSeconds
+          value: 120
+      - equal:
+          path: spec.behavior.scaleDown.stabilizationWindowSeconds
+          value: 300
+
+  - it: "stac hpa production configuration with higher minReplicas"
+    set:
+      stac.enabled: true
+      stac.autoscaling.enabled: true
+      stac.autoscaling.minReplicas: 2
+      stac.autoscaling.maxReplicas: 20
+      stac.autoscaling.type: "requestRate"
+      stac.autoscaling.targets.requestRate: "50000m"
+    template: templates/services/stac/hpa.yaml
+    asserts:
+      - isKind:
+          of: HorizontalPodAutoscaler
+      - equal:
+          path: spec.minReplicas
+          value: 2
+      - equal:
+          path: spec.maxReplicas
+          value: 20
+      - equal:
+          path: spec.metrics[0].type
+          value: "Pods"
+      - equal:
+          path: spec.metrics[0].pods.target.averageValue
+          value: "50000m"
diff --git a/charts/eoapi/values.schema.json b/charts/eoapi/values.schema.json
index ce836105..ca487fb6 100644
--- a/charts/eoapi/values.schema.json
+++ b/charts/eoapi/values.schema.json
@@ -528,7 +528,7 @@
           "enum": ["cpu", "requestRate", "both"],
           "description": "Autoscaling metric type"
         },
-        "behaviour": {
+        "behavior": {
           "type": "object",
           "description": "Autoscaling behavior configuration"
         },
diff --git a/docs/autoscaling.md b/docs/autoscaling.md
index 56b6555b..fc2c2f14 100644
--- a/docs/autoscaling.md
+++ b/docs/autoscaling.md
@@ -167,6 +167,10 @@ vector:
     requestRate: 75000m
 ```
 
+## Configuration Examples
+
+For complete configuration examples, see the [examples directory](examples/).
+
 ## Resource Requirements
 
 ### Autoscaling Components
diff --git a/docs/examples/values-autoscaling.yaml b/docs/examples/values-autoscaling.yaml
new file mode 100644
index 00000000..e971946e
--- /dev/null
+++ b/docs/examples/values-autoscaling.yaml
@@ -0,0 +1,208 @@
+# Example values for eoAPI with core monitoring and autoscaling enabled
+#
+# To use this configuration:
+#
+# 1. Update the ingress.host to your actual domain
+# 2. Adjust scaling targets based on your load testing results
+# 3. Monitor resource usage and adjust requests/limits accordingly
+# 4. Consider enabling TLS for production deployments
+#
+# IMPORTANT: This configuration enables monitoring components that are
+# disabled by default. This is required for autoscaling to work.
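+#
+# Example install using this file (a sketch; assumes the chart repo has been
+# added under the "eoapi" alias, matching the eoapi-observability command
+# below, and that you deploy into the "eoapi" namespace):
+#
+#   helm upgrade --install eoapi eoapi/eoapi \
+#     --namespace eoapi --create-namespace \
+#     -f docs/examples/values-autoscaling.yaml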
+# +# For observability and dashboards, install the separate eoapi-observability chart: +# helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi +# +# Load testing recommendations: +# - Test each service endpoint individually +# - Monitor HPA metrics: kubectl get hpa -n eoapi -w +# - Check custom metrics: kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" +# - Review Prometheus targets to ensure metrics collection is working + +gitSha: "latest" + +###################### +# INGRESS +###################### +ingress: + enabled: true + className: "nginx" + # IMPORTANT: Set a proper hostname for metrics collection + # nginx ingress controller requires a specific host (not wildcard) to expose metrics + host: "your-eoapi.example.com" # Replace with your domain + tls: + enabled: true + secretName: eoapi-tls + +###################### +# DATABASE +###################### +# Using default PostgreSQL cluster configuration +postgrescluster: + enabled: true + instances: + - name: eoapi + replicas: 1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "50Gi" # Increased for production workloads + cpu: "2048m" # More CPU for database under load + memory: "4096Mi" # More memory for database performance + +###################### +# MONITORING & AUTOSCALING +###################### +# Essential monitoring components for autoscaling +monitoring: + metricsServer: + enabled: true + apiService: + create: true + prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: true + prometheus-node-exporter: + enabled: true + resources: + limits: + cpu: 10m + memory: 30Mi + requests: + cpu: 10m + memory: 30Mi + server: + service: + type: ClusterIP + +# Custom metrics for request-rate based autoscaling +prometheusAdapter: + enabled: true + +###################### +# SERVICE CONFIGURATION WITH AUTOSCALING +###################### + +# STAC API Service +stac: + enabled: true + autoscaling: + enabled: true + minReplicas: 2 # Start with 2 replicas for availability + maxReplicas: 20 # Scale up to handle high loads + type: "requestRate" # Scale based on request rate + behavior: + scaleDown: + stabilizationWindowSeconds: 300 # Wait 5 minutes before scaling down + scaleUp: + stabilizationWindowSeconds: 30 # Scale up quickly (30 seconds) + targets: + requestRate: 50000m # Scale when average > 50 requests/second + settings: + resources: + limits: + cpu: "1000m" + memory: "2048Mi" + requests: + cpu: "500m" # Higher baseline for autoscaling + memory: "1024Mi" + +# Raster Service (TiTiler) +raster: + enabled: true + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 15 + type: "requestRate" + behavior: + scaleDown: + stabilizationWindowSeconds: 180 # Scale down slower for raster (3 min) + scaleUp: + stabilizationWindowSeconds: 60 # Scale up moderately fast + targets: + requestRate: 30000m # Scale when average > 30 requests/second (raster is more resource intensive) + settings: + resources: + limits: + cpu: "1536m" # Raster processing needs more CPU + memory: "6144Mi" # Raster processing needs more memory + requests: + cpu: "768m" + memory: "3072Mi" + envVars: + # Optimized GDAL settings for autoscaling + GDAL_CACHEMAX: "512" # Increased cache for better performance + WEB_CONCURRENCY: "8" # More workers for higher throughput + +# Vector Service (TIPG) +vector: + enabled: true + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 10 + type: "requestRate" + behavior: + scaleDown: + 
stabilizationWindowSeconds: 240 + scaleUp: + stabilizationWindowSeconds: 45 + targets: + requestRate: 75000m # Vector is typically lighter, can handle more requests + settings: + resources: + limits: + cpu: "1000m" + memory: "2048Mi" + requests: + cpu: "512m" + memory: "1024Mi" + +# Multidimensional Service (optional) +multidim: + enabled: false # Disabled by default + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 8 + type: "requestRate" + targets: + requestRate: 25000m # Conservative scaling for multidim + settings: + resources: + limits: + cpu: "2048m" # Multidim can be very CPU intensive + memory: "8192Mi" # Large memory requirements for multidim data + requests: + cpu: "1024m" + memory: "4096Mi" + +###################### +# STAC BROWSER +###################### +browser: + enabled: true + replicaCount: 2 # Static replicas (browser is just static files) + +###################### +# PGSTAC BOOTSTRAP +###################### +pgstacBootstrap: + enabled: true + settings: + loadSamples: false # Disable sample data for production + resources: + requests: + cpu: "1024m" + memory: "2048Mi" + limits: + cpu: "1024m" + memory: "2048Mi" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 40ed3726..fc49e8e5 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -425,6 +425,22 @@ deploy_eoapi() { HELM_CMD="$HELM_CMD --set eoapi-notifier.enabled=true" # Fix eoapi-notifier secret name dynamically HELM_CMD="$HELM_CMD --set eoapi-notifier.config.sources[0].config.connection.existingSecret.name=$RELEASE_NAME-pguser-eoapi" + # Enable autoscaling for CI tests + HELM_CMD="$HELM_CMD --set stac.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set stac.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set stac.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set stac.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set stac.autoscaling.maxReplicas=3" + HELM_CMD="$HELM_CMD --set raster.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set raster.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set raster.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set raster.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set raster.autoscaling.maxReplicas=3" + HELM_CMD="$HELM_CMD --set vector.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set vector.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set vector.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set vector.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set vector.autoscaling.maxReplicas=3" elif [ -f "./eoapi/test-local-values.yaml" ]; then log_info "Using local test configuration..." HELM_CMD="$HELM_CMD -f ./eoapi/test-local-values.yaml"
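To spot-check the CI autoscaling setup above by hand, standard kubectl commands
are enough (a sketch; names assume the default RELEASE_NAME and namespace of
"eoapi" and the "<release>-<service>-hpa" naming asserted in the chart unit
tests):

    kubectl get hpa -n eoapi -w
    kubectl describe hpa eoapi-stac-hpa -n eoapi
    kubectl top pods -n eoapi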