diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b9420e..5cd285af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,12 +49,20 @@ jobs: steps: - uses: actions/checkout@v5 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + pip install pytest pytest-timeout requests types-requests psycopg2-binary + - name: Start K3s cluster uses: jupyterhub/action-k3s-helm@v4 with: k3s-channel: latest helm-version: ${{ env.HELM_VERSION }} - metrics-enabled: false + metrics-enabled: true docker-enabled: true - name: Set release name @@ -81,6 +89,13 @@ jobs: [ $counter -ge $timeout ] && { echo "❌ Timeout waiting for $crd"; exit 1; } done + # Wait for metrics-server to be ready (required for HPA) + echo "Waiting for metrics-server..." + kubectl wait --for=condition=Ready pod -l k8s-app=metrics-server -n kube-system --timeout=300s || { + echo "⚠️ Metrics-server not ready, checking status..." + kubectl get pods -n kube-system -l k8s-app=metrics-server + } + echo "✅ K3s cluster ready" - name: Deploy eoAPI @@ -104,6 +119,11 @@ jobs: export RELEASE_NAME="$RELEASE_NAME" ./scripts/test.sh integration --debug + - name: Run autoscaling tests + run: | + export RELEASE_NAME="$RELEASE_NAME" + python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow" + - name: Debug failed deployment if: failure() run: | diff --git a/.github/workflows/tests/test_autoscaling.py b/.github/workflows/tests/test_autoscaling.py new file mode 100644 index 00000000..463bc14c --- /dev/null +++ b/.github/workflows/tests/test_autoscaling.py @@ -0,0 +1,649 @@ +"""Test autoscaling behavior and HPA functionality.""" + +import json +import os +import subprocess +import threading +import time +from typing import Any, Dict, List, Optional, cast + +import pytest +import requests + + +def get_namespace() -> str: + return os.environ.get("NAMESPACE", "eoapi") + + +def get_release_name() -> str: + return os.environ.get("RELEASE_NAME", "eoapi") + + +def get_base_url() -> str: + namespace = get_namespace() + + # Check if we have an ingress + result = subprocess.run( + ["kubectl", "get", "ingress", "-n", namespace, "-o", "json"], + capture_output=True, + text=True, + ) + + if result.returncode == 0: + ingress_data = json.loads(result.stdout) + if ingress_data["items"]: + ingress = ingress_data["items"][0] + rules = ingress.get("spec", {}).get("rules", []) + if rules: + host = rules[0].get("host", "localhost") + # Check if host is accessible + try: + response = requests.get( + f"http://{host}/stac/collections", timeout=5 + ) + if response.status_code == 200: + return f"http://{host}" + except requests.RequestException: + pass + + return "http://localhost:8080" + + +def kubectl_get( + resource: str, + namespace: Optional[str] = None, + label_selector: Optional[str] = None, + output: str = "json", +) -> subprocess.CompletedProcess[str]: + cmd = ["kubectl", "get", resource] + + if namespace: + cmd.extend(["-n", namespace]) + + if label_selector: + cmd.extend(["-l", label_selector]) + + if output: + cmd.extend(["-o", output]) + + result = subprocess.run(cmd, capture_output=True, text=True) + return result + + +def get_pod_metrics(namespace: str, service_name: str) -> List[Dict[str, str]]: + release_name = get_release_name() + result = subprocess.run( + [ + "kubectl", + "top", + "pods", + "-n", + namespace, + "-l", + f"app={release_name}-{service_name}", + "--no-headers", + ], + capture_output=True, + text=True, + ) + + if 
result.returncode != 0:
+        return []
+
+    metrics: List[Dict[str, str]] = []
+    for line in result.stdout.strip().split("\n"):
+        if line.strip():
+            parts = line.split()
+            if len(parts) >= 3:
+                pod_name = parts[0]
+                cpu = parts[1]  # e.g., "25m"
+                memory = parts[2]  # e.g., "128Mi"
+                metrics.append({"pod": pod_name, "cpu": cpu, "memory": memory})
+
+    return metrics
+
+
+def get_hpa_status(namespace: str, hpa_name: str) -> Optional[Dict[str, Any]]:
+    """Get HPA status for a specific HPA."""
+    result = kubectl_get("hpa", namespace=namespace, output="json")
+    if result.returncode != 0:
+        return None
+
+    hpas = json.loads(result.stdout)
+    for hpa in hpas["items"]:
+        if hpa["metadata"]["name"] == hpa_name:
+            return cast(Dict[str, Any], hpa)
+
+    return None
+
+
+def get_pod_count(namespace: str, service_name: str) -> int:
+    release_name = get_release_name()
+    result = kubectl_get(
+        "pods",
+        namespace=namespace,
+        label_selector=f"app={release_name}-{service_name}",
+    )
+
+    if result.returncode != 0:
+        return 0
+
+    pods = json.loads(result.stdout)
+    running_pods = [
+        pod for pod in pods["items"] if pod["status"]["phase"] == "Running"
+    ]
+
+    return len(running_pods)
+
+
+def make_request(url: str, timeout: int = 10) -> bool:
+    """Make a single HTTP request and return success status."""
+    try:
+        response = requests.get(url, timeout=timeout)
+        return response.status_code == 200
+    except requests.RequestException:
+        return False
+
+
+def generate_load(
+    base_url: str,
+    endpoints: List[str],
+    duration: int = 60,
+    concurrent_requests: int = 5,
+    delay: float = 0.1,
+) -> Dict[str, Any]:
+    """Generate HTTP load against specified endpoints."""
+    end_time = time.time() + duration
+    success_count = 0
+    error_count = 0
+    # Guard the shared counters: "+=" on an int is a read-modify-write, so
+    # concurrent workers could otherwise drop updates.
+    lock = threading.Lock()
+
+    def worker() -> None:
+        nonlocal success_count, error_count
+        while time.time() < end_time:
+            for endpoint in endpoints:
+                url = f"{base_url}{endpoint}"
+                ok = make_request(url)
+                with lock:
+                    if ok:
+                        success_count += 1
+                    else:
+                        error_count += 1
+            time.sleep(delay)
+
+    # Start concurrent workers
+    threads = []
+    for _ in range(concurrent_requests):
+        thread = threading.Thread(target=worker)
+        thread.start()
+        threads.append(thread)
+
+    # Wait for all threads to complete
+    for thread in threads:
+        thread.join()
+
+    return {
+        "total_requests": success_count + error_count,
+        "successful_requests": success_count,
+        "failed_requests": error_count,
+        "success_rate": success_count / (success_count + error_count)
+        if (success_count + error_count) > 0
+        else 0,
+    }
+
+
+class TestHPAConfiguration:
+    def test_hpa_resources_properly_configured(self) -> None:
+        namespace = get_namespace()
+        result = kubectl_get("hpa", namespace=namespace)
+
+        if result.returncode != 0:
+            pytest.skip("No HPA resources found - autoscaling not enabled")
+
+        hpas = json.loads(result.stdout)
+        assert len(hpas["items"]) > 0, "No HPA resources configured"
+
+        for hpa in hpas["items"]:
+            spec = hpa["spec"]
+            hpa_name = hpa["metadata"]["name"]
+
+            assert "scaleTargetRef" in spec, (
+                f"HPA {hpa_name} missing scaleTargetRef"
+            )
+            assert "minReplicas" in spec, f"HPA {hpa_name} missing minReplicas"
+            assert "maxReplicas" in spec, f"HPA {hpa_name} missing maxReplicas"
+            assert "metrics" in spec, (
+                f"HPA {hpa_name} missing metrics configuration"
+            )
+
+            min_replicas = spec["minReplicas"]
+            max_replicas = spec["maxReplicas"]
+            assert min_replicas > 0, f"HPA {hpa_name} minReplicas must be > 0"
+            assert max_replicas > min_replicas, (
+                f"HPA {hpa_name} maxReplicas must be > minReplicas"
+            )
+
+            metrics = spec["metrics"]
+            assert len(metrics) > 0, 
f"HPA {hpa_name} has no metrics configured" + + cpu_metrics = [ + m + for m in metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + assert len(cpu_metrics) > 0, ( + f"HPA {hpa_name} must have CPU metric configured" + ) + + print( + f"✅ HPA {hpa_name}: {min_replicas}-{max_replicas} replicas, {len(metrics)} metrics" + ) + + def test_target_deployments_exist(self) -> None: + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + target_ref = hpa["spec"]["scaleTargetRef"] + target_name = target_ref["name"] + hpa_name = hpa["metadata"]["name"] + + # Check target deployment exists + deploy_result = kubectl_get( + "deployment", namespace=namespace, output="json" + ) + assert deploy_result.returncode == 0, "Cannot list deployments" + + deployments = json.loads(deploy_result.stdout) + target_deployment = next( + ( + d + for d in deployments["items"] + if d["metadata"]["name"] == target_name + ), + None, + ) + + assert target_deployment is not None, ( + f"HPA {hpa_name} target deployment {target_name} not found" + ) + + # Check deployment has ready replicas + status = target_deployment.get("status", {}) + ready_replicas = status.get("readyReplicas", 0) + assert ready_replicas > 0, ( + f"Target deployment {target_name} has no ready replicas" + ) + + print( + f"✅ HPA {hpa_name} target deployment {target_name} is ready ({ready_replicas} replicas)" + ) + + +class TestCPUScaling: + def test_cpu_metrics_collection(self) -> None: + """Verify CPU metrics are being collected for HPA targets.""" + namespace = get_namespace() + services = ["stac", "raster", "vector"] + + metrics_available = [] + + for service in services: + try: + pod_metrics = get_pod_metrics(namespace, service) + if pod_metrics: + metrics_available.append(service) + for metric in pod_metrics: + print( + f"✅ {service} pod {metric['pod']}: CPU={metric['cpu']}, Memory={metric['memory']}" + ) + except Exception as e: + print(f"⚠️ Cannot get metrics for {service}: {e}") + + assert len(metrics_available) > 0, ( + "No CPU metrics available for any service" + ) + + def test_hpa_cpu_utilization_calculation(self) -> None: + """Verify HPA calculates CPU utilization correctly.""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + + # Check if HPA has current metrics + current_metrics = status.get("currentMetrics", []) + cpu_metrics = [ + m + for m in current_metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + + if cpu_metrics: + cpu_utilization = cpu_metrics[0]["resource"]["current"].get( + "averageUtilization" + ) + if cpu_utilization is not None: + assert 0 <= cpu_utilization <= 1000, ( + f"Invalid CPU utilization: {cpu_utilization}%" + ) + print( + f"✅ HPA {hpa_name} CPU utilization: {cpu_utilization}%" + ) + else: + print( + f"⚠️ HPA {hpa_name} CPU metric exists but no utilization value" + ) + else: + # Check conditions for why metrics might not be available + conditions = status.get("conditions", []) + for condition in conditions: + if ( + condition["type"] == "ScalingActive" + and condition["status"] == "False" + ): + print( + f"⚠️ HPA {hpa_name} scaling not active: 
{condition.get('message', 'Unknown reason')}" + ) + break + else: + print(f"⚠️ HPA {hpa_name} no CPU metrics available yet") + + def test_cpu_resource_requests_alignment(self) -> None: + """Verify CPU resource requests are properly set for percentage calculations.""" + namespace = get_namespace() + services = ["stac", "raster", "vector"] + + for service in services: + release_name = get_release_name() + result = kubectl_get( + "pods", + namespace=namespace, + label_selector=f"app={release_name}-{service}", + ) + + if result.returncode != 0: + continue + + pods = json.loads(result.stdout) + running_pods = [ + p for p in pods["items"] if p["status"]["phase"] == "Running" + ] + + if not running_pods: + continue + + pod = running_pods[0] # Check first running pod + containers = pod["spec"]["containers"] + + main_container = next( + (c for c in containers if c["name"] == service), None + ) + if not main_container: + continue + + resources = main_container.get("resources", {}) + requests = resources.get("requests", {}) + + if "cpu" not in requests: + print( + f"⚠️ Service {service} missing CPU requests - HPA percentage calculation may be inaccurate" + ) + continue + + cpu_request = requests["cpu"] + print(f"✅ Service {service} CPU request: {cpu_request}") + + # Parse CPU request to verify it's reasonable + if cpu_request.endswith("m"): + cpu_millicores = int(cpu_request[:-1]) + assert cpu_millicores > 0, ( + f"Service {service} has zero CPU request" + ) + assert cpu_millicores <= 2000, ( + f"Service {service} has very high CPU request: {cpu_millicores}m" + ) + + +class TestScalingBehavior: + """Test actual scaling behavior under load.""" + + @pytest.mark.slow + def test_load_response_scaling(self) -> None: + """Generate load and verify scaling response (when possible).""" + namespace = get_namespace() + base_url = get_base_url() + + # Test endpoints that should generate CPU load + load_endpoints = [ + "/stac/collections", + "/stac/search?collections=noaa-emergency-response&limit=50", + "/raster/collections", + "/vector/collections", + ] + + # Check initial state + initial_pod_counts: Dict[str, int] = {} + services = ["stac", "raster", "vector"] + + for service in services: + initial_pod_counts[service] = get_pod_count(namespace, service) + + print(f"Initial pod counts: {initial_pod_counts}") + + # Skip test if we can't connect to services + try: + response = requests.get(f"{base_url}/stac/collections", timeout=5) + if response.status_code != 200: + pytest.skip("Cannot access API endpoints for load testing") + except requests.RequestException: + pytest.skip("API endpoints not accessible for load testing") + + # Generate moderate load for limited time (suitable for CI) + load_duration = 90 # 1.5 minutes + concurrent_requests = 8 + + print( + f"Generating load: {concurrent_requests} concurrent requests for {load_duration}s" + ) + + # Start load generation + load_stats = generate_load( + base_url=base_url, + endpoints=load_endpoints, + duration=load_duration, + concurrent_requests=concurrent_requests, + delay=0.05, # 20 requests/second per worker + ) + + print(f"Load test completed: {load_stats}") + + # Wait a bit for metrics to propagate and scaling to potentially occur + print("Waiting for metrics to propagate and potential scaling...") + time.sleep(30) + + # Check final state + final_pod_counts: Dict[str, int] = {} + for service in services: + final_pod_counts[service] = get_pod_count(namespace, service) + + print(f"Final pod counts: {final_pod_counts}") + + # Check HPA metrics after load + 
result = kubectl_get("hpa", namespace=namespace) + if result.returncode == 0: + hpas = json.loads(result.stdout) + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + current_metrics = status.get("currentMetrics", []) + + cpu_metrics = [ + m + for m in current_metrics + if m.get("type") == "Resource" + and m.get("resource", {}).get("name") == "cpu" + ] + + if cpu_metrics: + cpu_utilization = cpu_metrics[0]["resource"]["current"].get( + "averageUtilization" + ) + print(f"Post-load HPA {hpa_name} CPU: {cpu_utilization}%") + + assert load_stats["success_rate"] > 0.8, ( + f"Load test had low success rate: {load_stats['success_rate']:.2%}" + ) + assert load_stats["total_requests"] > 100, ( + "Load test generated insufficient requests" + ) + + # Note: In CI environments with limited resources, actual scaling may not occur + # The important thing is that the system handled the load successfully + scaling_occurred = any( + final_pod_counts[svc] > initial_pod_counts[svc] + for svc in services + if svc in initial_pod_counts and svc in final_pod_counts + ) + + if scaling_occurred: + print("✅ Scaling occurred during load test") + else: + print( + "⚠️ No scaling occurred - may be due to CI resource constraints or low load thresholds" + ) + + def test_scaling_stabilization_windows(self) -> None: + """Verify HPA respects stabilization windows in configuration.""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + spec = hpa["spec"] + + behavior = spec.get("behavior", {}) + if not behavior: + print(f"⚠️ HPA {hpa_name} has no scaling behavior configured") + continue + + # Check scale up behavior + scale_up = behavior.get("scaleUp", {}) + if scale_up: + stabilization = scale_up.get("stabilizationWindowSeconds", 0) + policies = scale_up.get("policies", []) + print( + f"✅ HPA {hpa_name} scale-up: {stabilization}s stabilization, {len(policies)} policies" + ) + + # Check scale down behavior + scale_down = behavior.get("scaleDown", {}) + if scale_down: + stabilization = scale_down.get("stabilizationWindowSeconds", 0) + policies = scale_down.get("policies", []) + print( + f"✅ HPA {hpa_name} scale-down: {stabilization}s stabilization, {len(policies)} policies" + ) + + +class TestRequestRateScaling: + """Test request rate-based autoscaling (when available).""" + + def test_custom_metrics_for_request_rate(self) -> None: + """Check if custom metrics for request rate scaling are available.""" + namespace = get_namespace() + + # Check if custom metrics API has request rate metrics + result = subprocess.run( + ["kubectl", "get", "--raw", "/apis/custom.metrics.k8s.io/v1beta1"], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + pytest.skip("Custom metrics API not available") + + api_response = json.loads(result.stdout) + resources = api_response.get("resources", []) + + # Look for nginx ingress controller metrics + request_rate_metrics = [ + r + for r in resources + if "nginx_ingress_controller" in r.get("name", "") + and "requests" in r.get("name", "") + ] + + if request_rate_metrics: + print(f"✅ Found {len(request_rate_metrics)} request rate metrics") + for metric in request_rate_metrics: + print(f" - {metric['name']}") + else: + print( + "⚠️ No request rate metrics available - may require ingress controller metrics configuration" + ) + + def 
test_hpa_request_rate_metrics(self) -> None: + """Verify HPA can access request rate metrics (when configured).""" + namespace = get_namespace() + result = kubectl_get("hpa", namespace=namespace) + + if result.returncode != 0: + pytest.skip("No HPA resources found") + + hpas = json.loads(result.stdout) + + for hpa in hpas["items"]: + hpa_name = hpa["metadata"]["name"] + status = hpa.get("status", {}) + current_metrics = status.get("currentMetrics", []) + + # Look for custom metrics (request rate) + custom_metrics = [ + m + for m in current_metrics + if m.get("type") in ["Pods", "Object"] + and "nginx_ingress_controller" in str(m) + ] + + if custom_metrics: + print(f"✅ HPA {hpa_name} has custom metrics available") + for metric in custom_metrics: + print(f" - {metric}") + else: + # Check if it's configured but not yet available + spec_metrics = hpa["spec"]["metrics"] + configured_custom = [ + m + for m in spec_metrics + if m.get("type") in ["Pods", "Object"] + ] + + if configured_custom: + print( + f"⚠️ HPA {hpa_name} has custom metrics configured but not available yet" + ) + else: + print( + f"ℹ️ HPA {hpa_name} uses only CPU metrics (no request rate scaling)" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8898da3c..a47c8a24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Automatic queue processor CronJob created when `use_queue` is "true" (configurable schedule via `queueProcessor.schedule`) - Automatic extent updater CronJob created when `update_collection_extent` is "false" (configurable schedule via `extentUpdater.schedule`) - Added ConfigMap checksum annotations to automatically restart pods when configuration changes [#344](https://github.com/developmentseed/eoapi-k8s/pull/344) +- Tests for autoscaling ### Changed diff --git a/charts/eoapi/tests/autoscaling_tests.yaml b/charts/eoapi/tests/autoscaling_tests.yaml new file mode 100644 index 00000000..18cd9452 --- /dev/null +++ b/charts/eoapi/tests/autoscaling_tests.yaml @@ -0,0 +1,241 @@ +suite: autoscaling tests +templates: + - templates/services/stac/hpa.yaml + - templates/services/raster/hpa.yaml + - templates/services/vector/hpa.yaml + - templates/services/multidim/hpa.yaml +tests: + - it: "autoscaling disabled by default" + set: + stac.autoscaling.enabled: false + raster.autoscaling.enabled: false + vector.autoscaling.enabled: false + multidim.autoscaling.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: "stac hpa not created when autoscaling disabled" + set: + stac.enabled: true + stac.autoscaling.enabled: false + template: templates/services/stac/hpa.yaml + asserts: + - hasDocuments: + count: 0 + + - it: "stac hpa created with cpu autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + stac.autoscaling.targets.cpu: 70 + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: metadata.name + value: "RELEASE-NAME-stac-hpa" + - equal: + path: spec.minReplicas + value: 1 + - isNotEmpty: + path: spec.maxReplicas + - equal: + path: spec.metrics[0].type + value: "Resource" + - equal: + path: spec.metrics[0].resource.name + value: "cpu" + - equal: + path: spec.metrics[0].resource.target.averageUtilization + value: 70 + + - it: "stac hpa created with request rate autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + 
stac.autoscaling.type: "requestRate" + stac.autoscaling.targets.requestRate: "50000m" + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.minReplicas + value: 1 + - isNotEmpty: + path: spec.maxReplicas + - equal: + path: spec.metrics[0].type + value: "Pods" + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "50000m" + + - it: "stac hpa created with both cpu and request rate autoscaling" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "both" + stac.autoscaling.targets.cpu: 70 + stac.autoscaling.targets.requestRate: "50000m" + template: templates/services/stac/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].type + value: "Resource" + - equal: + path: spec.metrics[0].resource.name + value: "cpu" + - equal: + path: spec.metrics[1].type + value: "Pods" + - equal: + path: spec.metrics[1].pods.metric.name + value: "nginx_ingress_controller_requests" + + - it: "raster hpa created with request rate autoscaling" + set: + raster.enabled: true + raster.autoscaling.enabled: true + raster.autoscaling.type: "requestRate" + raster.autoscaling.targets.requestRate: "30000m" + template: templates/services/raster/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "30000m" + + - it: "vector hpa created with request rate autoscaling" + set: + vector.enabled: true + vector.autoscaling.enabled: true + vector.autoscaling.type: "requestRate" + vector.autoscaling.targets.requestRate: "40000m" + template: templates/services/vector/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].pods.metric.name + value: "nginx_ingress_controller_requests" + - equal: + path: spec.metrics[0].pods.target.averageValue + value: "40000m" + + - it: "multidim hpa not created when service disabled" + set: + multidim.enabled: false + multidim.autoscaling.enabled: true + template: templates/services/multidim/hpa.yaml + asserts: + - hasDocuments: + count: 0 + + - it: "multidim hpa created when enabled" + set: + multidim.enabled: true + multidim.autoscaling.enabled: true + multidim.autoscaling.type: "cpu" + multidim.autoscaling.targets.cpu: 80 + template: templates/services/multidim/hpa.yaml + asserts: + - isKind: + of: HorizontalPodAutoscaler + - equal: + path: spec.metrics[0].resource.target.averageUtilization + value: 80 + + - it: "hpa scaleTargetRef points to correct deployment" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + template: templates/services/stac/hpa.yaml + asserts: + - equal: + path: spec.scaleTargetRef.name + value: "RELEASE-NAME-stac" + - equal: + path: spec.scaleTargetRef.kind + value: "Deployment" + + - it: "hpa custom replica configuration" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + stac.autoscaling.minReplicas: 2 + stac.autoscaling.maxReplicas: 20 + template: templates/services/stac/hpa.yaml + asserts: + - equal: + path: spec.minReplicas + value: 2 + - equal: + path: spec.maxReplicas + value: 20 + + - it: "hpa includes proper labels" + set: + stac.enabled: true + stac.autoscaling.enabled: true + stac.autoscaling.type: "cpu" + template: 
templates/services/stac/hpa.yaml
+    asserts:
+      - equal:
+          path: metadata.labels.app
+          value: "RELEASE-NAME-stac"
+
+  - it: "hpa behavior configuration applied when set"
+    set:
+      stac.enabled: true
+      stac.autoscaling.enabled: true
+      stac.autoscaling.type: "cpu"
+      stac.autoscaling.behavior.scaleUp.stabilizationWindowSeconds: 120
+      stac.autoscaling.behavior.scaleDown.stabilizationWindowSeconds: 300
+    template: templates/services/stac/hpa.yaml
+    asserts:
+      - equal:
+          path: spec.behavior.scaleUp.stabilizationWindowSeconds
+          value: 120
+      - equal:
+          path: spec.behavior.scaleDown.stabilizationWindowSeconds
+          value: 300
+
+  - it: "stac hpa production configuration with higher minReplicas"
+    set:
+      stac.enabled: true
+      stac.autoscaling.enabled: true
+      stac.autoscaling.minReplicas: 2
+      stac.autoscaling.maxReplicas: 20
+      stac.autoscaling.type: "requestRate"
+      stac.autoscaling.targets.requestRate: "50000m"
+    template: templates/services/stac/hpa.yaml
+    asserts:
+      - isKind:
+          of: HorizontalPodAutoscaler
+      - equal:
+          path: spec.minReplicas
+          value: 2
+      - equal:
+          path: spec.maxReplicas
+          value: 20
+      - equal:
+          path: spec.metrics[0].type
+          value: "Pods"
+      - equal:
+          path: spec.metrics[0].pods.target.averageValue
+          value: "50000m"
diff --git a/charts/eoapi/values.schema.json b/charts/eoapi/values.schema.json
index ce836105..ca487fb6 100644
--- a/charts/eoapi/values.schema.json
+++ b/charts/eoapi/values.schema.json
@@ -528,7 +528,7 @@
           "enum": ["cpu", "requestRate", "both"],
           "description": "Autoscaling metric type"
         },
-        "behaviour": {
+        "behavior": {
           "type": "object",
           "description": "Autoscaling behavior configuration"
         },
diff --git a/docs/autoscaling.md b/docs/autoscaling.md
index 56b6555b..fc2c2f14 100644
--- a/docs/autoscaling.md
+++ b/docs/autoscaling.md
@@ -167,6 +167,10 @@ vector:
     requestRate: 75000m
 ```
 
+## Configuration Examples
+
+For complete configuration examples, see the [examples directory](examples/).
+
 ## Resource Requirements
 
 ### Autoscaling Components
diff --git a/docs/examples/values-autoscaling.yaml b/docs/examples/values-autoscaling.yaml
new file mode 100644
index 00000000..e971946e
--- /dev/null
+++ b/docs/examples/values-autoscaling.yaml
@@ -0,0 +1,208 @@
+# Example values for eoAPI with core monitoring and autoscaling enabled
+#
+# To use this configuration:
+#
+# 1. Update the ingress.host to your actual domain
+# 2. Adjust scaling targets based on your load testing results
+# 3. Monitor resource usage and adjust requests/limits accordingly
+# 4. Consider enabling TLS for production deployments
+#
+# IMPORTANT: This configuration enables monitoring components that are
+# disabled by default. This is required for autoscaling to work.
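+#
+# Example install using this file (a sketch; assumes the chart repo has been
+# added under the "eoapi" alias, matching the eoapi-observability command
+# below, and that you deploy into the "eoapi" namespace):
+#
+#   helm upgrade --install eoapi eoapi/eoapi \
+#     --namespace eoapi --create-namespace \
+#     -f docs/examples/values-autoscaling.yaml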
+# +# For observability and dashboards, install the separate eoapi-observability chart: +# helm install eoapi-obs eoapi/eoapi-observability --namespace eoapi +# +# Load testing recommendations: +# - Test each service endpoint individually +# - Monitor HPA metrics: kubectl get hpa -n eoapi -w +# - Check custom metrics: kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" +# - Review Prometheus targets to ensure metrics collection is working + +gitSha: "latest" + +###################### +# INGRESS +###################### +ingress: + enabled: true + className: "nginx" + # IMPORTANT: Set a proper hostname for metrics collection + # nginx ingress controller requires a specific host (not wildcard) to expose metrics + host: "your-eoapi.example.com" # Replace with your domain + tls: + enabled: true + secretName: eoapi-tls + +###################### +# DATABASE +###################### +# Using default PostgreSQL cluster configuration +postgrescluster: + enabled: true + instances: + - name: eoapi + replicas: 1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "50Gi" # Increased for production workloads + cpu: "2048m" # More CPU for database under load + memory: "4096Mi" # More memory for database performance + +###################### +# MONITORING & AUTOSCALING +###################### +# Essential monitoring components for autoscaling +monitoring: + metricsServer: + enabled: true + apiService: + create: true + prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: true + prometheus-node-exporter: + enabled: true + resources: + limits: + cpu: 10m + memory: 30Mi + requests: + cpu: 10m + memory: 30Mi + server: + service: + type: ClusterIP + +# Custom metrics for request-rate based autoscaling +prometheusAdapter: + enabled: true + +###################### +# SERVICE CONFIGURATION WITH AUTOSCALING +###################### + +# STAC API Service +stac: + enabled: true + autoscaling: + enabled: true + minReplicas: 2 # Start with 2 replicas for availability + maxReplicas: 20 # Scale up to handle high loads + type: "requestRate" # Scale based on request rate + behavior: + scaleDown: + stabilizationWindowSeconds: 300 # Wait 5 minutes before scaling down + scaleUp: + stabilizationWindowSeconds: 30 # Scale up quickly (30 seconds) + targets: + requestRate: 50000m # Scale when average > 50 requests/second + settings: + resources: + limits: + cpu: "1000m" + memory: "2048Mi" + requests: + cpu: "500m" # Higher baseline for autoscaling + memory: "1024Mi" + +# Raster Service (TiTiler) +raster: + enabled: true + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 15 + type: "requestRate" + behavior: + scaleDown: + stabilizationWindowSeconds: 180 # Scale down slower for raster (3 min) + scaleUp: + stabilizationWindowSeconds: 60 # Scale up moderately fast + targets: + requestRate: 30000m # Scale when average > 30 requests/second (raster is more resource intensive) + settings: + resources: + limits: + cpu: "1536m" # Raster processing needs more CPU + memory: "6144Mi" # Raster processing needs more memory + requests: + cpu: "768m" + memory: "3072Mi" + envVars: + # Optimized GDAL settings for autoscaling + GDAL_CACHEMAX: "512" # Increased cache for better performance + WEB_CONCURRENCY: "8" # More workers for higher throughput + +# Vector Service (TIPG) +vector: + enabled: true + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 10 + type: "requestRate" + behavior: + scaleDown: + 
stabilizationWindowSeconds: 240 + scaleUp: + stabilizationWindowSeconds: 45 + targets: + requestRate: 75000m # Vector is typically lighter, can handle more requests + settings: + resources: + limits: + cpu: "1000m" + memory: "2048Mi" + requests: + cpu: "512m" + memory: "1024Mi" + +# Multidimensional Service (optional) +multidim: + enabled: false # Disabled by default + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 8 + type: "requestRate" + targets: + requestRate: 25000m # Conservative scaling for multidim + settings: + resources: + limits: + cpu: "2048m" # Multidim can be very CPU intensive + memory: "8192Mi" # Large memory requirements for multidim data + requests: + cpu: "1024m" + memory: "4096Mi" + +###################### +# STAC BROWSER +###################### +browser: + enabled: true + replicaCount: 2 # Static replicas (browser is just static files) + +###################### +# PGSTAC BOOTSTRAP +###################### +pgstacBootstrap: + enabled: true + settings: + loadSamples: false # Disable sample data for production + resources: + requests: + cpu: "1024m" + memory: "2048Mi" + limits: + cpu: "1024m" + memory: "2048Mi" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 40ed3726..fc49e8e5 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -425,6 +425,22 @@ deploy_eoapi() { HELM_CMD="$HELM_CMD --set eoapi-notifier.enabled=true" # Fix eoapi-notifier secret name dynamically HELM_CMD="$HELM_CMD --set eoapi-notifier.config.sources[0].config.connection.existingSecret.name=$RELEASE_NAME-pguser-eoapi" + # Enable autoscaling for CI tests + HELM_CMD="$HELM_CMD --set stac.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set stac.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set stac.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set stac.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set stac.autoscaling.maxReplicas=3" + HELM_CMD="$HELM_CMD --set raster.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set raster.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set raster.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set raster.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set raster.autoscaling.maxReplicas=3" + HELM_CMD="$HELM_CMD --set vector.autoscaling.enabled=true" + HELM_CMD="$HELM_CMD --set vector.autoscaling.type=cpu" + HELM_CMD="$HELM_CMD --set vector.autoscaling.targets.cpu=75" + HELM_CMD="$HELM_CMD --set vector.autoscaling.minReplicas=1" + HELM_CMD="$HELM_CMD --set vector.autoscaling.maxReplicas=3" elif [ -f "./eoapi/test-local-values.yaml" ]; then log_info "Using local test configuration..." HELM_CMD="$HELM_CMD -f ./eoapi/test-local-values.yaml"
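To spot-check the CI autoscaling setup above by hand, standard kubectl commands
are enough (a sketch; names assume the default RELEASE_NAME and namespace of
"eoapi" and the "<release>-<service>-hpa" naming asserted in the chart unit
tests):

    kubectl get hpa -n eoapi -w
    kubectl describe hpa eoapi-stac-hpa -n eoapi
    kubectl top pods -n eoapi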