i-am-bee
diff --git a/‎.github/workflows/integration-test.yml‎
Lines changed: 7 additions & 0 deletions b/‎.github/workflows/integration-test.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎apps/agentstack-server/.vscode/launch.json‎
Lines changed: 15 additions & 0 deletions b/‎apps/agentstack-server/.vscode/launch.json‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎apps/agentstack-server/pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎apps/agentstack-server/pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎apps/agentstack-server/src/agentstack_server/api/middleware/proxy_headers.py‎
Lines changed: 131 additions & 0 deletions b/‎apps/agentstack-server/src/agentstack_server/api/middleware/proxy_headers.py‎
Lines changed: 131 additions & 0 deletions
diff --git a/‎apps/agentstack-server/src/agentstack_server/api/middleware/rate_limit.py‎
Lines changed: 123 additions & 0 deletions b/‎apps/agentstack-server/src/agentstack_server/api/middleware/rate_limit.py‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎apps/agentstack-server/src/agentstack_server/application.py‎
Lines changed: 13 additions & 5 deletions b/‎apps/agentstack-server/src/agentstack_server/application.py‎
Lines changed: 13 additions & 5 deletions
@@ -23,6 +23,13 @@ jobs:
   integration-test:
     runs-on: ubuntu-latest
     steps:
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@master
+        with:
+          root-reserve-mb: 15360
+          temp-reserve-mb: 2048
+          swap-size-mb: 1024
+          remove-dotnet: 'true'
       - uses: actions/checkout@v4
       - name: "Set up Lima"
         uses: lima-vm/lima-actions/setup@v1
 
@@ -4,6 +4,7 @@
         {
             "name": "agentstack-server",
             "type": "debugpy",
+            "justMyCode": false,
             "request": "launch",
             "module": "uvicorn",
             "args": [
@@ -13,6 +14,20 @@
                 "--timeout-keep-alive=60",
                 "--timeout-graceful-shutdown=2"
             ],
+        },
+        {
+            "name": "Python: Debug Tests",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "purpose": [
+                "debug-test"
+            ],
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "presentation": {
+                "hidden": true, // keep original launch order in 'run and debug' tab
+            }
         }
     ]
 }
@@ -47,6 +47,7 @@ dependencies = [
     "mcp>=1.13.1",
     "opentelemetry-instrumentation-httpx>=0.59b0",
     "opentelemetry-instrumentation-fastapi>=0.59b0",
+    "limits[async-redis]>=5.3.0",
 ]
 
 [dependency-groups]
 
@@ -0,0 +1,131 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+import ipaddress
+
+from starlette.types import ASGIApp, Receive, Scope, Send
+
+
+class ProxyHeadersMiddleware:
+    """
+    ASGI middleware that applies proxy-supplied headers to the connection scope.
+
+    Modified https://github.com/Kludex/uvicorn/blob/main/uvicorn/middleware/proxy_headers.py
+    Added "host" support (X-Forwarded-Host and the "host" directive of Forwarded)
+
+    When the direct peer (``scope["client"]``) is in ``trusted_hosts`` the middleware
+    overrides, in place on the scope:
+
+    - ``scope["scheme"]`` from X-Forwarded-Proto / Forwarded ``proto``
+    - the ``host`` header and ``scope["server"]`` from X-Forwarded-Host / Forwarded ``host``
+    - ``scope["client"]`` from X-Forwarded-For / Forwarded ``for``
+
+    A directive present in the first relevant ``Forwarded`` element wins over the
+    matching X-Forwarded-* header; directives that element does not carry leave the
+    X-Forwarded-* value untouched.
+    """
+
+    def __init__(self, app: ASGIApp, trusted_hosts: list[str] | str = "127.0.0.1") -> None:
+        """
+        Args:
+            app: The wrapped ASGI application.
+            trusted_hosts: Peers whose forwarding headers are honoured: a list, or a
+                comma-separated string, of IP addresses / networks / literals, or "*"
+                to trust every peer.
+        """
+        self.app = app
+        self.trusted_hosts = _TrustedHosts(trusted_hosts)
+
+    @staticmethod
+    def _parse_forwarded_element(element: str) -> dict[str, str]:
+        """Parse one ``Forwarded`` element ("for=...;proto=...;host=...") into a dict.
+
+        Directive names are lower-cased and surrounding double quotes are removed from
+        values. Segments without "=" are ignored instead of raising, so a malformed
+        header cannot fail the request.
+        """
+        directives: dict[str, str] = {}
+        for segment in element.split(";"):
+            name, separator, value = segment.partition("=")
+            if not separator:
+                continue
+            directives[name.strip().lower()] = value.strip().strip('"')
+        return directives
+
+    @staticmethod
+    def _forwarded_node_host(node: str) -> str:
+        """Return the host part of a ``for`` node, dropping brackets and any port."""
+        node = node.strip().strip('"')
+        if node.startswith("["):
+            # Bracketed IPv6, optionally followed by ":port".
+            return node[1:].partition("]")[0]
+        if node.count(":") == 1:
+            # "host:port"; a bare IPv6 address has more than one colon and is kept whole.
+            return node.partition(":")[0]
+        return node
+
+    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+        if scope["type"] == "lifespan":
+            return await self.app(scope, receive, send)
+
+        client_addr = scope.get("client")
+        client_host = client_addr[0] if client_addr else None
+
+        if client_host in self.trusted_hosts:
+            headers = dict(scope["headers"])
+
+            proto = None
+            if b"x-forwarded-proto" in headers:
+                proto = headers[b"x-forwarded-proto"].decode("latin1").strip()
+
+            host = None
+            if b"x-forwarded-host" in headers:
+                host = headers[b"x-forwarded-host"].decode("latin1").strip()
+
+            # X-Forwarded-For: client, proxy1, proxy2
+            # NOTE(review): the left-most entry is client-controlled when the proxy appends
+            # to an incoming header instead of overwriting it - confirm the ingress overwrites.
+            client_ip = None
+            if b"x-forwarded-for" in headers:
+                client_ip = headers[b"x-forwarded-for"].decode("latin1").split(",")[0].strip()
+
+            if b"forwarded" in headers:
+                for forwarded in headers[b"forwarded"].decode("latin1").split(","):
+                    directives = self._parse_forwarded_element(forwarded)
+                    if "proto" in directives or "host" in directives or "for" in directives:
+                        # Only override what this element actually carries.
+                        proto = directives.get("proto", proto)
+                        host = directives.get("host", host)
+                        if "for" in directives:
+                            client_ip = self._forwarded_node_host(directives["for"]) or None
+                        break
+
+            if proto in {"http", "https", "ws", "wss"}:
+                if scope["type"] == "websocket":
+                    scope["scheme"] = proto.replace("http", "ws")
+                else:
+                    scope["scheme"] = proto
+
+            if host:
+                # The value was decoded as latin1 above; encode the same way to round-trip the bytes.
+                host_bytes = host.encode("latin1")
+                scope["headers"] = [
+                    (key, host_bytes) if key == b"host" else (key, value) for key, value in scope["headers"]
+                ]
+                scope["server"] = (host, None)
+
+            if client_ip:
+                scope["client"] = (client_ip, 0)
+
+        return await self.app(scope, receive, send)
+
+
+def _parse_raw_hosts(value: str) -> list[str]:
+    """Split a comma-separated host string into whitespace-trimmed entries."""
+    return list(map(str.strip, value.split(",")))
+
+
+class _TrustedHosts:
+    """Set-like container of trusted peers: IP addresses, IP networks and raw literals."""
+
+    def __init__(self, trusted_hosts: list[str] | str) -> None:
+        """Classify every configured entry as an address, a network or a literal.
+
+        ``"*"`` (or ``["*"]``) trusts every peer and skips classification. A string is
+        treated as a comma-separated list.
+        """
+        self.always_trust: bool = trusted_hosts in ("*", ["*"])
+
+        self.trusted_literals: set[str] = set()
+        self.trusted_hosts: set[ipaddress.IPv4Address | ipaddress.IPv6Address] = set()
+        self.trusted_networks: set[ipaddress.IPv4Network | ipaddress.IPv6Network] = set()
+
+        # Design notes:
+        # - Addresses are stored as objects, not strings, because the same IPv6
+        #   address has many textual spellings.
+        # - Single addresses are not turned into /32 or /128 networks: a set lookup
+        #   is cheaper than testing membership in every network.
+        # - Literals are kept because the peer may not be an IP address at all,
+        #   e.g. a unix socket.
+
+        if self.always_trust:
+            return
+
+        entries = _parse_raw_hosts(trusted_hosts) if isinstance(trusted_hosts, str) else trusted_hosts
+
+        for entry in entries:
+            # Anything that fails to parse is silently kept as a literal, so a
+            # malformed address or network is not reported to the user.
+            looks_like_network = "/" in entry
+            try:
+                if looks_like_network:
+                    self.trusted_networks.add(ipaddress.ip_network(entry))
+                else:
+                    self.trusted_hosts.add(ipaddress.ip_address(entry))
+            except ValueError:
+                self.trusted_literals.add(entry)
+
+    def __contains__(self, host: str | None) -> bool:
+        """Return True when ``host`` is trusted; ``None`` and "" are trusted only under "*"."""
+        if self.always_trust:
+            return True
+
+        if not host:
+            return False
+
+        try:
+            address = ipaddress.ip_address(host)
+        except ValueError:
+            # Not an IP address: fall back to an exact literal match.
+            return host in self.trusted_literals
+
+        if address in self.trusted_hosts:
+            return True
+
+        for network in self.trusted_networks:
+            if address in network:
+                return True
+        return False
@@ -0,0 +1,123 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+import hashlib
+import logging
+import time
+from typing import Final, override
+
+from fastapi import Request, Response, status
+from fastapi.responses import JSONResponse
+from limits import RateLimitItem
+from limits.aio.storage import Storage
+from limits.aio.strategies import STRATEGIES, RateLimiter
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
+from starlette.types import ASGIApp
+
+from agentstack_server.configuration import RateLimitConfiguration
+
+logger = logging.getLogger(__name__)
+
+
+class RateLimitMiddleware(BaseHTTPMiddleware):
+    """
+    Rate limiting middleware that uses the limits library.
+
+    Supports both Redis and in-memory storage backends.
+    Rate limit keys are generated based on authentication type:
+    - Bearer tokens (OAuth/JWT): hashes the token
+    - Basic auth: hashes the credentials
+    - No auth: uses client IP address
+
+    Every response that passes through the limiter carries X-RateLimit-Limit,
+    X-RateLimit-Remaining, X-RateLimit-Reset and Retry-After headers.
+    """
+
+    def __init__(
+        self,
+        app: ASGIApp,
+        limiter_storage: Storage,
+        configuration: RateLimitConfiguration,
+    ):
+        """
+        Args:
+            app: The wrapped ASGI application.
+            limiter_storage: Async ``limits`` storage holding the counters.
+            configuration: Supplies ``enabled``, ``limits_parsed`` and ``strategy``.
+        """
+        super().__init__(app)
+        self.enabled: Final[bool] = configuration.enabled
+        # Sorted so that index 0 can be used as the default limit for response headers.
+        self.limits: Final[list[RateLimitItem]] = sorted(configuration.limits_parsed)
+        self.limiter: Final[RateLimiter] = STRATEGIES[configuration.strategy](limiter_storage)
+
+        logger.info(
+            "Rate limiting initialized:\n"
+            + f"  Storage class: {type(limiter_storage).__name__}\n"
+            + f"  Strategy class: {type(self.limiter).__name__}\n"
+            + f"  Limits: {[str(limit) for limit in self.limits]}"
+        )
+
+    def _hash_secret(self, secret: str) -> str:
+        """Return the SHA-256 hex digest of ``secret`` so raw credentials never become keys."""
+        return hashlib.sha256(secret.encode()).hexdigest()
+
+    def _extract_auth_key(self, request: Request) -> str:
+        """
+        Extract authentication key from request for rate limiting.
+
+        Priority:
+        1. Bearer token (OAuth/JWT or internal JWT)
+        2. Basic auth credentials (hashed)
+        3. Client IP address
+
+        The auth scheme is matched case-insensitively, so "bearer x" and "Bearer x"
+        land in the same bucket.
+        """
+        # NOTE(review): credentials are not validated here, so every distinct (even
+        # bogus) token gets its own bucket - confirm this is acceptable.
+        auth_header = request.headers.get("authorization", "")
+        scheme, _, credentials = auth_header.partition(" ")
+        scheme = scheme.lower()
+
+        if credentials and scheme == "bearer":
+            return f"bearer:{self._hash_secret(credentials)}"
+
+        if credentials and scheme == "basic":
+            return f"basic:{self._hash_secret(credentials)}"
+
+        # Fallback to client IP
+        client_host = request.client.host if request.client else "unknown"
+        return f"ip:{client_host}"
+
+    @override
+    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
+        """Process request with rate limiting.
+
+        Requests pass straight through when limiting is disabled, no limits are
+        configured, or the path is "/healthcheck". Otherwise each configured limit is
+        charged in order; the first one that is exhausted produces a 429 JSON response
+        and the wrapped application is not called.
+        """
+        if not self.enabled or not self.limits or request.url.path == "/healthcheck":
+            return await call_next(request)
+
+        # Generate rate limit key
+        rate_limit_key = self._extract_auth_key(request)
+
+        response: Response
+
+        # Check all configured limits
+        header_limit = self.limits[0]  # return the first limit which should be the shortest time period
+
+        for limit in self.limits:
+            # hit() charges the limit; limits checked before a failing one stay charged.
+            if not await self.limiter.hit(limit, rate_limit_key):
+                logger.warning(
+                    f"Rate limit exceeded for key '{rate_limit_key[:20]}...' "
+                    + f"on {request.method} {request.url.path} (limit: {limit})"
+                )
+
+                # Report the limit that was actually exceeded in the headers.
+                header_limit = limit
+                response = JSONResponse(
+                    status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+                    content={"error": "rate_limit_exceeded", "detail": f"Rate limit exceeded: {limit}"},
+                )
+                break
+        else:
+            response = await call_next(request)
+
+        reset_time, remaining = await self.limiter.get_window_stats(header_limit, rate_limit_key)
+
+        # Respect a longer Retry-After (in seconds) already set by the wrapped application.
+        if existing_retry_after_header := response.headers.get("Retry-After"):
+            try:
+                retry_after = int(existing_retry_after_header)
+                retry_after_timestamp = time.time() + retry_after
+                reset_time = max(reset_time, retry_after_timestamp)
+            except ValueError:
+                logger.warning(f"Invalid Retry-After header value: {existing_retry_after_header}")
+
+        response.headers["X-RateLimit-Limit"] = str(header_limit.amount)
+        response.headers["X-RateLimit-Remaining"] = str(remaining)
+        response.headers["X-RateLimit-Reset"] = str(reset_time)
+        # Clamp at zero: the window may already have reset by the time we get here.
+        response.headers["Retry-After"] = str(max(0, int(reset_time - time.time())))
+
+        return response
@@ -4,7 +4,7 @@
 import logging
 import time
 from collections.abc import Iterable
-from contextlib import asynccontextmanager, suppress
+from contextlib import asynccontextmanager, nullcontext, suppress
 from importlib.metadata import PackageNotFoundError, version
 
 import procrastinate
@@ -14,10 +14,13 @@
 from fastapi.openapi.utils import get_openapi
 from fastapi.responses import JSONResponse, ORJSONResponse
 from kink import Container, di, inject
+from limits.aio.storage import Storage
 from opentelemetry.metrics import CallbackOptions, Observation, get_meter
 from procrastinate.exceptions import AlreadyEnqueued
 from starlette.status import HTTP_401_UNAUTHORIZED, HTTP_500_INTERNAL_SERVER_ERROR
 
+from agentstack_server.api.middleware.proxy_headers import ProxyHeadersMiddleware
+from agentstack_server.api.middleware.rate_limit import RateLimitMiddleware
 from agentstack_server.api.routes.a2a import router as a2a_router
 from agentstack_server.api.routes.auth import well_known_router as auth_well_known_router
 from agentstack_server.api.routes.configurations import router as configuration_router
@@ -44,7 +47,6 @@
 from agentstack_server.run_workers import run_workers
 from agentstack_server.service_layer.services.mcp import McpService
 from agentstack_server.telemetry import INSTRUMENTATION_NAME, shutdown_telemetry
-from agentstack_server.utils.fastapi import ProxyHeadersMiddleware
 
 logger = logging.getLogger(__name__)
 
@@ -177,7 +179,7 @@ def scrape_platform_status(options: CallbackOptions) -> Iterable[Observation]:
     # meter.create_observable_gauge("providers_by_status", callbacks=[scrape_providers_by_status])
 
 
-def app(*, dependency_overrides: Container | None = None) -> FastAPI:
+def app(*, dependency_overrides: Container | None = None, enable_workers: bool = True) -> FastAPI:
     """Entrypoint for API application, called by Uvicorn"""
 
     logger.info("Bootstrapping dependencies...")
@@ -189,7 +191,11 @@ def app(*, dependency_overrides: Container | None = None) -> FastAPI:
     async def lifespan(_app: FastAPI, procrastinate_app: procrastinate.App, mcp_service: McpService):
         try:
             register_telemetry()
-            async with procrastinate_app.open_async(), run_workers(app=procrastinate_app), mcp_service:
+            async with (
+                procrastinate_app.open_async(),
+                run_workers(app=procrastinate_app) if enable_workers else nullcontext(),
+                mcp_service,
+            ):
                 with suppress(AlreadyEnqueued):
                     # Force initial sync of the registry immediately
                     await check_registry.defer_async(timestamp=int(time.time()))
@@ -212,7 +218,9 @@ async def lifespan(_app: FastAPI, procrastinate_app: procrastinate.App, mcp_serv
     logger.info("Mounting routes...")
     mount_routes(app)
 
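+    # Execution order is important here: https://fastapi.tiangolo.com/tutorial/middleware/#multiple-middleware-execution-order
+    # The middleware added last is the outermost one, so ProxyHeadersMiddleware runs first and
+    # RateLimitMiddleware sees the client address it has already rewritten.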
+    app.add_middleware(RateLimitMiddleware, limiter_storage=di[Storage], configuration=configuration.rate_limit)
     app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*" if configuration.trust_proxy_headers else "")
-    register_global_exception_handlers(app)
 
+    register_global_exception_handlers(app)
     return app
Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@ dependencies = [`
`47`	`47`	`"mcp>=1.13.1",`
`48`	`48`	`"opentelemetry-instrumentation-httpx>=0.59b0",`
`49`	`49`	`"opentelemetry-instrumentation-fastapi>=0.59b0",`
	`50`	`+ "limits[async-redis]>=5.3.0",`
`50`	`51`	`]`
`51`	`52`
`52`	`53`	`[dependency-groups]`