agent-governance-toolkit/packages/agent-os/src/agent_os/integrations/rate_limiter.py at main · microsoft/agent-governance-toolkit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tool-call rate limiting tied to governance policy.

This module enforces token-bucket limits for tool invocations, optionally scoped
per agent and governed by ``GovernancePolicy.max_tool_calls``.

See also:
    - hypervisor.security.rate_limiter: runtime-layer per-agent/per-ring limits.
    - agentmesh.services.rate_limiter: service/proxy-level limits in Agent Mesh.
    - agentmesh.services.rate_limit_middleware: HTTP edge middleware in Agent Mesh.
    - agent_os.policies.rate_limiting: shared token-bucket primitives.
"""

import threading
import time
from dataclasses import dataclass
from typing import Optional

from .base import GovernancePolicy


@dataclass(frozen=True)
class RateLimitStatus:
    """Immutable point-in-time view of one rate-limit bucket.

    Instances are built by ``RateLimiter.check``; the per-field comments
    below describe how that method fills each value.
    """

    # True when at least one whole token is available in the bucket.
    allowed: bool
    # Number of whole tokens in the bucket (fractional part truncated).
    remaining_calls: int
    # ``time.monotonic()`` reading taken by the check plus the limiter's
    # time window; this is a monotonic-clock value, not a wall-clock time.
    reset_at: float
    # Seconds until one token is expected to be available; 0.0 when allowed.
    wait_seconds: float


class RateLimiter:
    """Thread-safe token-bucket rate limiter for tool calls.

    Args:
        max_calls: Maximum number of calls allowed per time window (bucket size).
        time_window: Duration of the time window in seconds.
        per_agent: If ``True``, limits are tracked independently per agent.
            If ``False``, a single global bucket is used for all agents.
        policy: Optional GovernancePolicy whose ``max_tool_calls`` overrides
            *max_calls*.
    """

    _GLOBAL_KEY = "__global__"

    def __init__(
        self,
        max_calls: int = 10,
        time_window: float = 60.0,
        per_agent: bool = True,
        policy: Optional[GovernancePolicy] = None,
    ) -> None:
        if max_calls <= 0:
            raise ValueError("max_calls must be positive")
        if time_window <= 0:
            raise ValueError("time_window must be positive")

        self._max_calls = policy.max_tool_calls if policy is not None else max_calls
        self._time_window = float(time_window)
        self._per_agent = per_agent
        self._lock = threading.Lock()
        # Each bucket: (tokens: float, last_refill: float)
        self._buckets: dict[str, list] = {}

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _key(self, agent_id: str) -> str:
        return agent_id if self._per_agent else self._GLOBAL_KEY

    def _refill(self, bucket: list, now: float) -> None:
        """Add tokens accrued since the last refill."""
        elapsed = now - bucket[1]
        if elapsed > 0:
            rate = self._max_calls / self._time_window
            bucket[0] = min(self._max_calls, bucket[0] + elapsed * rate)
            bucket[1] = now

    def _get_bucket(self, key: str, now: float) -> list:
        bucket = self._buckets.get(key)
        if bucket is None:
            bucket = [float(self._max_calls), now]
            self._buckets[key] = bucket
        else:
            self._refill(bucket, now)
        return bucket

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def allow(self, agent_id: str) -> bool:
        """Try to consume one token. Returns ``True`` if the call is allowed."""
        now = time.monotonic()
        with self._lock:
            bucket = self._get_bucket(self._key(agent_id), now)
            if bucket[0] >= 1.0:
                bucket[0] -= 1.0
                return True
            return False

    def check(self, agent_id: str) -> RateLimitStatus:
        """Return current rate-limit status without consuming a token."""
        now = time.monotonic()
        with self._lock:
            bucket = self._get_bucket(self._key(agent_id), now)
            remaining = int(bucket[0])
            allowed = remaining >= 1
            if allowed:
                wait = 0.0
            else:
                rate = self._max_calls / self._time_window
                wait = (1.0 - bucket[0]) / rate if rate > 0 else 0.0
            reset_at = now + self._time_window
        return RateLimitStatus(
            allowed=allowed,
            remaining_calls=remaining,
            reset_at=reset_at,
            wait_seconds=wait,
        )

    def wait_time(self, agent_id: str) -> float:
        """Return seconds until at least one token is available (0.0 if available now)."""
        return self.check(agent_id).wait_seconds

    def reset(self, agent_id: str) -> None:
        """Reset the bucket for *agent_id* (or the global bucket if ``per_agent=False``)."""
        with self._lock:
            self._buckets.pop(self._key(agent_id), None)