Skip to content

Commit d1c1420

Browse files
authored
feat(server): add per-role rate limits to openai proxy (#1698)
Signed-off-by: Radek Ježek <radek.jezek@ibm.com>
1 parent 3aa7c09 commit d1c1420

File tree

36 files changed

+1481
-589
lines changed

36 files changed

+1481
-589
lines changed

apps/agentstack-cli/src/agentstack_cli/commands/build.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,10 @@ async def _server_side_build(
152152
verbose: bool = False,
153153
) -> ProviderBuild:
154154
build = None
155-
try:
156-
from agentstack_cli.commands.agent import select_provider
157-
from agentstack_cli.configuration import Configuration
155+
from agentstack_cli.commands.agent import select_provider
156+
from agentstack_cli.configuration import Configuration
158157

158+
try:
159159
if replace and add:
160160
raise ValueError("Cannot specify both replace and add options.")
161161

@@ -181,8 +181,9 @@ async def _server_side_build(
181181
print_log(message, ansi_mode=True, out_console=err_console)
182182
return await build.get()
183183
except (KeyboardInterrupt, CancelledError):
184-
if build:
185-
await build.delete()
184+
async with Configuration().use_platform_client():
185+
if build:
186+
await build.delete()
186187
console.error("Build aborted.")
187188
raise
188189

apps/agentstack-cli/src/agentstack_cli/commands/model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,8 @@ async def _select_default_model(capability: ModelCapability) -> str | None:
354354
if capability == ModelCapability.LLM:
355355
test_response = await client.chat.completions.create(
356356
model=selected_model,
357-
max_completion_tokens=500, # reasoning models need some tokens to think about this
357+
# reasoning models need some tokens to think about this
358+
max_completion_tokens=500 if not selected_model.startswith("mistral") else None,
358359
messages=[
359360
{
360361
"role": "system",

apps/agentstack-sdk-py/src/agentstack_sdk/util/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ async def parse_stream(response: httpx.Response) -> AsyncIterator[dict[str, Any]
2828
raise HTTPStatusError(message=error, request=response.request, response=response)
2929
async for line in response.aiter_lines():
3030
if line:
31-
yield json.loads(re.sub("^data:", "", line).strip())
31+
data = re.sub("^data:", "", line).strip()
32+
try:
33+
yield json.loads(data)
34+
except json.JSONDecodeError:
35+
yield {"event": data}
3236

3337

3438
def extract_messages(exc: BaseException) -> list[tuple[str, str]]:

apps/agentstack-server/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,5 @@ lint.ignore = [
119119
ignore = ["tests/**"]
120120
venvPath = "."
121121
venv = ".venv"
122+
enableTypeIgnoreComments = true
123+
reportIgnoreCommentWithoutRule="none"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
AGENTSTACK_PROXY_VERSION = 1

apps/agentstack-server/src/agentstack_server/api/dependencies.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import logging
5-
from typing import Annotated
5+
from typing import Annotated, Final
66
from uuid import UUID
77

88
from fastapi import Depends, HTTPException, Path, Query, Request, status
99
from fastapi.security import HTTPAuthorizationCredentials, HTTPBasic, HTTPBasicCredentials, HTTPBearer
1010
from kink import di
11+
from limits.aio.storage import Storage
1112
from pydantic import ConfigDict
1213

1314
from agentstack_server.api.auth.auth import (
@@ -18,6 +19,7 @@
1819
verify_internal_jwt,
1920
)
2021
from agentstack_server.api.auth.utils import create_resource_uri
22+
from agentstack_server.api.rate_limiter import RateLimit, UserRateLimiter
2123
from agentstack_server.configuration import Configuration
2224
from agentstack_server.domain.models.permissions import AuthorizedUser, Permissions
2325
from agentstack_server.domain.models.user import UserRole
@@ -214,3 +216,23 @@ def __call__(self, user: Annotated[AuthorizedUser, Depends(authorized_user)]) ->
214216
if user.global_permissions.check(self):
215217
return user
216218
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient permissions")
219+
220+
221+
def user_rate_limiter(
222+
user: Annotated[AuthorizedUser, Depends(authorized_user)],
223+
configuration: ConfigurationDependency,
224+
storage: Annotated[Storage, Depends(lambda: di[Storage])],
225+
) -> UserRateLimiter:
226+
return UserRateLimiter(user=user.user, configuration=configuration, storage=storage)
227+
228+
229+
UserRateLimiterDependency = Annotated[UserRateLimiter, Depends(user_rate_limiter)]
230+
231+
232+
class ActivatedUserRateLimiterDependency:
233+
def __init__(self, limit: RateLimit) -> None:
234+
self._limit: Final[RateLimit] = limit
235+
236+
async def __call__(self, user_rate_limiter: UserRateLimiterDependency) -> UserRateLimiter:
237+
await user_rate_limiter.hit(self._limit)
238+
return user_rate_limiter

apps/agentstack-server/src/agentstack_server/api/middleware/rate_limit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def __init__(
3838
):
3939
super().__init__(app)
4040
self.enabled: Final[bool] = configuration.enabled
41-
self.limits: Final[list[RateLimitItem]] = sorted(configuration.limits_parsed)
41+
self.limits: Final[list[RateLimitItem]] = sorted(configuration.global_limits_parsed)
4242
self.limiter: Final[RateLimiter] = STRATEGIES[configuration.strategy](limiter_storage)
4343

4444
logger.info(
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from contextlib import asynccontextmanager
5+
from enum import StrEnum
6+
from typing import Final, cast
7+
8+
from limits import RateLimitItem
9+
from limits.aio.storage import Storage
10+
from limits.aio.strategies import STRATEGIES, RateLimiter
11+
12+
from agentstack_server.configuration import Configuration, RoleRateLimits
13+
from agentstack_server.domain.models.user import User, UserRole
14+
from agentstack_server.exceptions import RateLimitExceededError
15+
16+
17+
class RateLimit(StrEnum):
18+
assert RoleRateLimits.openai_chat_completion_tokens_parsed.attrname
19+
assert RoleRateLimits.openai_chat_completion_requests_parsed.attrname
20+
assert RoleRateLimits.openai_embedding_inputs_parsed.attrname
21+
22+
OPENAI_CHAT_COMPLETION_TOKENS = RoleRateLimits.openai_chat_completion_tokens_parsed.attrname
23+
OPENAI_CHAT_COMPLETION_REQUESTS = RoleRateLimits.openai_chat_completion_requests_parsed.attrname
24+
OPENAI_EMBEDDING_ITEMS = RoleRateLimits.openai_embedding_inputs_parsed.attrname
25+
26+
27+
class UserRateLimiter:
28+
def __init__(self, user: User, configuration: Configuration, storage: Storage):
29+
self._enabled: bool = configuration.rate_limit.enabled
30+
self._user: Final[User] = user
31+
self._limiter: Final[RateLimiter] = STRATEGIES[configuration.rate_limit.strategy](storage=storage)
32+
self._role_limits: Final[dict[UserRole, RoleRateLimits]] = {
33+
UserRole.USER: configuration.rate_limit.role_based_limits.user,
34+
UserRole.DEVELOPER: configuration.rate_limit.role_based_limits.developer,
35+
UserRole.ADMIN: configuration.rate_limit.role_based_limits.admin,
36+
}
37+
self._key: Final[str] = str(user.id)
38+
39+
def _get_limits(self, limit: RateLimit) -> list[RateLimitItem]:
40+
if not self._enabled:
41+
return []
42+
return cast(list[RateLimitItem], getattr(self._role_limits[self._user.role], RateLimit(limit).value))
43+
44+
async def hit(self, limit: RateLimit, cost: int = 1) -> None:
45+
for configured_limit in self._get_limits(limit):
46+
if not await self._limiter.hit(configured_limit, self._key, cost=cost):
47+
reset_time, remaining = await self._limiter.get_window_stats(configured_limit, self._key)
48+
amount = configured_limit.amount
49+
raise RateLimitExceededError(key=self._key, amount=amount, remaining=remaining, reset_time=reset_time)
50+
51+
async def test(self, limit: RateLimit, cost: int = 1) -> None:
52+
for configured_limit in self._get_limits(limit):
53+
if not await self._limiter.test(configured_limit, self._key, cost=cost):
54+
reset_time, remaining = await self._limiter.get_window_stats(configured_limit, self._key)
55+
amount = configured_limit.amount
56+
raise RateLimitExceededError(key=self._key, amount=amount, remaining=remaining, reset_time=reset_time)
57+
58+
@asynccontextmanager
59+
async def limit(self, limit: RateLimit, cost: int = 1):
60+
if self._enabled:
61+
await self.hit(limit, cost)
62+
yield

0 commit comments

Comments
 (0)