11 changes: 6 additions & 5 deletions apps/agentstack-cli/src/agentstack_cli/commands/build.py
@@ -152,10 +152,10 @@ async def _server_side_build(
     verbose: bool = False,
 ) -> ProviderBuild:
     build = None
-    try:
-        from agentstack_cli.commands.agent import select_provider
-        from agentstack_cli.configuration import Configuration
+    from agentstack_cli.commands.agent import select_provider
+    from agentstack_cli.configuration import Configuration
 
+    try:
         if replace and add:
             raise ValueError("Cannot specify both replace and add options.")
 
@@ -181,8 +181,9 @@ async def _server_side_build(
             print_log(message, ansi_mode=True, out_console=err_console)
         return await build.get()
     except (KeyboardInterrupt, CancelledError):
-        if build:
-            await build.delete()
+        async with Configuration().use_platform_client():
+            if build:
+                await build.delete()
         console.error("Build aborted.")
         raise
 
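
The abort path now re-opens a platform client session before deleting the partially created build, presumably so the cleanup call still has a live client while the original command flow is being cancelled. A minimal sketch of that shape, with hypothetical stand-ins (open_client, start_build, follow_logs) rather than the real AgentStack CLI objects:

# Sketch only: open_client(), start_build() and follow_logs() are hypothetical stand-ins,
# not AgentStack APIs; the point is the fresh client context on the cancellation path.
import asyncio
from contextlib import asynccontextmanager


@asynccontextmanager
async def open_client():
    yield object()  # pretend platform client setup/teardown


async def run_build(start_build, follow_logs):
    build = None
    try:
        async with open_client():
            build = await start_build()
            return await follow_logs(build)
    except (KeyboardInterrupt, asyncio.CancelledError):
        # The original context is already unwinding, so open a fresh one just for cleanup.
        async with open_client():
            if build:
                await build.delete()
        raise
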
3 changes: 2 additions & 1 deletion apps/agentstack-cli/src/agentstack_cli/commands/model.py
@@ -354,7 +354,8 @@ async def _select_default_model(capability: ModelCapability) -> str | None:
     if capability == ModelCapability.LLM:
         test_response = await client.chat.completions.create(
             model=selected_model,
-            max_completion_tokens=500,  # reasoning models need some tokens to think about this
+            # reasoning models need some tokens to think about this
+            max_completion_tokens=500 if not selected_model.startswith("mistral") else None,
             messages=[
                 {
                     "role": "system",
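
For reference, the same conditional cap pulled out into a standalone sketch, assuming an AsyncOpenAI-style client; the prompts and helper name are placeholders, and the "mistral" prefix check simply mirrors the change above:

# Hedged sketch, not the CLI's exact code: None drops the explicit token cap for
# models whose names start with "mistral", as in the diff above.
from openai import AsyncOpenAI


async def probe_model(client: AsyncOpenAI, selected_model: str) -> str | None:
    max_completion_tokens = 500 if not selected_model.startswith("mistral") else None
    response = await client.chat.completions.create(
        model=selected_model,
        max_completion_tokens=max_completion_tokens,
        messages=[
            {"role": "system", "content": "You are a health-check probe."},  # placeholder prompt
            {"role": "user", "content": "Reply with OK."},  # placeholder prompt
        ],
    )
    return response.choices[0].message.content
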
6 changes: 5 additions & 1 deletion apps/agentstack-sdk-py/src/agentstack_sdk/util/utils.py
@@ -28,7 +28,11 @@ async def parse_stream(response: httpx.Response) -> AsyncIterator[dict[str, Any]
         raise HTTPStatusError(message=error, request=response.request, response=response)
     async for line in response.aiter_lines():
         if line:
-            yield json.loads(re.sub("^data:", "", line).strip())
+            data = re.sub("^data:", "", line).strip()
+            try:
+                yield json.loads(data)
+            except json.JSONDecodeError:
+                yield {"event": data}
 
 
 def extract_messages(exc: BaseException) -> list[tuple[str, str]]:
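
With this change, a data: line that is not valid JSON no longer aborts the stream; it is surfaced as an {"event": ...} item instead. A self-contained sketch of that fallback on a canned stream (the payloads here are made up, and the SDK function itself is not called):

import json
import re

# Hypothetical SSE lines; "[DONE]" is not JSON and exercises the fallback branch.
lines = ['data: {"delta": "hello"}', "data: [DONE]"]

parsed = []
for line in lines:
    data = re.sub("^data:", "", line).strip()
    try:
        parsed.append(json.loads(data))
    except json.JSONDecodeError:
        parsed.append({"event": data})

print(parsed)  # [{'delta': 'hello'}, {'event': '[DONE]'}]
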
2 changes: 2 additions & 0 deletions apps/agentstack-server/pyproject.toml
@@ -119,3 +119,5 @@ lint.ignore = [
 ignore = ["tests/**"]
 venvPath = "."
 venv = ".venv"
+enableTypeIgnoreComments = true
+reportIgnoreCommentWithoutRule="none"
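
Assuming this is the basedpyright (pyright-compatible) section of the server's pyproject.toml, the two new keys make bare "# type: ignore" comments both effective and acceptable without a rule suffix. A hypothetical line they would now pass over quietly:

def legacy_untyped_call() -> str:  # hypothetical helper with a mismatched return type
    return "not an int"


# enableTypeIgnoreComments = true lets the bare comment suppress the assignment error here;
# reportIgnoreCommentWithoutRule = "none" accepts it without a [rule] suffix.
result: int = legacy_untyped_call()  # type: ignore
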
4 changes: 4 additions & 0 deletions apps/agentstack-server/src/agentstack_server/api/constants.py
@@ -0,0 +1,4 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+AGENTSTACK_PROXY_VERSION = 1
24 changes: 23 additions & 1 deletion apps/agentstack-server/src/agentstack_server/api/dependencies.py
@@ -2,12 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging
-from typing import Annotated
+from typing import Annotated, Final
 from uuid import UUID
 
 from fastapi import Depends, HTTPException, Path, Query, Request, status
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBasic, HTTPBasicCredentials, HTTPBearer
 from kink import di
+from limits.aio.storage import Storage
 from pydantic import ConfigDict
 
 from agentstack_server.api.auth.auth import (
@@ -18,6 +19,7 @@
     verify_internal_jwt,
 )
 from agentstack_server.api.auth.utils import create_resource_uri
+from agentstack_server.api.rate_limiter import RateLimit, UserRateLimiter
 from agentstack_server.configuration import Configuration
 from agentstack_server.domain.models.permissions import AuthorizedUser, Permissions
 from agentstack_server.domain.models.user import UserRole
@@ -214,3 +216,23 @@ def __call__(self, user: Annotated[AuthorizedUser, Depends(authorized_user)]) ->
         if user.global_permissions.check(self):
             return user
         raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient permissions")
+
+
+def user_rate_limiter(
+    user: Annotated[AuthorizedUser, Depends(authorized_user)],
+    configuration: ConfigurationDependency,
+    storage: Annotated[Storage, Depends(lambda: di[Storage])],
+) -> UserRateLimiter:
+    return UserRateLimiter(user=user.user, configuration=configuration, storage=storage)
+
+
+UserRateLimiterDependency = Annotated[UserRateLimiter, Depends(user_rate_limiter)]
+
+
+class ActivatedUserRateLimiterDependency:
+    def __init__(self, limit: RateLimit) -> None:
+        self._limit: Final[RateLimit] = limit
+
+    async def __call__(self, user_rate_limiter: UserRateLimiterDependency) -> UserRateLimiter:
+        await user_rate_limiter.hit(self._limit)
+        return user_rate_limiter
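
A rough sketch of how the new dependencies might be attached to a route; the router, path, handler and the extra token charge are illustrative, not part of this diff:

# Illustrative wiring only: the endpoint and cost values are hypothetical.
from typing import Annotated

from fastapi import APIRouter, Depends

from agentstack_server.api.dependencies import ActivatedUserRateLimiterDependency
from agentstack_server.api.rate_limiter import RateLimit, UserRateLimiter

router = APIRouter()


@router.post("/example/chat/completions")
async def example_chat_completions(
    rate_limiter: Annotated[
        UserRateLimiter,
        # Charges one request against the caller's per-role limit before the handler body runs.
        Depends(ActivatedUserRateLimiterDependency(RateLimit.OPENAI_CHAT_COMPLETION_REQUESTS)),
    ],
) -> dict[str, str]:
    # The same limiter instance can charge additional, cost-based limits inside the handler.
    await rate_limiter.hit(RateLimit.OPENAI_CHAT_COMPLETION_TOKENS, cost=128)
    return {"status": "ok"}
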
(file path not captured in this diff view)
@@ -38,7 +38,7 @@ def __init__(
     ):
         super().__init__(app)
         self.enabled: Final[bool] = configuration.enabled
-        self.limits: Final[list[RateLimitItem]] = sorted(configuration.limits_parsed)
+        self.limits: Final[list[RateLimitItem]] = sorted(configuration.global_limits_parsed)
         self.limiter: Final[RateLimiter] = STRATEGIES[configuration.strategy](limiter_storage)
 
         logger.info(
62 changes: 62 additions & 0 deletions apps/agentstack-server/src/agentstack_server/api/rate_limiter.py
@@ -0,0 +1,62 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+from contextlib import asynccontextmanager
+from enum import StrEnum
+from typing import Final, cast
+
+from limits import RateLimitItem
+from limits.aio.storage import Storage
+from limits.aio.strategies import STRATEGIES, RateLimiter
+
+from agentstack_server.configuration import Configuration, RoleRateLimits
+from agentstack_server.domain.models.user import User, UserRole
+from agentstack_server.exceptions import RateLimitExceededError
+
+
+class RateLimit(StrEnum):
+    assert RoleRateLimits.openai_chat_completion_tokens_parsed.attrname
+    assert RoleRateLimits.openai_chat_completion_requests_parsed.attrname
+    assert RoleRateLimits.openai_embedding_inputs_parsed.attrname
+
+    OPENAI_CHAT_COMPLETION_TOKENS = RoleRateLimits.openai_chat_completion_tokens_parsed.attrname
+    OPENAI_CHAT_COMPLETION_REQUESTS = RoleRateLimits.openai_chat_completion_requests_parsed.attrname
+    OPENAI_EMBEDDING_ITEMS = RoleRateLimits.openai_embedding_inputs_parsed.attrname
+
+
+class UserRateLimiter:
+    def __init__(self, user: User, configuration: Configuration, storage: Storage):
+        self._enabled: bool = configuration.rate_limit.enabled
+        self._user: Final[User] = user
+        self._limiter: Final[RateLimiter] = STRATEGIES[configuration.rate_limit.strategy](storage=storage)
+        self._role_limits: Final[dict[UserRole, RoleRateLimits]] = {
+            UserRole.USER: configuration.rate_limit.role_based_limits.user,
+            UserRole.DEVELOPER: configuration.rate_limit.role_based_limits.developer,
+            UserRole.ADMIN: configuration.rate_limit.role_based_limits.admin,
+        }
+        self._key: Final[str] = str(user.id)
+
+    def _get_limits(self, limit: RateLimit) -> list[RateLimitItem]:
+        if not self._enabled:
+            return []
+        return cast(list[RateLimitItem], getattr(self._role_limits[self._user.role], RateLimit(limit).value))
+
+    async def hit(self, limit: RateLimit, cost: int = 1) -> None:
+        for configured_limit in self._get_limits(limit):
+            if not await self._limiter.hit(configured_limit, self._key, cost=cost):
+                reset_time, remaining = await self._limiter.get_window_stats(configured_limit, self._key)
+                amount = configured_limit.amount
+                raise RateLimitExceededError(key=self._key, amount=amount, remaining=remaining, reset_time=reset_time)
+
+    async def test(self, limit: RateLimit, cost: int = 1) -> None:
+        for configured_limit in self._get_limits(limit):
+            if not await self._limiter.test(configured_limit, self._key, cost=cost):
+                reset_time, remaining = await self._limiter.get_window_stats(configured_limit, self._key)
+                amount = configured_limit.amount
+                raise RateLimitExceededError(key=self._key, amount=amount, remaining=remaining, reset_time=reset_time)
+
+    @asynccontextmanager
+    async def limit(self, limit: RateLimit, cost: int = 1):
+        if self._enabled:
+            await self.hit(limit, cost)
+        yield
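
For orientation, a rough usage sketch of UserRateLimiter on its own; the user, configuration and storage objects are assumed to come from the application's existing wiring, and the embedding call is a placeholder:

# Rough sketch, not part of the diff: charge the embedding-inputs limit around a batch call.
from limits.aio.storage import Storage

from agentstack_server.api.rate_limiter import RateLimit, UserRateLimiter
from agentstack_server.configuration import Configuration
from agentstack_server.domain.models.user import User


async def embed_with_limits(user: User, configuration: Configuration, storage: Storage, inputs: list[str]) -> None:
    limiter = UserRateLimiter(user=user, configuration=configuration, storage=storage)

    # test() raises RateLimitExceededError without consuming quota if the batch would not fit.
    await limiter.test(RateLimit.OPENAI_EMBEDDING_ITEMS, cost=len(inputs))

    # limit() charges the quota (via hit()) before the block body runs.
    async with limiter.limit(RateLimit.OPENAI_EMBEDDING_ITEMS, cost=len(inputs)):
        ...  # call the embedding backend here (placeholder)
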