Skip to content

Commit 3c5131e

Browse files
authored
feat(server): make agent env and timeout configurable in helm chart (#1281)
Signed-off-by: Radek Ježek <radek.jezek@ibm.com>
1 parent ae697e8 commit 3c5131e

File tree

12 files changed

+139
-42
lines changed

12 files changed

+139
-42
lines changed

apps/beeai-sdk/src/beeai_sdk/platform/provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class EnvVar(pydantic.BaseModel):
2727

2828
class Provider(pydantic.BaseModel):
2929
id: str
30-
auto_stop_timeout: timedelta
30+
auto_stop_timeout: timedelta | None = None
3131
source: str
3232
registry: str | None = None
3333
auto_remove: bool = False

apps/beeai-server/src/beeai_server/api/routes/providers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
22
# SPDX-License-Identifier: Apache-2.0
3+
from datetime import timedelta
34
from typing import Annotated
45
from uuid import UUID
56

@@ -37,6 +38,7 @@ async def create_provider(
3738
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Auto remove functionality is disabled")
3839
return await provider_service.create_provider(
3940
user=user.user,
41+
auto_stop_timeout=timedelta(seconds=request.auto_stop_timeout_sec),
4042
location=request.location,
4143
agent_card=request.agent_card,
4244
auto_remove=auto_remove,
Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
22
# SPDX-License-Identifier: Apache-2.0
33

4+
from datetime import timedelta
5+
46
from a2a.types import AgentCard
5-
from pydantic import BaseModel
7+
from pydantic import BaseModel, Field
68

79
from beeai_server.domain.models.provider import ProviderLocation
810

@@ -11,3 +13,12 @@ class CreateProviderRequest(BaseModel):
1113
location: ProviderLocation
1214
agent_card: AgentCard | None = None
1315
variables: dict[str, str] | None = None
16+
auto_stop_timeout_sec: int = Field(
17+
default=int(timedelta(minutes=5).total_seconds()),
18+
gt=0,
19+
le=600,
20+
description=(
21+
"Timeout after which the agent provider will be automatically downscaled if unused. "
22+
"Contact administrator if you need to increase this value."
23+
),
24+
)

apps/beeai-server/src/beeai_server/application.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import logging
5+
import time
56
from collections.abc import Iterable
6-
from contextlib import asynccontextmanager
7+
from contextlib import asynccontextmanager, suppress
78

89
import procrastinate
910
from fastapi import APIRouter, FastAPI, HTTPException
1011
from fastapi.exception_handlers import http_exception_handler
1112
from fastapi.responses import ORJSONResponse
1213
from kink import Container, di, inject
1314
from opentelemetry.metrics import CallbackOptions, Observation, get_meter
15+
from procrastinate.exceptions import AlreadyEnqueued
1416
from starlette.requests import Request
1517
from starlette.status import HTTP_401_UNAUTHORIZED, HTTP_500_INTERNAL_SERVER_ERROR
1618

@@ -34,6 +36,7 @@
3436
ManifestLoadError,
3537
PlatformError,
3638
)
39+
from beeai_server.jobs.crons.provider import check_registry
3740
from beeai_server.run_workers import run_workers
3841
from beeai_server.service_layer.services.mcp import McpService
3942
from beeai_server.telemetry import INSTRUMENTATION_NAME, shutdown_telemetry
@@ -149,6 +152,9 @@ async def lifespan(_app: FastAPI, procrastinate_app: procrastinate.App, mcp_serv
149152
try:
150153
register_telemetry()
151154
async with procrastinate_app.open_async(), run_workers(app=procrastinate_app), mcp_service:
155+
with suppress(AlreadyEnqueued):
156+
# Force initial sync of the registry immediately
157+
await check_registry.defer_async(timestamp=int(time.time()))
152158
try:
153159
yield
154160
finally:

apps/beeai-server/src/beeai_server/domain/models/registry.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
22
# SPDX-License-Identifier: Apache-2.0
3-
3+
from datetime import timedelta
44
from typing import TYPE_CHECKING, Any
55

66
import httpx
77
import yaml
88
from anyio import Path
9-
from pydantic import BaseModel, FileUrl, HttpUrl, RootModel
9+
from pydantic import BaseModel, Field, FileUrl, HttpUrl, RootModel, computed_field, field_validator
1010

1111
from beeai_server.utils.github import GithubUrl
1212

@@ -16,22 +16,42 @@
1616
from beeai_server.domain.models.provider import ProviderLocation
1717

1818

19-
def parse_providers_manifest(content: dict[str, Any]) -> list["ProviderLocation"]:
20-
from beeai_server.domain.models.provider import ProviderLocation
19+
class ProviderRegistryRecord(BaseModel, extra="allow"):
20+
location: "ProviderLocation"
21+
auto_stop_timeout_sec: int | None = Field(default=int(timedelta(minutes=5).total_seconds()), ge=0)
22+
variables: dict[str, str] = {}
23+
24+
@computed_field
25+
@property
26+
def auto_stop_timeout(self) -> timedelta | None:
27+
return timedelta(seconds=self.auto_stop_timeout_sec) if self.auto_stop_timeout_sec else None
28+
29+
@field_validator("variables", mode="before")
30+
@classmethod
31+
def convert_variables_to_str(cls, v: Any | None):
32+
if v is None:
33+
return {}
34+
if not isinstance(v, dict):
35+
raise ValueError("variables must be a dictionary")
36+
return {str(k): str(v) for k, v in v.items()}
2137

22-
class ProviderRegistryRecord(BaseModel, extra="allow"):
23-
location: ProviderLocation
2438

25-
class RegistryManifest(BaseModel):
26-
providers: list[ProviderRegistryRecord]
39+
class RegistryManifest(BaseModel):
40+
providers: list[ProviderRegistryRecord]
41+
42+
43+
def parse_providers_manifest(content: dict[str, Any]) -> list[ProviderRegistryRecord]:
44+
from beeai_server.domain.models.provider import ProviderLocation
45+
46+
_ = ProviderLocation # make sure this is imported
2747

28-
return [p.location for p in RegistryManifest.model_validate(content).providers]
48+
return RegistryManifest.model_validate(content).providers
2949

3050

3151
class NetworkRegistryLocation(RootModel):
3252
root: HttpUrl
3353

34-
async def load(self) -> list["ProviderLocation"]:
54+
async def load(self) -> list[ProviderRegistryRecord]:
3555
async with httpx.AsyncClient(
3656
headers={"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache", "Expires": "0"}
3757
) as client:
@@ -42,7 +62,7 @@ async def load(self) -> list["ProviderLocation"]:
4262
class GithubRegistryLocation(RootModel):
4363
root: GithubUrl
4464

45-
async def load(self) -> list["ProviderLocation"]:
65+
async def load(self) -> list[ProviderRegistryRecord]:
4666
resolved_url = await self.root.resolve_version()
4767
url = await resolved_url.get_raw_url()
4868
network_location = NetworkRegistryLocation(root=HttpUrl(url))
@@ -52,7 +72,7 @@ async def load(self) -> list["ProviderLocation"]:
5272
class FileSystemRegistryLocation(RootModel):
5373
root: FileUrl
5474

55-
async def load(self) -> list["ProviderLocation"]:
75+
async def load(self) -> list[ProviderRegistryRecord]:
5676
content = await Path(self.root.path).read_text()
5777
return parse_providers_manifest(yaml.safe_load(content))
5878

apps/beeai-server/src/beeai_server/domain/repositories/env.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,5 @@ async def update(
3737
parent_entity_id: UUID,
3838
variables: dict[str, str | None] | dict[str, str],
3939
) -> None: ...
40+
41+
async def delete(self, parent_entity: EnvStoreEntity, parent_entity_id: UUID) -> None: ...

apps/beeai-server/src/beeai_server/infrastructure/persistence/repositories/env.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ async def update(
139139
)
140140
)
141141

142+
async def delete(self, parent_entity: EnvStoreEntity, parent_entity_id: UUID):
143+
query = variables_table.delete().where(self._parent_filter(parent_entity, parent_entity_id))
144+
await self.connection.execute(query)
145+
142146
async def get(
143147
self,
144148
*,

apps/beeai-server/src/beeai_server/jobs/crons/provider.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
from contextlib import suppress
66
from datetime import timedelta
7+
from uuid import UUID
78

89
import anyio
910
import httpx
@@ -15,6 +16,7 @@
1516
from beeai_server import get_configuration
1617
from beeai_server.configuration import Configuration
1718
from beeai_server.domain.models.provider import ProviderLocation
19+
from beeai_server.domain.models.registry import ProviderRegistryRecord, RegistryLocation
1820
from beeai_server.exceptions import EntityNotFoundError
1921
from beeai_server.jobs.queues import Queues
2022
from beeai_server.service_layer.services.providers import ProviderService
@@ -49,8 +51,8 @@ async def check_registry(
4951

5052
user = await user_service.get_user_by_email("admin@beeai.dev")
5153

52-
registry_by_provider_id = {}
53-
desired_providers = {}
54+
registry_by_provider_id: dict[UUID, RegistryLocation] = {}
55+
desired_providers: dict[UUID, ProviderRegistryRecord] = {}
5456
errors = []
5557

5658
try:
@@ -59,10 +61,10 @@ async def check_registry(
5961
errors.extend(ex.exceptions if isinstance(ex, ExceptionGroup) else [ex])
6062

6163
for registry in configuration.agent_registry.locations.values():
62-
for provider_location in await registry.load():
64+
for provider_record in await registry.load():
6365
try:
64-
provider_id = RootModel[ProviderLocation](root=provider_location).root.provider_id
65-
desired_providers[provider_id] = provider_location
66+
provider_id = RootModel[ProviderLocation](root=provider_record.location).root.provider_id
67+
desired_providers[provider_id] = provider_record
6668
registry_by_provider_id[provider_id] = registry
6769
except ValueError as e:
6870
errors.append(e)
@@ -79,31 +81,38 @@ async def check_registry(
7981
for provider_id in old_providers:
8082
provider = managed_providers[provider_id]
8183
try:
82-
await provider_service.delete_provider(provider_id=provider.id)
84+
await provider_service.delete_provider(provider_id=provider.id, user=user)
8385
logger.info(f"Removed provider {provider.source}")
8486
except Exception as ex:
8587
errors.append(RuntimeError(f"[{provider.source}]: Failed to remove provider: {ex}"))
8688

8789
for provider_id in new_providers:
88-
provider_location = desired_providers[provider_id]
90+
provider_record = desired_providers[provider_id]
8991
try:
9092
await provider_service.create_provider(
9193
user=user,
92-
location=provider_location,
94+
location=provider_record.location,
9395
registry=registry_by_provider_id[provider_id],
96+
auto_stop_timeout=provider_record.auto_stop_timeout,
97+
variables=provider_record.variables,
9498
)
95-
logger.info(f"Added provider {provider_location}")
99+
logger.info(f"Added provider {provider_record}")
96100
except Exception as ex:
97-
errors.append(RuntimeError(f"[{provider_location}]: Failed to add provider: {ex}"))
101+
errors.append(RuntimeError(f"[{provider_record}]: Failed to add provider: {ex}"))
98102

99103
for provider_id in existing_providers:
100-
provider_location = desired_providers[provider_id]
104+
provider_record = desired_providers[provider_id]
101105
try:
102-
result = await provider_service.upgrade_provider(provider_id=provider_id, location=provider_location)
106+
result = await provider_service.upgrade_provider(
107+
provider_id=provider_id,
108+
location=provider_record.location,
109+
auto_stop_timeout=provider_record.auto_stop_timeout,
110+
env=provider_record.variables,
111+
)
103112
if managed_providers[provider_id].source.root != result.source.root:
104-
logger.info(f"Upgraded provider {provider_location}")
113+
logger.info(f"Upgraded provider {provider_record}")
105114
except Exception as ex:
106-
errors.append(RuntimeError(f"[{provider_location}]: Failed to add provider: {ex}"))
115+
errors.append(RuntimeError(f"[{provider_record}]: Failed to upgrade provider: {ex}"))
107116

108117
if errors:
109118
raise ExceptionGroup("Exceptions occurred when reloading providers", errors)

apps/beeai-server/src/beeai_server/service_layer/services/providers.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import logging
99
import uuid
1010
from collections.abc import AsyncIterator, Callable
11+
from datetime import timedelta
1112
from uuid import UUID
1213

1314
from a2a.types import AgentCard
@@ -46,6 +47,7 @@ async def create_provider(
4647
*,
4748
user: User,
4849
location: ProviderLocation,
50+
auto_stop_timeout: timedelta | None,
4951
registry: RegistryLocation | None = None,
5052
auto_remove: bool = False,
5153
agent_card: AgentCard | None = None,
@@ -55,7 +57,12 @@ async def create_provider(
5557
if not agent_card:
5658
agent_card = await location.load_agent_card()
5759
provider = Provider(
58-
source=location, registry=registry, auto_remove=auto_remove, agent_card=agent_card, created_by=user.id
60+
source=location,
61+
registry=registry,
62+
auto_remove=auto_remove,
63+
agent_card=agent_card,
64+
created_by=user.id,
65+
auto_stop_timeout=auto_stop_timeout,
5966
)
6067
except ValueError as ex:
6168
raise ManifestLoadError(location=location, message=str(ex), status_code=HTTP_400_BAD_REQUEST) from ex
@@ -73,13 +80,33 @@ async def create_provider(
7380
return provider_response
7481

7582
async def upgrade_provider(
76-
self, *, provider_id: UUID, location: ProviderLocation, force: bool = False
83+
self,
84+
*,
85+
provider_id: UUID,
86+
location: ProviderLocation,
87+
force: bool = False,
88+
auto_stop_timeout: timedelta | None,
89+
env: dict[str, str] | None = None,
7790
) -> ProviderWithState:
91+
env = env or {}
7892
async with self._uow() as uow:
7993
provider = await uow.providers.get(provider_id=provider_id)
94+
old_env = (
95+
await uow.env.get_all(
96+
parent_entity=EnvStoreEntity.PROVIDER,
97+
parent_entity_ids=[provider.id],
98+
)
99+
)[provider.id]
80100

81-
if provider.source.root == location.root and not force:
82-
return (await self._get_providers_with_state([provider]))[0]
101+
should_update = (
102+
provider.source.root != location.root
103+
or provider.auto_stop_timeout != auto_stop_timeout
104+
or env != old_env
105+
or force
106+
)
107+
108+
if not should_update:
109+
return (await self._get_providers_with_state(providers=[provider]))[0]
83110

84111
try:
85112
agent_card = await location.load_agent_card()
@@ -90,12 +117,16 @@ async def upgrade_provider(
90117

91118
provider.source = location
92119
provider.agent_card = agent_card
120+
provider.auto_stop_timeout = auto_stop_timeout
93121

94122
async with self._uow() as uow:
95123
await uow.providers.update(provider=provider)
96-
env = await uow.env.get_all(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_ids=[provider.id])
124+
125+
if old_env != env:
126+
await uow.env.delete(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_id=provider.id)
127+
await uow.env.update(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_id=provider.id, variables=env)
97128
# Rotate the provider (inside the transaction)
98-
await self._rotate_provider(provider=provider, env=env[provider.id])
129+
await self._rotate_provider(provider=provider, env=env)
99130
await uow.commit()
100131
[provider_response] = await self._get_providers_with_state(providers=[provider])
101132
return provider_response
@@ -216,7 +247,9 @@ async def _rotate_provider(self, provider: Provider, env: dict[str, str]):
216247
):
217248
await self._deployment_manager.create_or_replace(provider=provider, env=env)
218249

219-
async def update_provider_env(self, *, provider_id: UUID, env: dict[str, str | None], user: User) -> None:
250+
async def update_provider_env(
251+
self, *, provider_id: UUID, env: dict[str, str | None] | dict[str, str], user: User
252+
) -> None:
220253
user_id = user.id if user.role != UserRole.ADMIN else None
221254
provider = None
222255
try:

helm/templates/config/providers.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
{{- $root := . }}
22
apiVersion: v1
3-
kind: ConfigMap
3+
kind: Secret
44
metadata:
5-
name: beeai-platform-providers-cm
6-
data:
5+
name: beeai-platform-providers-secret
6+
type: Opaque
7+
stringData:
78
registry.yaml: |
89
{{- if or .Values.providers .Values.unmanagedProviders }}
910
providers:
1011
{{- range $idx, $p := $root.Values.unmanagedProviders }}
1112
- location: "http://{{ include "agent.fullname" (dict "root" $root "image" $p.location) }}:{{ $root.Values.agent.service.port }}"
1213
{{- end }}
1314
{{- range $idx, $p := $root.Values.providers }}
14-
- location: {{ $p.location }}
15+
- {{ $p | toYaml | nindent 8 | trim }}
1516
{{- end }}
1617
{{- else }}
1718
providers: [ ]

0 commit comments

Comments
 (0)