Skip to content

Commit 0cb7469

Browse files
feat: lazy sandbox initialization for faster task creation
2 parents 61e6a64 + c497964 commit 0cb7469

File tree

7 files changed

+196
-100
lines changed

7 files changed

+196
-100
lines changed

backend/agent/api.py

Lines changed: 57 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ async def start_agent(
294294
body: AgentStartRequest = Body(...),
295295
user_id: str = Depends(get_current_user_id_from_jwt)
296296
):
297-
"""Start an agent for a specific thread in the background."""
297+
"""Start an agent for a specific thread in the background"""
298298
structlog.contextvars.bind_contextvars(
299299
thread_id=thread_id,
300300
)
@@ -321,7 +321,9 @@ async def start_agent(
321321
client = await db.client
322322

323323
await verify_thread_access(client, thread_id, user_id)
324+
324325
thread_result = await client.table('threads').select('project_id', 'account_id', 'metadata').eq('thread_id', thread_id).execute()
326+
325327
if not thread_result.data:
326328
raise HTTPException(status_code=404, detail="Thread not found")
327329
thread_data = thread_result.data[0]
@@ -349,7 +351,7 @@ async def start_agent(
349351
logger.info(f"[AGENT LOAD] Agent loading flow:")
350352
logger.info(f" - body.agent_id: {body.agent_id}")
351353
logger.info(f" - effective_agent_id: {effective_agent_id}")
352-
354+
353355
if effective_agent_id:
354356
logger.info(f"[AGENT LOAD] Querying for agent: {effective_agent_id}")
355357
# Get agent
@@ -390,7 +392,7 @@ async def start_agent(
390392
source = "request" if body.agent_id else "fallback"
391393
else:
392394
logger.info(f"[AGENT LOAD] No effective_agent_id, will try default agent")
393-
395+
394396
if not agent_config:
395397
logger.info(f"[AGENT LOAD] No agent config yet, querying for default agent")
396398
default_agent_result = await client.table('agents').select('*').eq('account_id', account_id).eq('is_default', True).execute()
@@ -424,22 +426,25 @@ async def start_agent(
424426
logger.info(f"Using default agent: {agent_config['name']} ({agent_config['agent_id']}) - no version data")
425427
else:
426428
logger.warning(f"[AGENT LOAD] No default agent found for account {account_id}")
427-
429+
428430
logger.info(f"[AGENT LOAD] Final agent_config: {agent_config is not None}")
429431
if agent_config:
430432
logger.info(f"[AGENT LOAD] Agent config keys: {list(agent_config.keys())}")
431433
logger.info(f"Using agent {agent_config['agent_id']} for this agent run (thread remains agent-agnostic)")
432434

433435
can_use, model_message, allowed_models = await can_use_model(client, account_id, model_name)
436+
434437
if not can_use:
435438
raise HTTPException(status_code=403, detail={"message": model_message, "allowed_models": allowed_models})
436439

437440
can_run, message, subscription = await check_billing_status(client, account_id)
441+
438442
if not can_run:
439443
raise HTTPException(status_code=402, detail={"message": message, "subscription": subscription})
440444

441445
# Check agent run limit (maximum parallel runs in past 24 hours)
442446
limit_check = await check_agent_run_limit(client, account_id)
447+
443448
if not limit_check['can_start']:
444449
error_detail = {
445450
"message": f"Maximum of {config.MAX_PARALLEL_AGENT_RUNS} parallel agent runs allowed within 24 hours. You currently have {limit_check['running_count']} running.",
@@ -450,23 +455,6 @@ async def start_agent(
450455
logger.warning(f"Agent run limit exceeded for account {account_id}: {limit_check['running_count']} running agents")
451456
raise HTTPException(status_code=429, detail=error_detail)
452457

453-
try:
454-
project_result = await client.table('projects').select('*').eq('project_id', project_id).execute()
455-
if not project_result.data:
456-
raise HTTPException(status_code=404, detail="Project not found")
457-
458-
project_data = project_result.data[0]
459-
sandbox_info = project_data.get('sandbox', {})
460-
if not sandbox_info.get('id'):
461-
raise HTTPException(status_code=404, detail="No sandbox found for this project")
462-
463-
sandbox_id = sandbox_info['id']
464-
sandbox = await get_or_start_sandbox(sandbox_id)
465-
logger.info(f"Successfully started sandbox {sandbox_id} for project {project_id}")
466-
except Exception as e:
467-
logger.error(f"Failed to start sandbox for project {project_id}: {str(e)}")
468-
raise HTTPException(status_code=500, detail=f"Failed to initialize sandbox: {str(e)}")
469-
470458
agent_run = await client.table('agent_runs').insert({
471459
"thread_id": thread_id, "status": "running",
472460
"started_at": datetime.now(timezone.utc).isoformat(),
@@ -479,6 +467,7 @@ async def start_agent(
479467
"enable_context_manager": body.enable_context_manager
480468
}
481469
}).execute()
470+
482471
agent_run_id = agent_run.data[0]['id']
483472
structlog.contextvars.bind_contextvars(
484473
agent_run_id=agent_run_id,
@@ -1084,47 +1073,56 @@ async def initiate_agent_with_files(
10841073
project_id = project.data[0]['project_id']
10851074
logger.info(f"Created new project: {project_id}")
10861075

1087-
# 2. Create Sandbox
1076+
# 2. Create Sandbox (lazy): only create now if files were uploaded and need the
1077+
# sandbox immediately. Otherwise leave sandbox creation to `_ensure_sandbox()`
1078+
# which will create it lazily when tools require it.
10881079
sandbox_id = None
1089-
try:
1090-
sandbox_pass = str(uuid.uuid4())
1091-
sandbox = await create_sandbox(sandbox_pass, project_id)
1092-
sandbox_id = sandbox.id
1093-
logger.info(f"Created new sandbox {sandbox_id} for project {project_id}")
1094-
1095-
# Get preview links
1096-
vnc_link = await sandbox.get_preview_link(6080)
1097-
website_link = await sandbox.get_preview_link(8080)
1098-
vnc_url = vnc_link.url if hasattr(vnc_link, 'url') else str(vnc_link).split("url='")[1].split("'")[0]
1099-
website_url = website_link.url if hasattr(website_link, 'url') else str(website_link).split("url='")[1].split("'")[0]
1100-
token = None
1101-
if hasattr(vnc_link, 'token'):
1102-
token = vnc_link.token
1103-
elif "token='" in str(vnc_link):
1104-
token = str(vnc_link).split("token='")[1].split("'")[0]
1105-
except Exception as e:
1106-
logger.error(f"Error creating sandbox: {str(e)}")
1107-
await client.table('projects').delete().eq('project_id', project_id).execute()
1108-
if sandbox_id:
1109-
try: await delete_sandbox(sandbox_id)
1110-
except Exception as e: pass
1111-
raise Exception("Failed to create sandbox")
1112-
1080+
sandbox = None
1081+
sandbox_pass = None
1082+
vnc_url = None
1083+
website_url = None
1084+
token = None
11131085

1114-
# Update project with sandbox info
1115-
update_result = await client.table('projects').update({
1116-
'sandbox': {
1117-
'id': sandbox_id, 'pass': sandbox_pass, 'vnc_preview': vnc_url,
1118-
'sandbox_url': website_url, 'token': token
1119-
}
1120-
}).eq('project_id', project_id).execute()
1086+
if files:
1087+
try:
1088+
sandbox_pass = str(uuid.uuid4())
1089+
sandbox = await create_sandbox(sandbox_pass, project_id)
1090+
sandbox_id = sandbox.id
1091+
logger.info(f"Created new sandbox {sandbox_id} for project {project_id}")
1092+
1093+
# Get preview links
1094+
vnc_link = await sandbox.get_preview_link(6080)
1095+
website_link = await sandbox.get_preview_link(8080)
1096+
vnc_url = vnc_link.url if hasattr(vnc_link, 'url') else str(vnc_link).split("url='")[1].split("'")[0]
1097+
website_url = website_link.url if hasattr(website_link, 'url') else str(website_link).split("url='")[1].split("'")[0]
1098+
token = None
1099+
if hasattr(vnc_link, 'token'):
1100+
token = vnc_link.token
1101+
elif "token='" in str(vnc_link):
1102+
token = str(vnc_link).split("token='")[1].split("'")[0]
1103+
1104+
# Update project with sandbox info
1105+
update_result = await client.table('projects').update({
1106+
'sandbox': {
1107+
'id': sandbox_id, 'pass': sandbox_pass, 'vnc_preview': vnc_url,
1108+
'sandbox_url': website_url, 'token': token
1109+
}
1110+
}).eq('project_id', project_id).execute()
11211111

1122-
if not update_result.data:
1123-
logger.error(f"Failed to update project {project_id} with new sandbox {sandbox_id}")
1124-
if sandbox_id:
1125-
try: await delete_sandbox(sandbox_id)
1126-
except Exception as e: logger.error(f"Error deleting sandbox: {str(e)}")
1127-
raise Exception("Database update failed")
1112+
if not update_result.data:
1113+
logger.error(f"Failed to update project {project_id} with new sandbox {sandbox_id}")
1114+
if sandbox_id:
1115+
try: await delete_sandbox(sandbox_id)
1116+
except Exception as e: logger.error(f"Error deleting sandbox: {str(e)}")
1117+
raise Exception("Database update failed")
1118+
except Exception as e:
1119+
logger.error(f"Error creating sandbox: {str(e)}")
1120+
await client.table('projects').delete().eq('project_id', project_id).execute()
1121+
if sandbox_id:
1122+
try: await delete_sandbox(sandbox_id)
1123+
except Exception:
1124+
pass
1125+
raise Exception("Failed to create sandbox")
11281126

11291127
# 3. Create Thread
11301128
thread_data = {

backend/agent/run.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,10 @@ async def setup(self):
390390
project_data = project.data[0]
391391
sandbox_info = project_data.get('sandbox', {})
392392
if not sandbox_info.get('id'):
393-
raise ValueError(f"No sandbox found for project {self.config.project_id}")
393+
# Sandbox is created lazily by tools when required. Do not fail setup
394+
# if no sandbox is present — tools will call `_ensure_sandbox()`
395+
# which will create and persist the sandbox metadata when needed.
396+
logger.info(f"No sandbox found for project {self.config.project_id}; will create lazily when needed")
394397

395398
async def setup_tools(self):
396399
tool_manager = ToolManager(self.thread_manager, self.config.project_id, self.config.thread_id)

backend/agent/utils.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
from typing import Optional, List, Dict, Any
33
from datetime import datetime, timezone, timedelta
4+
from utils.cache import Cache
45
from utils.logger import logger
56
from utils.config import config
67
from services import redis
@@ -88,6 +89,10 @@ async def check_agent_run_limit(client, account_id: str) -> Dict[str, Any]:
8889
Dict with 'can_start' (bool), 'running_count' (int), 'running_thread_ids' (list)
8990
"""
9091
try:
92+
result = await Cache.get(f"agent_run_limit:{account_id}")
93+
if result:
94+
return result
95+
9196
# Calculate 24 hours ago
9297
twenty_four_hours_ago = datetime.now(timezone.utc) - timedelta(hours=24)
9398
twenty_four_hours_ago_iso = twenty_four_hours_ago.isoformat()
@@ -117,17 +122,19 @@ async def check_agent_run_limit(client, account_id: str) -> Dict[str, Any]:
117122

118123
logger.info(f"Account {account_id} has {running_count} running agent runs in the past 24 hours")
119124

120-
return {
125+
result = {
121126
'can_start': running_count < config.MAX_PARALLEL_AGENT_RUNS,
122127
'running_count': running_count,
123128
'running_thread_ids': running_thread_ids
124129
}
125-
130+
await Cache.set(f"agent_run_limit:{account_id}", result)
131+
return result
132+
126133
except Exception as e:
127134
logger.error(f"Error checking agent run limit for account {account_id}: {str(e)}")
128135
# In case of error, allow the run to proceed but log the error
129136
return {
130137
'can_start': True,
131138
'running_count': 0,
132139
'running_thread_ids': []
133-
}
140+
}

backend/sandbox/tool_base.py

Lines changed: 62 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from typing import Optional
2+
import uuid
23

34
from agentpress.thread_manager import ThreadManager
45
from agentpress.tool import Tool
56
from daytona_sdk import AsyncSandbox
6-
from sandbox.sandbox import get_or_start_sandbox
7+
from sandbox.sandbox import get_or_start_sandbox, create_sandbox, delete_sandbox
78
from utils.logger import logger
89
from utils.files_utils import clean_path
910

@@ -23,48 +24,78 @@ def __init__(self, project_id: str, thread_manager: Optional[ThreadManager] = No
2324
self._sandbox_pass = None
2425

2526
async def _ensure_sandbox(self) -> AsyncSandbox:
26-
"""Ensure we have a valid sandbox instance, retrieving it from the project if needed."""
27+
"""Ensure we have a valid sandbox instance, retrieving it from the project if needed.
28+
29+
If the project does not yet have a sandbox, create it lazily and persist
30+
the metadata to the `projects` table so subsequent calls can reuse it.
31+
"""
2732
if self._sandbox is None:
2833
try:
2934
# Get database client
3035
client = await self.thread_manager.db.client
31-
36+
3237
# Get project data
3338
project = await client.table('projects').select('*').eq('project_id', self.project_id).execute()
3439
if not project.data or len(project.data) == 0:
3540
raise ValueError(f"Project {self.project_id} not found")
36-
41+
3742
project_data = project.data[0]
38-
sandbox_info = project_data.get('sandbox', {})
39-
43+
sandbox_info = project_data.get('sandbox') or {}
44+
45+
# If there is no sandbox recorded for this project, create one lazily
4046
if not sandbox_info.get('id'):
41-
raise ValueError(f"No sandbox found for project {self.project_id}")
42-
43-
# Store sandbox info
44-
self._sandbox_id = sandbox_info['id']
45-
self._sandbox_pass = sandbox_info.get('pass')
46-
47-
# Get or start the sandbox
48-
self._sandbox = await get_or_start_sandbox(self._sandbox_id)
49-
50-
# # Log URLs if not already printed
51-
# if not SandboxToolsBase._urls_printed:
52-
# vnc_link = self._sandbox.get_preview_link(6080)
53-
# website_link = self._sandbox.get_preview_link(8080)
54-
55-
# vnc_url = vnc_link.url if hasattr(vnc_link, 'url') else str(vnc_link)
56-
# website_url = website_link.url if hasattr(website_link, 'url') else str(website_link)
57-
58-
# print("\033[95m***")
59-
# print(f"VNC URL: {vnc_url}")
60-
# print(f"Website URL: {website_url}")
61-
# print("***\033[0m")
62-
# SandboxToolsBase._urls_printed = True
63-
47+
logger.info(f"No sandbox recorded for project {self.project_id}; creating lazily")
48+
sandbox_pass = str(uuid.uuid4())
49+
sandbox_obj = await create_sandbox(sandbox_pass, self.project_id)
50+
sandbox_id = sandbox_obj.id
51+
52+
# Gather preview links and token (best-effort parsing)
53+
try:
54+
vnc_link = await sandbox_obj.get_preview_link(6080)
55+
website_link = await sandbox_obj.get_preview_link(8080)
56+
vnc_url = vnc_link.url if hasattr(vnc_link, 'url') else str(vnc_link).split("url='")[1].split("'")[0]
57+
website_url = website_link.url if hasattr(website_link, 'url') else str(website_link).split("url='")[1].split("'")[0]
58+
token = vnc_link.token if hasattr(vnc_link, 'token') else (str(vnc_link).split("token='")[1].split("'")[0] if "token='" in str(vnc_link) else None)
59+
except Exception:
60+
# If preview link extraction fails, still proceed but leave fields None
61+
logger.warning(f"Failed to extract preview links for sandbox {sandbox_id}", exc_info=True)
62+
vnc_url = None
63+
website_url = None
64+
token = None
65+
66+
# Persist sandbox metadata to project record
67+
update_result = await client.table('projects').update({
68+
'sandbox': {
69+
'id': sandbox_id,
70+
'pass': sandbox_pass,
71+
'vnc_preview': vnc_url,
72+
'sandbox_url': website_url,
73+
'token': token
74+
}
75+
}).eq('project_id', self.project_id).execute()
76+
77+
if not update_result.data:
78+
# Cleanup created sandbox if DB update failed
79+
try:
80+
await delete_sandbox(sandbox_id)
81+
except Exception:
82+
logger.error(f"Failed to delete sandbox {sandbox_id} after DB update failure", exc_info=True)
83+
raise Exception("Database update failed when storing sandbox metadata")
84+
85+
# Store local metadata and ensure sandbox is ready
86+
self._sandbox_id = sandbox_id
87+
self._sandbox_pass = sandbox_pass
88+
self._sandbox = await get_or_start_sandbox(self._sandbox_id)
89+
else:
90+
# Use existing sandbox metadata
91+
self._sandbox_id = sandbox_info['id']
92+
self._sandbox_pass = sandbox_info.get('pass')
93+
self._sandbox = await get_or_start_sandbox(self._sandbox_id)
94+
6495
except Exception as e:
65-
logger.error(f"Error retrieving sandbox for project {self.project_id}: {str(e)}", exc_info=True)
96+
logger.error(f"Error retrieving/creating sandbox for project {self.project_id}: {str(e)}", exc_info=True)
6697
raise e
67-
98+
6899
return self._sandbox
69100

70101
@property

0 commit comments

Comments
 (0)