Skip to content

Commit a66219f

Browse files
authored
fix(platform): Remove un-runnable agents from schedule (#11374)
Currently when an agent fails validation during a scheduled run, we raise an error then try again, regardless of why. This change removed the agent schedule and notifies the user ### Changes 🏗️ - add schedule_id to the GraphExecutionJobArgs - add agent_name to the GraphExecutionJobArgs - Delete schedule on GraphValidationError - Notify the user with a message that include the agent name ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: <!-- Put your test plan here: --> - [x] I have ensured the scheduler tests work with these changes
1 parent 8b3a741 commit a66219f

File tree

4 files changed

+60
-9
lines changed

4 files changed

+60
-9
lines changed

autogpt_platform/backend/backend/executor/scheduler.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
import os
44
import threading
5+
import uuid
56
from enum import Enum
67
from typing import Optional
78
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
@@ -36,7 +37,9 @@
3637
from backend.util.clients import get_scheduler_client
3738
from backend.util.cloud_storage import cleanup_expired_files_async
3839
from backend.util.exceptions import (
40+
GraphNotFoundError,
3941
GraphNotInLibraryError,
42+
GraphValidationError,
4043
NotAuthorizedError,
4144
NotFoundError,
4245
)
@@ -160,14 +163,12 @@ async def _execute_graph(**kwargs):
160163
f"Graph execution {graph_exec.id} took {elapsed:.2f}s to create/publish - "
161164
f"this is unusually slow and may indicate resource contention"
162165
)
166+
except GraphNotFoundError as e:
167+
await _handle_graph_not_available(e, args, start_time)
163168
except GraphNotInLibraryError as e:
164-
elapsed = asyncio.get_event_loop().time() - start_time
165-
logger.warning(
166-
f"Scheduled execution blocked for deleted/archived graph {args.graph_id} "
167-
f"(user {args.user_id}) after {elapsed:.2f}s: {e}"
168-
)
169-
# Clean up orphaned schedules for this graph
170-
await _cleanup_orphaned_schedules_for_graph(args.graph_id, args.user_id)
169+
await _handle_graph_not_available(e, args, start_time)
170+
except GraphValidationError:
171+
await _handle_graph_validation_error(args)
171172
except Exception as e:
172173
elapsed = asyncio.get_event_loop().time() - start_time
173174
logger.error(
@@ -176,6 +177,34 @@ async def _execute_graph(**kwargs):
176177
)
177178

178179

180+
async def _handle_graph_validation_error(args: "GraphExecutionJobArgs") -> None:
181+
logger.error(
182+
f"Scheduled Graph {args.graph_id} failed validation. Unscheduling graph"
183+
)
184+
if args.schedule_id:
185+
scheduler_client = get_scheduler_client()
186+
await scheduler_client.delete_schedule(
187+
schedule_id=args.schedule_id,
188+
user_id=args.user_id,
189+
)
190+
else:
191+
logger.error(
192+
f"Unable to unschedule graph: {args.graph_id} as this is an old job with no associated schedule_id please remove manually"
193+
)
194+
195+
196+
async def _handle_graph_not_available(
197+
e: Exception, args: "GraphExecutionJobArgs", start_time: float
198+
) -> None:
199+
elapsed = asyncio.get_event_loop().time() - start_time
200+
logger.warning(
201+
f"Scheduled execution blocked for deleted/archived graph {args.graph_id} "
202+
f"(user {args.user_id}) after {elapsed:.2f}s: {e}"
203+
)
204+
# Clean up orphaned schedules for this graph
205+
await _cleanup_orphaned_schedules_for_graph(args.graph_id, args.user_id)
206+
207+
179208
async def _cleanup_orphaned_schedules_for_graph(graph_id: str, user_id: str) -> None:
180209
"""
181210
Clean up orphaned schedules for a specific graph when execution fails with GraphNotAccessibleError.
@@ -220,9 +249,11 @@ class Jobstores(Enum):
220249

221250

222251
class GraphExecutionJobArgs(BaseModel):
252+
schedule_id: str | None = None
223253
user_id: str
224254
graph_id: str
225255
graph_version: int
256+
agent_name: str | None = None
226257
cron: str
227258
input_data: BlockInput
228259
input_credentials: dict[str, CredentialsMetaInput] = Field(default_factory=dict)
@@ -468,11 +499,14 @@ def add_graph_execution_schedule(
468499
logger.info(
469500
f"Scheduling job for user {user_id} with timezone {user_timezone} (cron: {cron})"
470501
)
502+
schedule_id = str(uuid.uuid4())
471503

472504
job_args = GraphExecutionJobArgs(
505+
schedule_id=schedule_id,
473506
user_id=user_id,
474507
graph_id=graph_id,
475508
graph_version=graph_version,
509+
agent_name=name,
476510
cron=cron,
477511
input_data=input_data,
478512
input_credentials=input_credentials,
@@ -484,6 +518,7 @@ def add_graph_execution_schedule(
484518
trigger=CronTrigger.from_crontab(cron, timezone=user_timezone),
485519
jobstore=Jobstores.EXECUTION.value,
486520
replace_existing=True,
521+
id=schedule_id,
487522
)
488523
logger.info(
489524
f"Added job {job.id} with cron schedule '{cron}' in timezone {user_timezone}, input data: {input_data}"

autogpt_platform/backend/backend/executor/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@
4242
get_database_manager_async_client,
4343
get_integration_credentials_store,
4444
)
45-
from backend.util.exceptions import GraphValidationError, NotFoundError
45+
from backend.util.exceptions import (
46+
GraphNotFoundError,
47+
GraphValidationError,
48+
NotFoundError,
49+
)
4650
from backend.util.logging import TruncatedLogger, is_structured_logging_enabled
4751
from backend.util.settings import Config
4852
from backend.util.type import convert
@@ -516,7 +520,7 @@ async def validate_and_construct_node_execution_input(
516520
skip_access_check=True,
517521
)
518522
if not graph:
519-
raise NotFoundError(f"Graph #{graph_id} not found.")
523+
raise GraphNotFoundError(f"Graph #{graph_id} not found.")
520524

521525
# Validate that the user has permission to execute this graph
522526
# This checks both library membership and execution permissions,

autogpt_platform/backend/backend/util/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ class NotFoundError(ValueError):
3838
"""The requested record was not found, resulting in an error condition"""
3939

4040

41+
class GraphNotFoundError(ValueError):
42+
"""The requested Agent Graph was not found, resulting in an error condition"""
43+
44+
4145
class NeedConfirmation(Exception):
4246
"""The user must explicitly confirm that they want to proceed"""
4347

autogpt_platform/frontend/src/app/api/openapi.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6018,9 +6018,17 @@
60186018
},
60196019
"GraphExecutionJobInfo": {
60206020
"properties": {
6021+
"schedule_id": {
6022+
"anyOf": [{ "type": "string" }, { "type": "null" }],
6023+
"title": "Schedule Id"
6024+
},
60216025
"user_id": { "type": "string", "title": "User Id" },
60226026
"graph_id": { "type": "string", "title": "Graph Id" },
60236027
"graph_version": { "type": "integer", "title": "Graph Version" },
6028+
"agent_name": {
6029+
"anyOf": [{ "type": "string" }, { "type": "null" }],
6030+
"title": "Agent Name"
6031+
},
60246032
"cron": { "type": "string", "title": "Cron" },
60256033
"input_data": {
60266034
"additionalProperties": true,

0 commit comments

Comments
 (0)