From fcaec8ac20887e520506a022d6d8f81c63196a58 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 15:19:20 +0100 Subject: [PATCH 1/6] Don't log an error when the snapshot shouldn't be created, it's normal for this to happen --- .../run-engine/src/engine/systems/checkpointSystem.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts index bec173d960..384384fd8c 100644 --- a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts @@ -62,7 +62,7 @@ export class CheckpointSystem { snapshot.executionStatus === "QUEUED_EXECUTING"); if (!isValidSnapshot) { - this.$.logger.error("Tried to createCheckpoint on an invalid snapshot", { + this.$.logger.info("Tried to createCheckpoint on an invalid snapshot", { snapshot, snapshotId, }); From 0d1bae734c15de914ca8395e4e3c9bb17b7c7804 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 15:58:13 +0100 Subject: [PATCH 2/6] Slack alerts, skip `account_inactive` errors --- .../webapp/app/v3/services/alerts/deliverAlert.server.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index ecbf8ef1bc..90198a0929 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -991,6 +991,15 @@ export class DeliverAlertService extends BaseService { throw new SkipRetryError("Slack invalid blocks"); } + if (error.data.error === "account_inactive") { + logger.info("[DeliverAlert] Slack account inactive, skipping retry", { + error, + message, + }); + + throw new SkipRetryError("Slack account inactive"); + } + throw new Error("Slack platform error"); } From c8afb4a010f0be3888bc9bb1ae03b879c67973af Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 16:14:29 +0100 Subject: [PATCH 3/6] v3 finalize run with no locked isn't an error --- apps/webapp/app/v3/services/finalizeTaskRun.server.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index 796a10a3b0..611f2dd8a6 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -305,9 +305,10 @@ export class FinalizeTaskRunService extends BaseService { }); if (!run.lockedById) { - logger.error( + // This happens when a run is expired or was cancelled before an attempt, it's not a problem + logger.info( "FinalizeTaskRunService: No lockedById, so can't get the BackgroundWorkerTask. Not creating an attempt.", - { runId: run.id } + { runId: run.id, status: run.status } ); return; } From cc65653adffab855dfa3a35bcb42e4896f04319f Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 17:26:52 +0100 Subject: [PATCH 4/6] Another false error --- .../src/engine/systems/dequeueSystem.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts index 85683c5a17..84ef841775 100644 --- a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts @@ -8,7 +8,7 @@ import { PrismaClientOrTransaction } from "@trigger.dev/database"; import { getRunWithBackgroundWorkerTasks } from "../db/worker.js"; import { sendNotificationToWorker } from "../eventBus.js"; import { getMachinePreset } from "../machinePresets.js"; -import { isDequeueableExecutionStatus } from "../statuses.js"; +import { isDequeueableExecutionStatus, isExecuting } from "../statuses.js"; import { RunEngineOptions } from "../types.js"; import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; import { RunAttemptSystem } from "./runAttemptSystem.js"; @@ -132,9 +132,16 @@ export class DequeueSystem { }, tx: prisma, }); - this.$.logger.error( - `RunEngine.dequeueFromWorkerQueue(): Run is not in a valid state to be dequeued: ${runId}\n ${snapshot.id}:${snapshot.executionStatus}` - ); + + if (isExecuting(snapshot.executionStatus)) { + this.$.logger.error( + `RunEngine.dequeueFromWorkerQueue(): Run is not in a valid state to be dequeued: ${runId}\n ${snapshot.id}:${snapshot.executionStatus}` + ); + } else { + this.$.logger.warn( + `RunEngine.dequeueFromWorkerQueue(): Run is in an expected not valid state to be dequeued: ${runId}\n ${snapshot.id}:${snapshot.executionStatus}` + ); + } return; } From dc4dded08a5cbbd12cac4a4b7c1d03ad5d8bf39c Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 18:03:27 +0100 Subject: [PATCH 5/6] Finalize run CRASHED runs were logging errors --- apps/webapp/app/v3/services/finalizeTaskRun.server.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index 611f2dd8a6..ab51df5de6 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -150,7 +150,7 @@ export class FinalizeTaskRunService extends BaseService { } if (isFatalRunStatus(run.status)) { - logger.error("FinalizeTaskRunService: Fatal status", { runId: run.id, status: run.status }); + logger.warn("FinalizeTaskRunService: Fatal status", { runId: run.id, status: run.status }); const extendedRun = await this._prisma.taskRun.findFirst({ where: { id: run.id }, @@ -170,7 +170,7 @@ export class FinalizeTaskRunService extends BaseService { }); if (extendedRun && extendedRun.runtimeEnvironment.type !== "DEVELOPMENT") { - logger.error("FinalizeTaskRunService: Fatal status, requesting worker exit", { + logger.warn("FinalizeTaskRunService: Fatal status, requesting worker exit", { runId: run.id, status: run.status, }); From f9ddc86e48df96b996c9f706b2506180c8c84b7d Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Mon, 18 Aug 2025 18:20:13 +0100 Subject: [PATCH 6/6] All slack alert errors are warnings except invalid blocks --- .../app/v3/services/alerts/deliverAlert.server.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 90198a0929..1d8e644876 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -158,7 +158,7 @@ export class DeliverAlertService extends BaseService { } } catch (error) { if (error instanceof SkipRetryError) { - logger.error("[DeliverAlert] Skipping retry", { + logger.warn("[DeliverAlert] Skipping retry", { reason: error.message, }); @@ -951,7 +951,7 @@ export class DeliverAlertService extends BaseService { return await client.chat.postMessage(message); } catch (error) { if (isWebAPIRateLimitedError(error)) { - logger.error("[DeliverAlert] Slack rate limited", { + logger.warn("[DeliverAlert] Slack rate limited", { error, message, }); @@ -960,7 +960,7 @@ export class DeliverAlertService extends BaseService { } if (isWebAPIHTTPError(error)) { - logger.error("[DeliverAlert] Slack HTTP error", { + logger.warn("[DeliverAlert] Slack HTTP error", { error, message, }); @@ -969,7 +969,7 @@ export class DeliverAlertService extends BaseService { } if (isWebAPIRequestError(error)) { - logger.error("[DeliverAlert] Slack request error", { + logger.warn("[DeliverAlert] Slack request error", { error, message, }); @@ -978,7 +978,7 @@ export class DeliverAlertService extends BaseService { } if (isWebAPIPlatformError(error)) { - logger.error("[DeliverAlert] Slack platform error", { + logger.warn("[DeliverAlert] Slack platform error", { error, message, }); @@ -1003,7 +1003,7 @@ export class DeliverAlertService extends BaseService { throw new Error("Slack platform error"); } - logger.error("[DeliverAlert] Failed to send slack message", { + logger.warn("[DeliverAlert] Failed to send slack message", { error, message, });