11import { db } from '@sim/db'
22import { workflowExecutionLogs } from '@sim/db/schema'
3- import { and , avg , count , desc , eq , gte } from 'drizzle-orm'
3+ import { and , avg , count , desc , eq , gte , inArray } from 'drizzle-orm'
44import { createLogger } from '@/lib/logs/console/logger'
55
66const logger = createLogger ( 'AlertRules' )
@@ -135,25 +135,29 @@ export function isInCooldown(lastAlertAt: Date | null): boolean {
135135 return new Date ( ) < cooldownEnd
136136}
137137
138- /**
139- * Context passed to alert check functions
140- */
141138export interface AlertCheckContext {
142139 workflowId : string
143140 executionId : string
144141 status : 'success' | 'error'
145142 durationMs : number
146143 cost : number
144+ triggerFilter : string [ ]
147145}
148146
149- /**
150- * Check if consecutive failures threshold is met
151- */
152- async function checkConsecutiveFailures ( workflowId : string , threshold : number ) : Promise < boolean > {
147+ async function checkConsecutiveFailures (
148+ workflowId : string ,
149+ threshold : number ,
150+ triggerFilter : string [ ]
151+ ) : Promise < boolean > {
153152 const recentLogs = await db
154153 . select ( { level : workflowExecutionLogs . level } )
155154 . from ( workflowExecutionLogs )
156- . where ( eq ( workflowExecutionLogs . workflowId , workflowId ) )
155+ . where (
156+ and (
157+ eq ( workflowExecutionLogs . workflowId , workflowId ) ,
158+ inArray ( workflowExecutionLogs . trigger , triggerFilter )
159+ )
160+ )
157161 . orderBy ( desc ( workflowExecutionLogs . createdAt ) )
158162 . limit ( threshold )
159163
@@ -162,13 +166,11 @@ async function checkConsecutiveFailures(workflowId: string, threshold: number):
162166 return recentLogs . every ( ( log ) => log . level === 'error' )
163167}
164168
165- /**
166- * Check if failure rate exceeds threshold
167- */
168169async function checkFailureRate (
169170 workflowId : string ,
170171 ratePercent : number ,
171- windowHours : number
172+ windowHours : number ,
173+ triggerFilter : string [ ]
172174) : Promise < boolean > {
173175 const windowStart = new Date ( Date . now ( ) - windowHours * 60 * 60 * 1000 )
174176
@@ -181,7 +183,8 @@ async function checkFailureRate(
181183 . where (
182184 and (
183185 eq ( workflowExecutionLogs . workflowId , workflowId ) ,
184- gte ( workflowExecutionLogs . createdAt , windowStart )
186+ gte ( workflowExecutionLogs . createdAt , windowStart ) ,
187+ inArray ( workflowExecutionLogs . trigger , triggerFilter )
185188 )
186189 )
187190 . orderBy ( workflowExecutionLogs . createdAt )
@@ -206,14 +209,12 @@ function checkLatencyThreshold(durationMs: number, thresholdMs: number): boolean
206209 return durationMs > thresholdMs
207210}
208211
209- /**
210- * Check if execution duration is significantly above average
211- */
212212async function checkLatencySpike (
213213 workflowId : string ,
214214 currentDurationMs : number ,
215215 spikePercent : number ,
216- windowHours : number
216+ windowHours : number ,
217+ triggerFilter : string [ ]
217218) : Promise < boolean > {
218219 const windowStart = new Date ( Date . now ( ) - windowHours * 60 * 60 * 1000 )
219220
@@ -226,7 +227,8 @@ async function checkLatencySpike(
226227 . where (
227228 and (
228229 eq ( workflowExecutionLogs . workflowId , workflowId ) ,
229- gte ( workflowExecutionLogs . createdAt , windowStart )
230+ gte ( workflowExecutionLogs . createdAt , windowStart ) ,
231+ inArray ( workflowExecutionLogs . trigger , triggerFilter )
230232 )
231233 )
232234
@@ -248,13 +250,11 @@ function checkCostThreshold(cost: number, thresholdDollars: number): boolean {
248250 return cost > thresholdDollars
249251}
250252
251- /**
252- * Check if error count exceeds threshold within window
253- */
254253async function checkErrorCount (
255254 workflowId : string ,
256255 threshold : number ,
257- windowHours : number
256+ windowHours : number ,
257+ triggerFilter : string [ ]
258258) : Promise < boolean > {
259259 const windowStart = new Date ( Date . now ( ) - windowHours * 60 * 60 * 1000 )
260260
@@ -265,17 +265,15 @@ async function checkErrorCount(
265265 and (
266266 eq ( workflowExecutionLogs . workflowId , workflowId ) ,
267267 eq ( workflowExecutionLogs . level , 'error' ) ,
268- gte ( workflowExecutionLogs . createdAt , windowStart )
268+ gte ( workflowExecutionLogs . createdAt , windowStart ) ,
269+ inArray ( workflowExecutionLogs . trigger , triggerFilter )
269270 )
270271 )
271272
272273 const errorCount = result [ 0 ] ?. count || 0
273274 return errorCount >= threshold
274275}
275276
276- /**
277- * Evaluates if an alert should be triggered based on the configuration
278- */
279277export async function shouldTriggerAlert (
280278 config : AlertConfig ,
281279 context : AlertCheckContext ,
@@ -287,16 +285,21 @@ export async function shouldTriggerAlert(
287285 }
288286
289287 const { rule } = config
290- const { workflowId, status, durationMs, cost } = context
288+ const { workflowId, status, durationMs, cost, triggerFilter } = context
291289
292290 switch ( rule ) {
293291 case 'consecutive_failures' :
294292 if ( status !== 'error' ) return false
295- return checkConsecutiveFailures ( workflowId , config . consecutiveFailures ! )
293+ return checkConsecutiveFailures ( workflowId , config . consecutiveFailures ! , triggerFilter )
296294
297295 case 'failure_rate' :
298296 if ( status !== 'error' ) return false
299- return checkFailureRate ( workflowId , config . failureRatePercent ! , config . windowHours ! )
297+ return checkFailureRate (
298+ workflowId ,
299+ config . failureRatePercent ! ,
300+ config . windowHours ! ,
301+ triggerFilter
302+ )
300303
301304 case 'latency_threshold' :
302305 return checkLatencyThreshold ( durationMs , config . durationThresholdMs ! )
@@ -306,19 +309,24 @@ export async function shouldTriggerAlert(
306309 workflowId ,
307310 durationMs ,
308311 config . latencySpikePercent ! ,
309- config . windowHours !
312+ config . windowHours ! ,
313+ triggerFilter
310314 )
311315
312316 case 'cost_threshold' :
313317 return checkCostThreshold ( cost , config . costThresholdDollars ! )
314318
315319 case 'no_activity' :
316- // no_activity alerts are handled by the hourly polling job, not execution events
317320 return false
318321
319322 case 'error_count' :
320323 if ( status !== 'error' ) return false
321- return checkErrorCount ( workflowId , config . errorCountThreshold ! , config . windowHours ! )
324+ return checkErrorCount (
325+ workflowId ,
326+ config . errorCountThreshold ! ,
327+ config . windowHours ! ,
328+ triggerFilter
329+ )
322330
323331 default :
324332 logger . warn ( `Unknown alert rule: ${ rule } ` )
0 commit comments