@@ -15,6 +15,7 @@ import { EnqueueSystem } from "./enqueueSystem.js";
15
15
import { ExecutionSnapshotSystem , getLatestExecutionSnapshot } from "./executionSnapshotSystem.js" ;
16
16
import { SystemResources } from "./systems.js" ;
17
17
import { ReleaseConcurrencySystem } from "./releaseConcurrencySystem.js" ;
18
+ import { assertNever } from "assert-never" ;
18
19
19
20
export type WaitpointSystemOptions = {
20
21
resources : SystemResources ;
@@ -499,190 +500,247 @@ export class WaitpointSystem {
499
500
runId,
500
501
} ) ;
501
502
502
- // 1. Get the any blocking waitpoints
503
- const blockingWaitpoints = await this . $ . prisma . taskRunWaitpoint . findMany ( {
504
- where : { taskRunId : runId } ,
505
- select : {
506
- id : true ,
507
- batchId : true ,
508
- batchIndex : true ,
509
- waitpoint : {
510
- select : { id : true , status : true } ,
511
- } ,
512
- } ,
513
- } ) ;
514
-
515
503
await this . $ . raceSimulationSystem . waitForRacepoint ( { runId } ) ;
516
504
517
- // 2. There are blockers still, so do nothing
518
- if ( blockingWaitpoints . some ( ( w ) => w . waitpoint . status !== "COMPLETED" ) ) {
519
- this . $ . logger . debug ( `continueRunIfUnblocked: blocking waitpoints still exist` , {
520
- runId,
521
- blockingWaitpoints,
505
+ return await this . $ . runLock . lock ( "continueRunIfUnblocked" , [ runId ] , async ( ) => {
506
+ // 1. Get the any blocking waitpoints
507
+ const blockingWaitpoints = await this . $ . prisma . taskRunWaitpoint . findMany ( {
508
+ where : { taskRunId : runId } ,
509
+ select : {
510
+ id : true ,
511
+ batchId : true ,
512
+ batchIndex : true ,
513
+ waitpoint : {
514
+ select : { id : true , status : true } ,
515
+ } ,
516
+ } ,
522
517
} ) ;
523
- return "blocked" ;
524
- }
525
518
526
- // 3. Get the run with environment
527
- const run = await this . $ . prisma . taskRun . findFirst ( {
528
- where : {
529
- id : runId ,
530
- } ,
531
- include : {
532
- runtimeEnvironment : {
533
- select : {
534
- id : true ,
535
- type : true ,
536
- maximumConcurrencyLimit : true ,
537
- project : { select : { id : true } } ,
538
- organization : { select : { id : true } } ,
519
+ // 2. There are blockers still, so do nothing
520
+ if ( blockingWaitpoints . some ( ( w ) => w . waitpoint . status !== "COMPLETED" ) ) {
521
+ this . $ . logger . debug ( `continueRunIfUnblocked: blocking waitpoints still exist` , {
522
+ runId,
523
+ blockingWaitpoints,
524
+ } ) ;
525
+ return "blocked" ;
526
+ }
527
+
528
+ // 3. Get the run with environment
529
+ const run = await this . $ . prisma . taskRun . findFirst ( {
530
+ where : {
531
+ id : runId ,
532
+ } ,
533
+ include : {
534
+ runtimeEnvironment : {
535
+ select : {
536
+ id : true ,
537
+ type : true ,
538
+ maximumConcurrencyLimit : true ,
539
+ project : { select : { id : true } } ,
540
+ organization : { select : { id : true } } ,
541
+ } ,
539
542
} ,
540
543
} ,
541
- } ,
542
- } ) ;
543
-
544
- if ( ! run ) {
545
- this . $ . logger . error ( `continueRunIfUnblocked: run not found` , {
546
- runId,
547
544
} ) ;
548
- throw new Error ( `continueRunIfUnblocked: run not found: ${ runId } ` ) ;
549
- }
550
545
551
- //4. Continue the run whether it's executing or not
552
- await this . $ . runLock . lock ( "continueRunIfUnblocked" , [ runId ] , async ( ) => {
553
- const snapshot = await getLatestExecutionSnapshot ( this . $ . prisma , runId ) ;
554
-
555
- if ( isFinishedOrPendingFinished ( snapshot . executionStatus ) ) {
556
- this . $ . logger . debug ( `continueRunIfUnblocked: run is finished, skipping` , {
546
+ if ( ! run ) {
547
+ this . $ . logger . error ( `continueRunIfUnblocked: run not found` , {
557
548
runId,
558
- snapshot,
559
549
} ) ;
560
- return "skipped" ;
550
+ throw new Error ( `continueRunIfUnblocked: run not found: ${ runId } ` ) ;
561
551
}
562
552
563
- //run is still executing, send a message to the worker
564
- if ( isExecuting ( snapshot . executionStatus ) ) {
565
- const result = await this . $ . runQueue . reacquireConcurrency (
566
- run . runtimeEnvironment . organization . id ,
567
- runId
568
- ) ;
569
-
570
- if ( result ) {
571
- const newSnapshot = await this . executionSnapshotSystem . createExecutionSnapshot (
572
- this . $ . prisma ,
573
- {
574
- run : {
575
- id : runId ,
576
- status : snapshot . runStatus ,
577
- attemptNumber : snapshot . attemptNumber ,
578
- } ,
553
+ //4. Continue the run whether it's executing or not
554
+ const snapshot = await getLatestExecutionSnapshot ( this . $ . prisma , runId ) ;
555
+
556
+ switch ( snapshot . executionStatus ) {
557
+ case "RUN_CREATED" : {
558
+ this . $ . logger . info ( `continueRunIfUnblocked: run is run created, skipping` , {
559
+ runId,
560
+ snapshot,
561
+ executionStatus : snapshot . executionStatus ,
562
+ } ) ;
563
+
564
+ return "skipped" ;
565
+ }
566
+ case "QUEUED" : {
567
+ this . $ . logger . info ( `continueRunIfUnblocked: run is queued, skipping` , {
568
+ runId,
569
+ snapshot,
570
+ executionStatus : snapshot . executionStatus ,
571
+ } ) ;
572
+
573
+ return "skipped" ;
574
+ }
575
+ case "PENDING_EXECUTING" : {
576
+ this . $ . logger . info ( `continueRunIfUnblocked: run is pending executing, skipping` , {
577
+ runId,
578
+ snapshot,
579
+ executionStatus : snapshot . executionStatus ,
580
+ } ) ;
581
+
582
+ return "skipped" ;
583
+ }
584
+ case "QUEUED_EXECUTING" : {
585
+ this . $ . logger . info ( `continueRunIfUnblocked: run is already queued executing, skipping` , {
586
+ runId,
587
+ snapshot,
588
+ executionStatus : snapshot . executionStatus ,
589
+ } ) ;
590
+
591
+ return "skipped" ;
592
+ }
593
+ case "EXECUTING" : {
594
+ this . $ . logger . info ( `continueRunIfUnblocked: run is already executing, skipping` , {
595
+ runId,
596
+ snapshot,
597
+ executionStatus : snapshot . executionStatus ,
598
+ } ) ;
599
+
600
+ return "skipped" ;
601
+ }
602
+ case "PENDING_CANCEL" :
603
+ case "FINISHED" : {
604
+ this . $ . logger . debug ( `continueRunIfUnblocked: run is finished, skipping` , {
605
+ runId,
606
+ snapshot,
607
+ executionStatus : snapshot . executionStatus ,
608
+ } ) ;
609
+ return "skipped" ;
610
+ }
611
+ case "EXECUTING_WITH_WAITPOINTS" : {
612
+ const result = await this . $ . runQueue . reacquireConcurrency (
613
+ run . runtimeEnvironment . organization . id ,
614
+ runId
615
+ ) ;
616
+
617
+ if ( result ) {
618
+ const newSnapshot = await this . executionSnapshotSystem . createExecutionSnapshot (
619
+ this . $ . prisma ,
620
+ {
621
+ run : {
622
+ id : runId ,
623
+ status : snapshot . runStatus ,
624
+ attemptNumber : snapshot . attemptNumber ,
625
+ } ,
626
+ snapshot : {
627
+ executionStatus : "EXECUTING" ,
628
+ description : "Run was continued, whilst still executing." ,
629
+ } ,
630
+ previousSnapshotId : snapshot . id ,
631
+ environmentId : snapshot . environmentId ,
632
+ environmentType : snapshot . environmentType ,
633
+ projectId : snapshot . projectId ,
634
+ organizationId : snapshot . organizationId ,
635
+ batchId : snapshot . batchId ?? undefined ,
636
+ completedWaitpoints : blockingWaitpoints . map ( ( b ) => ( {
637
+ id : b . waitpoint . id ,
638
+ index : b . batchIndex ?? undefined ,
639
+ } ) ) ,
640
+ }
641
+ ) ;
642
+
643
+ await this . releaseConcurrencySystem . refillTokensForSnapshot ( snapshot ) ;
644
+
645
+ this . $ . logger . debug (
646
+ `continueRunIfUnblocked: run was still executing, sending notification` ,
647
+ {
648
+ runId,
649
+ snapshot,
650
+ newSnapshot,
651
+ }
652
+ ) ;
653
+
654
+ await sendNotificationToWorker ( {
655
+ runId,
656
+ snapshot : newSnapshot ,
657
+ eventBus : this . $ . eventBus ,
658
+ } ) ;
659
+ } else {
660
+ // Because we cannot reacquire the concurrency, we need to enqueue the run again
661
+ // and because the run is still executing, we need to set the status to QUEUED_EXECUTING
662
+ const newSnapshot = await this . enqueueSystem . enqueueRun ( {
663
+ run,
664
+ env : run . runtimeEnvironment ,
579
665
snapshot : {
580
- executionStatus : "EXECUTING " ,
581
- description : "Run was continued, whilst still executing. " ,
666
+ status : "QUEUED_EXECUTING " ,
667
+ description : "Run can continue, but is waiting for concurrency " ,
582
668
} ,
583
669
previousSnapshotId : snapshot . id ,
584
- environmentId : snapshot . environmentId ,
585
- environmentType : snapshot . environmentType ,
586
- projectId : snapshot . projectId ,
587
- organizationId : snapshot . organizationId ,
588
670
batchId : snapshot . batchId ?? undefined ,
589
671
completedWaitpoints : blockingWaitpoints . map ( ( b ) => ( {
590
672
id : b . waitpoint . id ,
591
673
index : b . batchIndex ?? undefined ,
592
674
} ) ) ,
593
- }
594
- ) ;
595
-
596
- await this . releaseConcurrencySystem . refillTokensForSnapshot ( snapshot ) ;
675
+ } ) ;
597
676
598
- this . $ . logger . debug (
599
- `continueRunIfUnblocked: run was still executing, sending notification` ,
600
- {
677
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED_EXECUTING` , {
601
678
runId,
602
679
snapshot,
603
680
newSnapshot,
604
- }
605
- ) ;
681
+ } ) ;
682
+ }
606
683
607
- await sendNotificationToWorker ( {
608
- runId,
609
- snapshot : newSnapshot ,
610
- eventBus : this . $ . eventBus ,
611
- } ) ;
612
- } else {
613
- // Because we cannot reacquire the concurrency, we need to enqueue the run again
614
- // and because the run is still executing, we need to set the status to QUEUED_EXECUTING
684
+ break ;
685
+ }
686
+ case "SUSPENDED" : {
687
+ if ( ! snapshot . checkpointId ) {
688
+ this . $ . logger . error ( `continueRunIfUnblocked: run is suspended, but has no checkpoint` , {
689
+ runId,
690
+ snapshot,
691
+ } ) ;
692
+ throw new Error (
693
+ `continueRunIfUnblocked: run is suspended, but has no checkpoint: ${ runId } `
694
+ ) ;
695
+ }
696
+
697
+ //put it back in the queue, with the original timestamp (w/ priority)
698
+ //this prioritizes dequeuing waiting runs over new runs
615
699
const newSnapshot = await this . enqueueSystem . enqueueRun ( {
616
700
run,
617
701
env : run . runtimeEnvironment ,
618
702
snapshot : {
619
- status : "QUEUED_EXECUTING " ,
620
- description : "Run can continue, but is waiting for concurrency " ,
703
+ status : "QUEUED " ,
704
+ description : "Run was QUEUED, because all waitpoints are completed " ,
621
705
} ,
622
- previousSnapshotId : snapshot . id ,
623
706
batchId : snapshot . batchId ?? undefined ,
624
707
completedWaitpoints : blockingWaitpoints . map ( ( b ) => ( {
625
708
id : b . waitpoint . id ,
626
709
index : b . batchIndex ?? undefined ,
627
710
} ) ) ,
711
+ checkpointId : snapshot . checkpointId ?? undefined ,
628
712
} ) ;
629
713
630
- this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED_EXECUTING ` , {
714
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED ` , {
631
715
runId,
632
716
snapshot,
633
717
newSnapshot,
634
718
} ) ;
719
+
720
+ break ;
635
721
}
636
- } else {
637
- if ( snapshot . executionStatus !== "RUN_CREATED" && ! snapshot . checkpointId ) {
638
- // TODO: We're screwed, should probably fail the run immediately
639
- this . $ . logger . error ( `continueRunIfUnblocked: run has no checkpoint` , {
640
- runId : run . id ,
641
- snapshot,
642
- blockingWaitpoints,
643
- } ) ;
644
- throw new Error ( `continueRunIfUnblocked: run has no checkpoint: ${ run . id } ` ) ;
722
+ default : {
723
+ assertNever ( snapshot . executionStatus ) ;
645
724
}
725
+ }
646
726
647
- //put it back in the queue, with the original timestamp (w/ priority)
648
- //this prioritizes dequeuing waiting runs over new runs
649
- const newSnapshot = await this . enqueueSystem . enqueueRun ( {
650
- run,
651
- env : run . runtimeEnvironment ,
652
- snapshot : {
653
- description : "Run was QUEUED, because all waitpoints are completed" ,
727
+ if ( blockingWaitpoints . length > 0 ) {
728
+ //5. Remove the blocking waitpoints
729
+ await this . $ . prisma . taskRunWaitpoint . deleteMany ( {
730
+ where : {
731
+ taskRunId : runId ,
732
+ id : { in : blockingWaitpoints . map ( ( b ) => b . id ) } ,
654
733
} ,
655
- batchId : snapshot . batchId ?? undefined ,
656
- completedWaitpoints : blockingWaitpoints . map ( ( b ) => ( {
657
- id : b . waitpoint . id ,
658
- index : b . batchIndex ?? undefined ,
659
- } ) ) ,
660
- checkpointId : snapshot . checkpointId ?? undefined ,
661
734
} ) ;
662
735
663
- this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED ` , {
736
+ this . $ . logger . debug ( `continueRunIfUnblocked: removed blocking waitpoints ` , {
664
737
runId,
665
- snapshot,
666
- newSnapshot,
738
+ blockingWaitpoints,
667
739
} ) ;
668
740
}
669
- } ) ;
670
-
671
- if ( blockingWaitpoints . length > 0 ) {
672
- //5. Remove the blocking waitpoints
673
- await this . $ . prisma . taskRunWaitpoint . deleteMany ( {
674
- where : {
675
- taskRunId : runId ,
676
- id : { in : blockingWaitpoints . map ( ( b ) => b . id ) } ,
677
- } ,
678
- } ) ;
679
-
680
- this . $ . logger . debug ( `continueRunIfUnblocked: removed blocking waitpoints` , {
681
- runId,
682
- } ) ;
683
- }
684
741
685
- return "unblocked" ;
742
+ return "unblocked" ;
743
+ } ) ; // end of runlock
686
744
}
687
745
688
746
public async createRunAssociatedWaitpoint (
0 commit comments