@@ -13,7 +13,6 @@ import {
1313 MachinePreset ,
1414 ProdTaskRunExecution ,
1515 ProdTaskRunExecutionPayload ,
16- QueueOptions ,
1716 TaskRunError ,
1817 TaskRunErrorCodes ,
1918 TaskRunExecution ,
@@ -29,13 +28,13 @@ import {
2928 BackgroundWorker ,
3029 BackgroundWorkerTask ,
3130 Prisma ,
32- TaskQueue ,
3331 TaskRunStatus ,
3432} from "@trigger.dev/database" ;
3533import { z } from "zod" ;
3634import { $replica , prisma } from "~/db.server" ;
3735import { env } from "~/env.server" ;
3836import { findEnvironmentById } from "~/models/runtimeEnvironment.server" ;
37+ import { findQueueInEnvironment , sanitizeQueueName } from "~/models/taskQueue.server" ;
3938import { generateJWTTokenForEnvironment } from "~/services/apiAuth.server" ;
4039import { logger } from "~/services/logger.server" ;
4140import { singleton } from "~/utils/singleton" ;
@@ -67,7 +66,6 @@ import {
6766import { tracer } from "../tracer.server" ;
6867import { getMaxDuration } from "../utils/maxDuration" ;
6968import { MessagePayload } from "./types" ;
70- import { findQueueInEnvironment , sanitizeQueueName } from "~/models/taskQueue.server" ;
7169
7270const WithTraceContext = z . object ( {
7371 traceparent : z . string ( ) . optional ( ) ,
@@ -323,6 +321,14 @@ export class SharedQueueConsumer {
323321 ROOT_CONTEXT
324322 ) ;
325323
324+ logger . debug ( "SharedQueueConsumer starting new trace" , {
325+ reasonStats : this . _reasonStats ,
326+ actionStats : this . _actionStats ,
327+ outcomeStats : this . _outcomeStats ,
328+ iterationCount : this . _iterationsCount ,
329+ consumerId : this . _id ,
330+ } ) ;
331+
326332 // Get the span trace context
327333 this . _currentSpanContext = trace . setSpan ( ROOT_CONTEXT , this . _currentSpan ) ;
328334
@@ -351,6 +357,10 @@ export class SharedQueueConsumer {
351357 try {
352358 const result = await this . #doWorkInternal( ) ;
353359
360+ if ( result . reason !== "no_message_dequeued" ) {
361+ logger . debug ( "SharedQueueConsumer doWorkInternal result" , { result } ) ;
362+ }
363+
354364 this . _reasonStats [ result . reason ] = ( this . _reasonStats [ result . reason ] ?? 0 ) + 1 ;
355365 this . _outcomeStats [ result . outcome ] = ( this . _outcomeStats [ result . outcome ] ?? 0 ) + 1 ;
356366
@@ -371,6 +381,9 @@ export class SharedQueueConsumer {
371381 if ( result . error ) {
372382 span . recordException ( result . error ) ;
373383 span . setStatus ( { code : SpanStatusCode . ERROR } ) ;
384+ this . _currentSpan ?. recordException ( result . error ) ;
385+ this . _currentSpan ?. setStatus ( { code : SpanStatusCode . ERROR } ) ;
386+ this . _endSpanInNextIteration = true ;
374387 }
375388
376389 if ( typeof result . interval === "number" ) {
@@ -755,7 +768,7 @@ export class SharedQueueConsumer {
755768 ) ;
756769
757770 if ( ! queue ) {
758- logger . debug ( "SharedQueueConsumer queue not found, so nacking message" , {
771+ logger . debug ( "SharedQueueConsumer queue not found, so acking message" , {
759772 queueMessage : message ,
760773 taskRunQueue : lockedTaskRun . queue ,
761774 runtimeEnvironmentId : lockedTaskRun . runtimeEnvironmentId ,
@@ -876,33 +889,49 @@ export class SharedQueueConsumer {
876889 machinePresetFromRun ( lockedTaskRun ) ??
877890 machinePresetFromConfig ( lockedTaskRun . lockedBy ?. machineConfig ?? { } ) ;
878891
879- await this . #startActiveSpan( "scheduleAttemptOnProvider" , async ( span ) => {
880- await this . _providerSender . send ( "BACKGROUND_WORKER_MESSAGE" , {
881- backgroundWorkerId : worker . friendlyId ,
882- data : {
883- type : "SCHEDULE_ATTEMPT" ,
884- image : imageReference ,
885- version : deployment . version ,
886- machine,
887- nextAttemptNumber,
888- // identifiers
889- id : "placeholder" , // TODO: Remove this completely in a future release
890- envId : lockedTaskRun . runtimeEnvironment . id ,
891- envType : lockedTaskRun . runtimeEnvironment . type ,
892- orgId : lockedTaskRun . runtimeEnvironment . organizationId ,
893- projectId : lockedTaskRun . runtimeEnvironment . projectId ,
894- runId : lockedTaskRun . id ,
895- } ,
892+ return await this . #startActiveSpan( "scheduleAttemptOnProvider" , async ( span ) => {
893+ span . setAttributes ( {
894+ run_id : lockedTaskRun . id ,
896895 } ) ;
897- } ) ;
898896
899- return {
900- action : "noop" ,
901- reason : "scheduled_attempt" ,
902- attrs : {
903- next_attempt_number : nextAttemptNumber ,
904- } ,
905- } ;
897+ if ( await this . _providerSender . validateCanSendMessage ( ) ) {
898+ await this . _providerSender . send ( "BACKGROUND_WORKER_MESSAGE" , {
899+ backgroundWorkerId : worker . friendlyId ,
900+ data : {
901+ type : "SCHEDULE_ATTEMPT" ,
902+ image : imageReference ,
903+ version : deployment . version ,
904+ machine,
905+ nextAttemptNumber,
906+ // identifiers
907+ id : "placeholder" , // TODO: Remove this completely in a future release
908+ envId : lockedTaskRun . runtimeEnvironment . id ,
909+ envType : lockedTaskRun . runtimeEnvironment . type ,
910+ orgId : lockedTaskRun . runtimeEnvironment . organizationId ,
911+ projectId : lockedTaskRun . runtimeEnvironment . projectId ,
912+ runId : lockedTaskRun . id ,
913+ } ,
914+ } ) ;
915+
916+ return {
917+ action : "noop" ,
918+ reason : "scheduled_attempt" ,
919+ attrs : {
920+ next_attempt_number : nextAttemptNumber ,
921+ } ,
922+ } ;
923+ } else {
924+ return {
925+ action : "nack_and_do_more_work" ,
926+ reason : "provider_not_connected" ,
927+ attrs : {
928+ run_id : lockedTaskRun . id ,
929+ } ,
930+ interval : this . _options . nextTickInterval ,
931+ retryInMs : 5_000 ,
932+ } ;
933+ }
934+ } ) ;
906935 }
907936 } catch ( e ) {
908937 // We now need to unlock the task run and delete the task run attempt
@@ -929,6 +958,8 @@ export class SharedQueueConsumer {
929958 action : "nack_and_do_more_work" ,
930959 reason : "failed_to_schedule_attempt" ,
931960 error : e instanceof Error ? e : String ( e ) ,
961+ interval : this . _options . nextTickInterval ,
962+ retryInMs : 5_000 ,
932963 } ;
933964 }
934965 }
0 commit comments