@@ -3,6 +3,7 @@ import { Server } from "socket.io";
33import {
44 CoordinatorToPlatformMessages ,
55 CoordinatorToProdWorkerMessages ,
6+ omit ,
67 PlatformToCoordinatorMessages ,
78 ProdWorkerSocketData ,
89 ProdWorkerToCoordinatorMessages ,
@@ -127,17 +128,28 @@ class TaskCoordinator {
127128 clientMessages : CoordinatorToPlatformMessages ,
128129 serverMessages : PlatformToCoordinatorMessages ,
129130 authToken : PLATFORM_SECRET ,
131+ logHandlerPayloads : false ,
130132 handlers : {
131133 RESUME_AFTER_DEPENDENCY : async ( message ) => {
134+ const log = logger . child ( {
135+ eventName : "RESUME_AFTER_DEPENDENCY" ,
136+ ...omit ( message , "completions" , "executions" ) ,
137+ completions : message . completions . map ( ( c ) => c . id ) ,
138+ executions : message . executions . length ,
139+ } ) ;
140+
141+ log . debug ( "Handling RESUME_AFTER_DEPENDENCY" ) ;
142+
132143 const taskSocket = await this . #getAttemptSocket( message . attemptFriendlyId ) ;
133144
134145 if ( ! taskSocket ) {
135- logger . log ( "Socket for attempt not found" , {
136- attemptFriendlyId : message . attemptFriendlyId ,
137- } ) ;
146+ log . debug ( "Socket for attempt not found" ) ;
138147 return ;
139148 }
140149
150+ log . addFields ( { socketData : taskSocket . data } ) ;
151+ log . log ( "Found task socket for RESUME_AFTER_DEPENDENCY" ) ;
152+
141153 await chaosMonkey . call ( ) ;
142154
143155 // In case the task resumed faster than we could checkpoint
@@ -146,12 +158,17 @@ class TaskCoordinator {
146158 taskSocket . emit ( "RESUME_AFTER_DEPENDENCY" , message ) ;
147159 } ,
148160 RESUME_AFTER_DEPENDENCY_WITH_ACK : async ( message ) => {
161+ const log = logger . child ( {
162+ eventName : "RESUME_AFTER_DEPENDENCY_WITH_ACK" ,
163+ ...omit ( message , "completions" , "executions" ) ,
164+ } ) ;
165+
166+ log . debug ( "Handling RESUME_AFTER_DEPENDENCY_WITH_ACK" ) ;
167+
149168 const taskSocket = await this . #getAttemptSocket( message . attemptFriendlyId ) ;
150169
151170 if ( ! taskSocket ) {
152- logger . log ( "Socket for attempt not found" , {
153- attemptFriendlyId : message . attemptFriendlyId ,
154- } ) ;
171+ log . debug ( "Socket for attempt not found" ) ;
155172 return {
156173 success : false ,
157174 error : {
@@ -161,11 +178,12 @@ class TaskCoordinator {
161178 } ;
162179 }
163180
181+ log . addFields ( { socketData : taskSocket . data } ) ;
182+ log . log ( "Found task socket for RESUME_AFTER_DEPENDENCY_WITH_ACK" ) ;
183+
164184 //if this is set, we want to kill the process because it will be resumed with the checkpoint from the queue
165185 if ( taskSocket . data . requiresCheckpointResumeWithMessage ) {
166- logger . log ( "RESUME_AFTER_DEPENDENCY_WITH_ACK: Checkpoint is set so going to nack" , {
167- socketData : taskSocket . data ,
168- } ) ;
186+ log . log ( "RESUME_AFTER_DEPENDENCY_WITH_ACK: Checkpoint is set so going to nack" ) ;
169187
170188 return {
171189 success : false ,
@@ -189,6 +207,13 @@ class TaskCoordinator {
189207 } ;
190208 } ,
191209 RESUME_AFTER_DURATION : async ( message ) => {
210+ const log = logger . child ( {
211+ eventName : "RESUME_AFTER_DURATION" ,
212+ ...message ,
213+ } ) ;
214+
215+ log . debug ( "Handling RESUME_AFTER_DURATION" ) ;
216+
192217 const taskSocket = await this . #getAttemptSocket( message . attemptFriendlyId ) ;
193218
194219 if ( ! taskSocket ) {
@@ -198,11 +223,19 @@ class TaskCoordinator {
198223 return ;
199224 }
200225
226+ log . addFields ( { socketData : taskSocket . data } ) ;
227+ log . log ( "Found task socket for RESUME_AFTER_DURATION" ) ;
228+
201229 await chaosMonkey . call ( ) ;
202230
203231 taskSocket . emit ( "RESUME_AFTER_DURATION" , message ) ;
204232 } ,
205233 REQUEST_ATTEMPT_CANCELLATION : async ( message ) => {
234+ const log = logger . child ( {
235+ eventName : "REQUEST_ATTEMPT_CANCELLATION" ,
236+ ...message ,
237+ } ) ;
238+
206239 const taskSocket = await this . #getAttemptSocket( message . attemptFriendlyId ) ;
207240
208241 if ( ! taskSocket ) {
@@ -212,9 +245,17 @@ class TaskCoordinator {
212245 return ;
213246 }
214247
248+ log . addFields ( { socketData : taskSocket . data } ) ;
249+ log . log ( "Found task socket for REQUEST_ATTEMPT_CANCELLATION" ) ;
250+
215251 taskSocket . emit ( "REQUEST_ATTEMPT_CANCELLATION" , message ) ;
216252 } ,
217253 REQUEST_RUN_CANCELLATION : async ( message ) => {
254+ const log = logger . child ( {
255+ eventName : "REQUEST_RUN_CANCELLATION" ,
256+ ...message ,
257+ } ) ;
258+
218259 const taskSocket = await this . #getRunSocket( message . runId ) ;
219260
220261 if ( ! taskSocket ) {
@@ -224,6 +265,9 @@ class TaskCoordinator {
224265 return ;
225266 }
226267
268+ log . addFields ( { socketData : taskSocket . data } ) ;
269+ log . log ( "Found task socket for REQUEST_RUN_CANCELLATION" ) ;
270+
227271 this . #cancelCheckpoint( message . runId ) ;
228272
229273 if ( message . delayInMs ) {
@@ -239,6 +283,11 @@ class TaskCoordinator {
239283 }
240284 } ,
241285 READY_FOR_RETRY : async ( message ) => {
286+ const log = logger . child ( {
287+ eventName : "READY_FOR_RETRY" ,
288+ ...message ,
289+ } ) ;
290+
242291 const taskSocket = await this . #getRunSocket( message . runId ) ;
243292
244293 if ( ! taskSocket ) {
@@ -248,11 +297,19 @@ class TaskCoordinator {
248297 return ;
249298 }
250299
300+ log . addFields ( { socketData : taskSocket . data } ) ;
301+ log . log ( "Found task socket for READY_FOR_RETRY" ) ;
302+
251303 await chaosMonkey . call ( ) ;
252304
253305 taskSocket . emit ( "READY_FOR_RETRY" , message ) ;
254306 } ,
255307 DYNAMIC_CONFIG : async ( message ) => {
308+ const log = logger . child ( {
309+ eventName : "DYNAMIC_CONFIG" ,
310+ ...message ,
311+ } ) ;
312+
256313 this . #delayThresholdInMs = message . checkpointThresholdInMs ;
257314
258315 // The first time we receive a dynamic config, the worker namespace will be created
0 commit comments