@@ -17,8 +17,8 @@ import { tmpdir } from "node:os";
17
17
import { join } from "node:path" ;
18
18
import shellEscape from "shell-escape" ;
19
19
20
- import { envToInt } from "@cocalc/backend/misc/env-to-number" ;
21
20
import getLogger from "@cocalc/backend/logger" ;
21
+ import { envToInt } from "@cocalc/backend/misc/env-to-number" ;
22
22
import { aggregate } from "@cocalc/util/aggregate" ;
23
23
import { callback_opts } from "@cocalc/util/async-utils" ;
24
24
import { to_json , trunc , uuid , walltime } from "@cocalc/util/misc" ;
@@ -39,6 +39,7 @@ const log = getLogger("execute-code");
39
39
40
40
const ASYNC_CACHE_MAX = envToInt ( "COCALC_PROJECT_ASYNC_EXEC_CACHE_MAX" , 100 ) ; // used as the "max" option of asyncCache; the second argument is presumably the fallback when the env variable is unset -- confirm
41
41
const ASYNC_CACHE_TTL_S = envToInt ( "COCALC_PROJECT_ASYNC_EXEC_TTL_S" , 60 * 60 ) ; // name suggests a time-to-live in seconds (60 * 60 = one hour); its use is not visible in this excerpt
42
+ const MONITOR_INTERVAL_S = 10 ; // in seconds: period of the timer that setupMonitor creates for async executions (multiplied by 1000 for setInterval)
42
43
43
44
const asyncCache = new LRU < string , ExecuteCodeOutputAsync > ( {
44
45
max : ASYNC_CACHE_MAX ,
@@ -231,6 +232,15 @@ function update_async(
231
232
}
232
233
}
233
234
235
+ function setupMonitor ( _job_id : string , _pid : number ) {
236
+ // Intended purpose: periodically check up on the child process tree and record stats.
237
+ // This is also meant to keep the entry in the cache alive, when the ttl is less than the duration of the execution.
238
+
239
+ const projInfo = get_ProjectInfoServer ( ) // NOTE(review): projInfo is not used anywhere in this function yet
240
+
241
+ return setInterval ( ( ) => { } , 1000 * MONITOR_INTERVAL_S ) ; // placeholder: the callback is empty and _job_id/_pid are unused; the interval handle is returned so the caller can clearInterval it
242
+ }
243
+
234
244
function doSpawn (
235
245
opts ,
236
246
cb : ( err : string | undefined , result ?: ExecuteCodeOutputBlocking ) => void ,
@@ -260,11 +270,11 @@ function doSpawn(
260
270
} ,
261
271
} ;
262
272
263
- let r : ChildProcessWithoutNullStreams ;
273
+ let child : ChildProcessWithoutNullStreams ;
264
274
let ran_code = false ;
265
275
try {
266
- r = spawn ( opts . command , opts . args , spawnOptions ) ;
267
- if ( r . stdout == null || r . stderr == null ) {
276
+ child = spawn ( opts . command , opts . args , spawnOptions ) ;
277
+ if ( child . stdout == null || child . stderr == null ) {
268
278
// The docs/examples at https://nodejs.org/api/child_process.html#child_process_child_process_spawn_command_args_options
269
279
// suggest that child.stdout and child.stderr are always defined. However, this is
270
280
// definitely NOT the case in edge cases, as we have observed.
@@ -288,7 +298,7 @@ function doSpawn(
288
298
let stderr = "" ;
289
299
let exit_code : undefined | number = undefined ;
290
300
291
- r . stdout . on ( "data" , ( data ) => {
301
+ child . stdout . on ( "data" , ( data ) => {
292
302
data = data . toString ( ) ;
293
303
if ( opts . max_output != null ) {
294
304
if ( stdout . length < opts . max_output ) {
@@ -300,7 +310,7 @@ function doSpawn(
300
310
update_async ( opts . job_id , "stdout" , stdout ) ;
301
311
} ) ;
302
312
303
- r . stderr . on ( "data" , ( data ) => {
313
+ child . stderr . on ( "data" , ( data ) => {
304
314
data = data . toString ( ) ;
305
315
if ( opts . max_output != null ) {
306
316
if ( stderr . length < opts . max_output ) {
@@ -316,25 +326,25 @@ function doSpawn(
316
326
let stdout_is_done = false ;
317
327
let killed = false ;
318
328
319
- r . stderr . on ( "end" , ( ) => {
329
+ child . stderr . on ( "end" , ( ) => {
320
330
stderr_is_done = true ;
321
331
finish ( ) ;
322
332
} ) ;
323
333
324
- r . stdout . on ( "end" , ( ) => {
334
+ child . stdout . on ( "end" , ( ) => {
325
335
stdout_is_done = true ;
326
336
finish ( ) ;
327
337
} ) ;
328
338
329
- r . on ( "exit" , ( code ) => {
339
+ child . on ( "exit" , ( code ) => {
330
340
exit_code = code != null ? code : undefined ;
331
341
finish ( ) ;
332
342
} ) ;
333
343
334
344
// This can happen, e.g., "Error: spawn ENOMEM" if there is no memory. Without this handler,
335
345
// an unhandled exception gets raised, which is nasty.
336
346
// From docs: "Note that the exit-event may or may not fire after an error has occurred. "
337
- r . on ( "error" , ( err ) => {
347
+ child . on ( "error" , ( err ) => {
338
348
if ( exit_code == null ) {
339
349
exit_code = 1 ;
340
350
}
@@ -344,6 +354,9 @@ function doSpawn(
344
354
finish ( ) ;
345
355
} ) ;
346
356
357
+ let monitor =
358
+ opts . job_id && child . pid ? setupMonitor ( opts . job_id , child . pid ) : undefined ;
359
+
347
360
let callback_done = false ;
348
361
const finish = ( err ?) => {
349
362
if ( ! killed && ( ! stdout_is_done || ! stderr_is_done || exit_code == null ) ) {
@@ -362,6 +375,10 @@ function doSpawn(
362
375
clearTimeout ( timer ) ;
363
376
timer = undefined ;
364
377
}
378
+ if ( monitor != null ) {
379
+ clearInterval ( monitor ) ;
380
+ monitor = undefined ;
381
+ }
365
382
if ( opts . verbose && log . isEnabled ( "debug" ) ) {
366
383
log . debug (
367
384
"finished exec of" ,
@@ -418,11 +435,11 @@ function doSpawn(
418
435
}
419
436
} ;
420
437
421
- let timer : any = undefined ;
438
+ let timer : NodeJS . Timeout | undefined = undefined ;
422
439
if ( opts . timeout ) {
423
440
// setup a timer that will kill the command after a certain amount of time.
424
441
const f = ( ) => {
425
- if ( r . exitCode != null ) {
442
+ if ( child . exitCode != null ) {
426
443
// command already exited.
427
444
return ;
428
445
}
@@ -435,8 +452,8 @@ function doSpawn(
435
452
}
436
453
try {
437
454
killed = true ; // we set the kill flag in any case – i.e. process will no longer exist
438
- if ( r . pid != null ) {
439
- process . kill ( - r . pid , "SIGKILL" ) ; // this should kill process group
455
+ if ( child . pid != null ) {
456
+ process . kill ( - child . pid , "SIGKILL" ) ; // this should kill process group
440
457
}
441
458
} catch ( err ) {
442
459
// Exceptions can happen, which left uncaught messes up calling code big time.
0 commit comments