Skip to content

Commit 19aa396

Browse files
committed
api/v2/exec: gather process info – WIP
1 parent c1a9ae2 commit 19aa396

File tree

1 file changed

+31
-14
lines changed

1 file changed

+31
-14
lines changed

src/packages/backend/execute-code.ts

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ import { tmpdir } from "node:os";
1717
import { join } from "node:path";
1818
import shellEscape from "shell-escape";
1919

20-
import { envToInt } from "@cocalc/backend/misc/env-to-number";
2120
import getLogger from "@cocalc/backend/logger";
21+
import { envToInt } from "@cocalc/backend/misc/env-to-number";
2222
import { aggregate } from "@cocalc/util/aggregate";
2323
import { callback_opts } from "@cocalc/util/async-utils";
2424
import { to_json, trunc, uuid, walltime } from "@cocalc/util/misc";
@@ -39,6 +39,7 @@ const log = getLogger("execute-code");
3939

4040
const ASYNC_CACHE_MAX = envToInt("COCALC_PROJECT_ASYNC_EXEC_CACHE_MAX", 100);
4141
const ASYNC_CACHE_TTL_S = envToInt("COCALC_PROJECT_ASYNC_EXEC_TTL_S", 60 * 60);
42+
const MONITOR_INTERVAL_S = 10; // for async execution: interval in seconds at which the child process tree is checked
4243

4344
const asyncCache = new LRU<string, ExecuteCodeOutputAsync>({
4445
max: ASYNC_CACHE_MAX,
@@ -231,6 +232,15 @@ function update_async(
231232
}
232233
}
233234

235+
function setupMonitor(_job_id: string, _pid: number) {
236+
// periodically check up on the child process tree and record stats
237+
// this is also meant to keep the entry in the cache alive when the TTL is shorter than the duration of the execution
238+
239+
const projInfo = get_ProjectInfoServer()
240+
241+
return setInterval(() => {}, 1000 * MONITOR_INTERVAL_S);
242+
}
243+
234244
function doSpawn(
235245
opts,
236246
cb: (err: string | undefined, result?: ExecuteCodeOutputBlocking) => void,
@@ -260,11 +270,11 @@ function doSpawn(
260270
},
261271
};
262272

263-
let r: ChildProcessWithoutNullStreams;
273+
let child: ChildProcessWithoutNullStreams;
264274
let ran_code = false;
265275
try {
266-
r = spawn(opts.command, opts.args, spawnOptions);
267-
if (r.stdout == null || r.stderr == null) {
276+
child = spawn(opts.command, opts.args, spawnOptions);
277+
if (child.stdout == null || child.stderr == null) {
268278
// The docs/examples at https://nodejs.org/api/child_process.html#child_process_child_process_spawn_command_args_options
269279
// suggest that child.stdout and child.stderr are always defined. However, this is
270280
// definitely NOT the case in edge cases, as we have observed.
@@ -288,7 +298,7 @@ function doSpawn(
288298
let stderr = "";
289299
let exit_code: undefined | number = undefined;
290300

291-
r.stdout.on("data", (data) => {
301+
child.stdout.on("data", (data) => {
292302
data = data.toString();
293303
if (opts.max_output != null) {
294304
if (stdout.length < opts.max_output) {
@@ -300,7 +310,7 @@ function doSpawn(
300310
update_async(opts.job_id, "stdout", stdout);
301311
});
302312

303-
r.stderr.on("data", (data) => {
313+
child.stderr.on("data", (data) => {
304314
data = data.toString();
305315
if (opts.max_output != null) {
306316
if (stderr.length < opts.max_output) {
@@ -316,25 +326,25 @@ function doSpawn(
316326
let stdout_is_done = false;
317327
let killed = false;
318328

319-
r.stderr.on("end", () => {
329+
child.stderr.on("end", () => {
320330
stderr_is_done = true;
321331
finish();
322332
});
323333

324-
r.stdout.on("end", () => {
334+
child.stdout.on("end", () => {
325335
stdout_is_done = true;
326336
finish();
327337
});
328338

329-
r.on("exit", (code) => {
339+
child.on("exit", (code) => {
330340
exit_code = code != null ? code : undefined;
331341
finish();
332342
});
333343

334344
// This can happen, e.g., "Error: spawn ENOMEM" if there is no memory. Without this handler,
335345
// an unhandled exception gets raised, which is nasty.
336346
// From docs: "Note that the exit-event may or may not fire after an error has occurred. "
337-
r.on("error", (err) => {
347+
child.on("error", (err) => {
338348
if (exit_code == null) {
339349
exit_code = 1;
340350
}
@@ -344,6 +354,9 @@ function doSpawn(
344354
finish();
345355
});
346356

357+
let monitor =
358+
opts.job_id && child.pid ? setupMonitor(opts.job_id, child.pid) : undefined;
359+
347360
let callback_done = false;
348361
const finish = (err?) => {
349362
if (!killed && (!stdout_is_done || !stderr_is_done || exit_code == null)) {
@@ -362,6 +375,10 @@ function doSpawn(
362375
clearTimeout(timer);
363376
timer = undefined;
364377
}
378+
if (monitor != null) {
379+
clearInterval(monitor);
380+
monitor = undefined;
381+
}
365382
if (opts.verbose && log.isEnabled("debug")) {
366383
log.debug(
367384
"finished exec of",
@@ -418,11 +435,11 @@ function doSpawn(
418435
}
419436
};
420437

421-
let timer: any = undefined;
438+
let timer: NodeJS.Timeout | undefined = undefined;
422439
if (opts.timeout) {
423440
// setup a timer that will kill the command after a certain amount of time.
424441
const f = () => {
425-
if (r.exitCode != null) {
442+
if (child.exitCode != null) {
426443
// command already exited.
427444
return;
428445
}
@@ -435,8 +452,8 @@ function doSpawn(
435452
}
436453
try {
437454
killed = true; // we set the kill flag in any case – i.e. process will no longer exist
438-
if (r.pid != null) {
439-
process.kill(-r.pid, "SIGKILL"); // this should kill process group
455+
if (child.pid != null) {
456+
process.kill(-child.pid, "SIGKILL"); // this should kill process group
440457
}
441458
} catch (err) {
442459
// Exceptions can happen, which left uncaught messes up calling code big time.

0 commit comments

Comments
 (0)