-
Notifications
You must be signed in to change notification settings - Fork 34
Closed
Description
Below is an adaptation of the code I ended up using with execStream. It works around a few issues that it would be great for the Sandbox SDK to address:
- You have to keep renewing the container activity timeout; otherwise Cloudflare shuts down the container while your process is still running.
- The execStream response just "hangs" in a number of cases. For example, if the container crashes, or if Bun closes the connection because it is idle, no error is raised on the execStream. So it is not safe to consume the stream normally. Instead, we regularly check the health of the sandbox, and we consume the stream by calling next() inside a timeout race so that we can detect and handle failure.
// Run the command in streaming mode and fold its events into a single result
// object of the shape { success, message, stdout, stderr, exitCode }.
// Every exit path below returns that shape; failures detected on this side
// (unhealthy sandbox, idle timeout, premature end of stream, "error" event)
// all report exitCode 1.
const commandExec = input.command;
const resultStream = await execStreamOnSandbox(sandbox, sandboxSession.id, commandExec, {
timeout: 30 * 60 * 1000, // 30 minutes total timeout - that should be enough
});
// Accumulated output of the command; appended to as stdout/stderr/error events arrive.
let stdout = "";
let stderr = "";
// Drive the async iterator by hand (instead of `for await`) so that each
// next() call can be raced against a timeout further down.
const stream = resultStream[Symbol.asyncIterator]();
// We have to keep an eye on the sandbox: sometimes it crashes and we don't get
// an error from the stream. Every 5 seconds this timer (1) reads the sandbox
// state and clears `sandboxHealthy` if the status is anything other than
// "healthy", and (2) renews the activity timeout.
// NOTE(review): if getState() itself throws, the catch below only logs (with a
// message that mentions the renewal) and `sandboxHealthy` stays true, and
// renewActivityTimeout() is skipped for that tick — confirm this is intended.
let sandboxHealthy = true;
const interval = setInterval(async () => {
try {
const state = await sandbox.getState();
if (state.status !== "healthy") {
sandboxHealthy = false;
}
await sandbox.renewActivityTimeout();
} catch (err) {
logger.error("Error renewing activity timeout", err);
}
}, 5000);
// try/finally so the health-check interval is cleared on every exit path
// (all of the returns below, as well as any thrown error).
// NOTE(review): the stream iterator is not explicitly closed on the early
// returns — confirm whether the SDK needs stream.return() / cancellation.
try {
while (true) {
// The health flag is only inspected here, between stream events, so an
// unhealthy sandbox is noticed after the pending next() settles or the
// per-event timeout below fires.
if (!sandboxHealthy) {
logger.warn("Sandbox is not healthy, exiting");
return {
success: false,
message: "Sandbox crashed after completing this work",
stdout,
stderr,
exitCode: 1,
};
}
// We allow 260 seconds for the next stream event; if we don't get one, we time out.
// This is because the Bun server idle timeout is 255 seconds, and if we don't get
// an event by then we probably never will, yet the Sandbox SDK does not surface
// an error when Bun closes the connection.
// NOTE(review): setTimeoutPromise is defined outside this snippet. If it is
// Node's timers/promises setTimeout, its signature is (delay, value, options),
// so the object passed here would be treated as the resolved value and the
// abort signal would be ignored — confirm.
const abortController = new AbortController();
const getNextStreamEventTimeout = setTimeoutPromise(260_000, {
signal: abortController.signal,
}).then(() => "TIMEOUT" as const);
const result = await Promise.race([stream.next(), getNextStreamEventTimeout]);
// Clean up the timeout regardless of which promise won the race
abortController.abort();
if (result === "TIMEOUT") {
return {
message: "The connection to the sandbox timed out",
success: false,
stdout,
stderr,
exitCode: 1,
};
}
// The iterator finished without a "complete" event having been seen
// (a "complete" event returns from inside the switch below).
if (result.done) {
logger.info("Exec readable exhausted without complete event", {
input,
stdout,
stderr,
});
// should not get here
return {
success: false,
message: "Result stream terminated before process completion signal was received",
stdout,
stderr,
exitCode: 1,
};
}
const event = result.value;
// Event types not listed here fall through the switch (there is no default
// case) and the loop simply waits for the next event.
switch (event.type) {
case "stdout":
stdout += event.data;
logger.info(`Exec stdout: ${event.data}`);
break;
case "stderr":
stderr += event.data;
logger.info(`Exec stderr: ${event.data}`);
break;
case "error":
// An error event is recorded in stderr and ends the run as a failure.
stderr += event.data;
logger.info(`Exec error: ${event.data}`);
logger.error(`Error running \`${commandExec}\` in sandbox`, event);
return {
message: "Execution errors occurred",
success: false,
stdout,
stderr,
exitCode: 1,
};
case "complete":
// The process finished; success is determined solely by a zero exit code.
// NOTE(review): the log text says "Tests" although the command is arbitrary —
// presumably a leftover from the original use case.
logger.log(`Tests ${event.exitCode === 0 ? "passed" : "failed"}`);
return {
message:
event.exitCode === 0
? "Execution completed successfully"
: "Execution completed with errors",
success: event.exitCode === 0,
stdout,
stderr,
exitCode: event.exitCode,
};
}
}
} finally {
// Stop the periodic health check / activity-timeout renewal.
clearInterval(interval);
}
};
Metadata
Metadata
Assignees
Labels
No labels