Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .env.base.example

This file was deleted.

5 changes: 0 additions & 5 deletions .env.profiles/daytona.env.example

This file was deleted.

6 changes: 0 additions & 6 deletions .env.profiles/minimax-cn.env.example

This file was deleted.

6 changes: 0 additions & 6 deletions .env.profiles/minimax-global.env.example

This file was deleted.

3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ dist/
.env
.env.local
.env.*.local
.env.base
.env.profiles/*.env
!.env.profiles/*.env.example

# IDE
.vscode/
Expand Down
2 changes: 1 addition & 1 deletion benchmark/terminalbench/open_agent_sdk_harbor/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def create_run_agent_commands(self, instruction: str) -> list[ExecInput]:
# - Keep --no-persist only when both transcript and trajectory export are off.
# - While the CLI canary may lag behind local code, keep best-effort sync
# from /root/.open-agent/sessions to /logs/agent/open-agent-transcript.
cli_flags = f"--model {model} --output-format json"
cli_flags = f"--model {model} --output-format json --cleanup-background never"
save_trajectory = os.environ.get("OAS_HARBOR_SAVE_TRAJECTORY") == "1"
save_transcript = os.environ.get("OAS_HARBOR_SAVE_TRANSCRIPT", "1") == "1"
if save_trajectory:
Expand Down
9 changes: 8 additions & 1 deletion docs/workflows/terminal-bench-harbor-runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ln -sf "$(pwd)/benchmark/terminalbench/open_agent_sdk_harbor/agent.py" \

## 2. Load Environment Variables

Use the repository `.env` as the source of truth:
Use the repository `.env` as the single source of truth:

```bash
set -a
Expand All @@ -33,6 +33,13 @@ Required for MiniMax Anthropic-compatible endpoint:

If these are empty, `command-0` fails quickly with invalid URL/provider errors.

Quick sanity check:

```bash
echo "ANTHROPIC_API_KEY length=${#ANTHROPIC_API_KEY}"
echo "ANTHROPIC_BASE_URL=$ANTHROPIC_BASE_URL"
```

## 3. Proxy Handling (Important)

Use proxy settings for host tooling only if needed, but run the Harbor process with the proxy variables removed; a proxy bound to the host's local `127.0.0.1` can cause container networking issues.
Expand Down
29 changes: 29 additions & 0 deletions packages/core/src/tools/bash-output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@
*/

import type { Tool, ToolContext, JSONSchema } from '../types/tools';
import { readFileSync } from 'fs';
import { backgroundProcesses } from './bash';

// Upper bound, in UTF-16 code units, on how much output is kept in memory per stream.
const MAX_CAPTURE_CHARS = 200_000;
// Suffix appended to returned output when the stream was capped.
const TRUNCATED_NOTICE = '\n[Output truncated to avoid excessive memory usage]';

/**
 * Caps `value` at MAX_CAPTURE_CHARS UTF-16 code units.
 *
 * @param value - Full text read from a captured output stream or log file.
 * @returns The (possibly shortened) text and a flag telling whether anything
 *   was cut. The notice suffix is NOT appended here; callers add it.
 */
function truncateOutput(value: string): { value: string; truncated: boolean } {
  if (value.length <= MAX_CAPTURE_CHARS) return { value, truncated: false };

  // Never cut through the middle of a surrogate pair: if the last kept code
  // unit is a high surrogate, its low half lies past the cap, so drop it too.
  // Otherwise the result would end in a lone surrogate (malformed text).
  let end = MAX_CAPTURE_CHARS;
  const lastKept = value.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

  return { value: value.slice(0, end), truncated: true };
}

/** Input accepted by the BashOutput tool. */
export interface BashOutputInput {
// Identifier of the background shell to inspect.
// NOTE(review): presumably the `shellId` returned by the Bash tool when
// `run_in_background` is set — confirm against the handler's lookup.
shellId: string;
}
Expand Down Expand Up @@ -57,6 +64,28 @@ export class BashOutputTool implements Tool<BashOutputInput, BashOutputOutput> {
// Check if process is still running
const running = process.exitCode === null;

// For detached background processes, refresh output from redirected log files.
if (process.stdoutPath) {
try {
const content = readFileSync(process.stdoutPath, 'utf8');
const next = truncateOutput(content);
process.stdout = next.value;
process.stdoutTruncated = next.truncated;
} catch {
// Ignore missing/temporary read failures.
}
}
if (process.stderrPath) {
try {
const content = readFileSync(process.stderrPath, 'utf8');
const next = truncateOutput(content);
process.stderr = next.value;
process.stderrTruncated = next.truncated;
} catch {
// Ignore missing/temporary read failures.
}
}

return {
stdout: process.stdout + (process.stdoutTruncated ? TRUNCATED_NOTICE : ''),
stderr: process.stderr + (process.stderrTruncated ? TRUNCATED_NOTICE : ''),
Expand Down
109 changes: 89 additions & 20 deletions packages/core/src/tools/bash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@
*/

import { spawn, ChildProcess } from 'child_process';
import { mkdirSync, readFileSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import type { Tool, ToolContext, JSONSchema } from '../types/tools';

/**
 * Bookkeeping record for a command started with `run_in_background`.
 * Records are stored in the `backgroundProcesses` map, keyed by shell ID.
 */
export interface BackgroundProcess {
// OS process ID of the spawned shell (taken from `child.pid`).
pid: number;
// Timestamp from `Date.now()` taken when the record was created.
startTime: number;
// Captured standard output / standard error text, capped in size.
stdout: string;
stderr: string;
// Log files that the command's stdout/stderr are redirected to. Only set
// for detached background processes; undefined when output is captured
// through pipes instead.
stdoutPath?: string;
stderrPath?: string;
// True when the corresponding captured text was cut at the capture cap.
stdoutTruncated: boolean;
stderrTruncated: boolean;
// null while the process is still running; set by the 'exit' handler
// (to -1 when the process exits without a numeric code).
exitCode: number | null;
// Underlying child-process handle.
process: ChildProcess;
// True when the child was spawned with `detached: true`. Kill paths then
// signal the negative pid (the process group) instead of the child handle.
detached: boolean;
}

export interface BashInput {
Expand Down Expand Up @@ -61,6 +67,7 @@ export const backgroundProcesses = new Map<string, BackgroundProcess>();
// Cap captured output to avoid OOM when commands print large streams.
// The cap is counted in string length (UTF-16 code units), not bytes.
const MAX_CAPTURE_CHARS = 200_000;
// Suffix appended to output whose capture was cut at the cap.
const TRUNCATED_NOTICE = '\n[Output truncated to avoid excessive memory usage]';
// Directory under the OS temp dir holding the stdout/stderr log files of
// detached background commands (`<shellId>.stdout.log` / `<shellId>.stderr.log`).
// NOTE(review): file names depend only on the shell ID and the command
// redirects with `>>` (append); if shell IDs restart from the same value in a
// later run, stale content from an earlier run may be read back — confirm
// whether the logs should be truncated or removed before reuse.
const BG_LOG_DIR = join(tmpdir(), 'open-agent-sdk-bg');

function appendCapped(
current: string,
Expand Down Expand Up @@ -107,11 +114,33 @@ export class BashTool implements Tool<BashInput, BashOutput> {
return new Promise((resolve) => {
const shell = process.platform === 'win32' ? 'cmd.exe' : '/bin/sh';
const shellFlag = process.platform === 'win32' ? '/c' : '-c';
let shellId: string | undefined;
let stdoutPath: string | undefined;
let stderrPath: string | undefined;
let detachedBackground = false;

const child = spawn(shell, [shellFlag, command], {
if (run_in_background) {
shellId = `shell_${++backgroundProcessId}`;
if (process.platform !== 'win32') {
detachedBackground = true;
mkdirSync(BG_LOG_DIR, { recursive: true });
stdoutPath = join(BG_LOG_DIR, `${shellId}.stdout.log`);
stderrPath = join(BG_LOG_DIR, `${shellId}.stderr.log`);
}
}

const normalizedCommand = run_in_background
? command.replace(/\s*&\s*$/, '').trim()
: command;
const commandToRun = run_in_background && stdoutPath && stderrPath
? `( ${normalizedCommand} ) >>"${stdoutPath}" 2>>"${stderrPath}"`
: normalizedCommand;

const child = spawn(shell, [shellFlag, commandToRun], {
cwd: context.cwd,
env: { ...process.env, ...context.env },
stdio: ['ignore', 'pipe', 'pipe'],
stdio: run_in_background && detachedBackground ? ['ignore', 'ignore', 'ignore'] : ['ignore', 'pipe', 'pipe'],
detached: run_in_background ? detachedBackground : false,
});

// Set up abort handler
Expand All @@ -128,35 +157,59 @@ export class BashTool implements Tool<BashInput, BashOutput> {

// Handle background execution
if (run_in_background) {
const shellId = `shell_${++backgroundProcessId}`;
const bgProcess: BackgroundProcess = {
pid: child.pid!,
startTime: Date.now(),
stdout: '',
stderr: '',
stdoutPath,
stderrPath,
stdoutTruncated: false,
stderrTruncated: false,
exitCode: null,
process: child,
detached: detachedBackground,
};
backgroundProcesses.set(shellId, bgProcess);

// Capture stdout/stderr
child.stdout?.on('data', (data) => {
const next = appendCapped(bgProcess.stdout, data.toString(), MAX_CAPTURE_CHARS);
bgProcess.stdout = next.value;
if (next.truncated) bgProcess.stdoutTruncated = true;
});
backgroundProcesses.set(shellId!, bgProcess);

if (!detachedBackground) {
// Capture stdout/stderr in-process when not detached.
child.stdout?.on('data', (data) => {
const next = appendCapped(bgProcess.stdout, data.toString(), MAX_CAPTURE_CHARS);
bgProcess.stdout = next.value;
if (next.truncated) bgProcess.stdoutTruncated = true;
});

child.stderr?.on('data', (data) => {
const next = appendCapped(bgProcess.stderr, data.toString(), MAX_CAPTURE_CHARS);
bgProcess.stderr = next.value;
if (next.truncated) bgProcess.stderrTruncated = true;
});
child.stderr?.on('data', (data) => {
const next = appendCapped(bgProcess.stderr, data.toString(), MAX_CAPTURE_CHARS);
bgProcess.stderr = next.value;
if (next.truncated) bgProcess.stderrTruncated = true;
});
}

// Set exit code when process exits (don't delete from map)
child.on('exit', (code) => {
bgProcess.exitCode = code ?? -1;
if (bgProcess.stdoutPath) {
try {
const content = readFileSync(bgProcess.stdoutPath, 'utf8');
const next = appendCapped('', content, MAX_CAPTURE_CHARS);
bgProcess.stdout = next.value;
bgProcess.stdoutTruncated = next.truncated;
} catch {
// Ignore read errors for best-effort output capture.
}
}
if (bgProcess.stderrPath) {
try {
const content = readFileSync(bgProcess.stderrPath, 'utf8');
const next = appendCapped('', content, MAX_CAPTURE_CHARS);
bgProcess.stderr = next.value;
bgProcess.stderrTruncated = next.truncated;
} catch {
// Ignore read errors for best-effort output capture.
}
}
});

// Prevent background child handles from keeping the process alive.
Expand All @@ -166,9 +219,9 @@ export class BashTool implements Tool<BashInput, BashOutput> {

// Don't wait for completion
resolve({
output: `Command running in background with ID: ${shellId}`,
output: `Command running in background with ID: ${shellId!}`,
exitCode: 0,
shellId,
shellId: shellId!,
});

return;
Expand Down Expand Up @@ -295,11 +348,27 @@ export async function cleanupBackgroundProcesses(
done();
});

bgProcess.process.kill('SIGTERM');
try {
if (bgProcess.detached && process.platform !== 'win32') {
process.kill(-bgProcess.pid, 'SIGTERM');
} else {
bgProcess.process.kill('SIGTERM');
}
} catch {
// Ignore errors if process already exited.
}

const forceKillTimer = setTimeout(() => {
if (bgProcess.exitCode === null) {
bgProcess.process.kill('SIGKILL');
try {
if (bgProcess.detached && process.platform !== 'win32') {
process.kill(-bgProcess.pid, 'SIGKILL');
} else {
bgProcess.process.kill('SIGKILL');
}
} catch {
// Ignore errors if process already exited.
}
}
done();
}, Math.max(1, forceKillAfterMs));
Expand Down
22 changes: 19 additions & 3 deletions packages/core/src/tools/kill-bash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,16 @@ export class KillBashTool implements Tool<KillBashInput, KillBashOutput> {
};
}

// Send SIGTERM
bgProcess.process.kill('SIGTERM');
// Send SIGTERM (target process group for detached background jobs)
try {
if (bgProcess.detached && process.platform !== 'win32') {
process.kill(-bgProcess.pid, 'SIGTERM');
} else {
bgProcess.process.kill('SIGTERM');
}
} catch {
// Ignore if already exited between checks.
}

// Wait up to 5 seconds for graceful exit, then SIGKILL
return new Promise((resolve) => {
Expand All @@ -74,7 +82,15 @@ export class KillBashTool implements Tool<KillBashInput, KillBashOutput> {
const forceKillTimeout = setTimeout(() => {
clearInterval(checkInterval);
if (bgProcess.exitCode === null) {
bgProcess.process.kill('SIGKILL');
try {
if (bgProcess.detached && process.platform !== 'win32') {
process.kill(-bgProcess.pid, 'SIGKILL');
} else {
bgProcess.process.kill('SIGKILL');
}
} catch {
// Ignore if already exited.
}
resolve({
success: true,
message: `Process ${shellId} force-killed with SIGKILL`,
Expand Down
15 changes: 15 additions & 0 deletions packages/core/tests/tools/bash-enhanced.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,19 @@ describe('BashTool - Enhanced Background Process Tracking', () => {
const processAfterCleanup = getBackgroundProcess(shellId);
expect(processAfterCleanup?.exitCode).not.toBeNull();
});

// Regression test: a command that already ends in `&` is still registered as
// a tracked background process when `run_in_background` is set.
test('should handle commands that already include trailing & in background mode', async () => {
const result = await tool.handler(
{
command: 'sleep 1 &',
run_in_background: true,
},
context
);

// The handler must hand back a shell ID that resolves to a live record.
const shellId = result.shellId!;
const process = getBackgroundProcess(shellId);
expect(process).toBeDefined();
// exitCode stays null while the process has not reported an exit yet.
expect(process?.exitCode).toBeNull();
});
});
42 changes: 0 additions & 42 deletions scripts/env/use-profile.sh

This file was deleted.