Skip to content

Commit 6105efb

Browse files
committed
eval: add optional setup script support for eval sessions
Add a 'setup' property to EvalCriteria that holds an sh script to run inside the eval container before cagent exec. The script is written to a temporary file on the host, bind-mounted read-only into the container at /setup.sh, and run by an overridden entrypoint (/run.sh, then sh /setup.sh) before cagent exec starts; if the script exits with a non-zero status, cagent is not run. The temporary file is removed once the run finishes. This is useful for preparing the container environment (e.g., installing packages, initializing repos) before the agent runs.

Assisted-By: cagent
1 parent 001e339 commit 6105efb

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

pkg/evaluation/eval.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ func (r *Runner) runSingleEval(ctx context.Context, evalSess *InputSession) (Res
309309
return result, fmt.Errorf("building eval image: %w", err)
310310
}
311311

312-
events, err := r.runCagentInContainer(ctx, imageID, getUserMessages(evalSess.Session))
312+
events, err := r.runCagentInContainer(ctx, imageID, getUserMessages(evalSess.Session), evals.Setup)
313313
if err != nil {
314314
return result, fmt.Errorf("running cagent in container: %w", err)
315315
}
@@ -346,7 +346,7 @@ func (r *Runner) runSingleEval(ctx context.Context, evalSess *InputSession) (Res
346346
return result, nil
347347
}
348348

349-
func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, questions []string) ([]map[string]any, error) {
349+
func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, questions []string, setup string) ([]map[string]any, error) {
350350
agentDir := r.agentSource.ParentDir()
351351
agentFile := filepath.Base(r.agentSource.Name())
352352
containerName := fmt.Sprintf("cagent-eval-%d", uuid.New().ID())
@@ -396,7 +396,31 @@ func (r *Runner) runCagentInContainer(ctx context.Context, imageID string, quest
396396
}
397397
}
398398

399-
args = append(args, imageID, "/configs/"+agentFile)
399+
// When a setup script is provided, mount it into the container and
400+
// override the entrypoint to run it before cagent exec.
401+
// The default entrypoint is: /run.sh /cagent exec --yolo --json
402+
// /run.sh starts dockerd then exec's "$@".
403+
if setup != "" {
404+
setupFile := filepath.Join(os.TempDir(), fmt.Sprintf("cagent-eval-setup-%d.sh", uuid.New().ID()))
405+
if err := os.WriteFile(setupFile, []byte(setup), 0o600); err != nil {
406+
return nil, fmt.Errorf("writing setup script: %w", err)
407+
}
408+
defer os.Remove(setupFile)
409+
410+
args = append(args,
411+
"-v", setupFile+":/setup.sh:ro",
412+
"--entrypoint", "/run.sh",
413+
)
414+
}
415+
416+
args = append(args, imageID)
417+
418+
if setup != "" {
419+
// Run setup script, then cagent exec with the original arguments.
420+
args = append(args, "sh", "-c", "sh /setup.sh && exec /cagent exec --yolo --json \"$@\"", "--", "/configs/"+agentFile)
421+
} else {
422+
args = append(args, "/configs/"+agentFile)
423+
}
400424
args = append(args, questions...)
401425

402426
cmd := exec.CommandContext(ctx, "docker", args...)

pkg/session/session.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ type EvalCriteria struct {
207207
Relevance []string `json:"relevance"` // Statements that should be true about the response
208208
WorkingDir string `json:"working_dir,omitempty"` // Subdirectory under evals/working_dirs/
209209
Size string `json:"size,omitempty"` // Expected response size: S, M, L, XL
210+
Setup string `json:"setup,omitempty"` // Optional sh script to run in the container before cagent exec
210211
}
211212

212213
// Session helper methods

0 commit comments

Comments
 (0)