Skip to content

Commit 6362fe6

Browse files
committed
fix: hidden test exposure & docs: bump version to v1.6.0 with incompatibility note
1 parent f735dc4 commit 6362fe6

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
A lightweight evaluation harness for coding agents that runs high-signal, compact but challenging problems in isolated Docker containers. Evaluate agents across 26 tasks in 6 languages with weighted scoring, integrity verification, and detailed reporting.
1111

12+
> **Note:** Evaluation results obtained before `v1.6.0` are not comparable with results from `v1.6.0` or later. Earlier versions copied hidden tests into the agent's workspace; from `v1.6.0` onward only the visible task files are copied.
13+
1214
<!-- Add demo GIF/screenshot here -->
1315

1416
## Table of Contents

internal/runner/runner.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ func (r *Runner) ensureWorkspace(t *task.Task, dir string) error {
412412
return nil
413413
}
414414

415-
return r.copyTaskFiles(t, dir, t.AllFiles())
415+
return r.copyTaskFiles(t, dir, t.VisibleFiles())
416416
}
417417

418418
// captureWorkspace reads the workspace files into the session.
@@ -500,7 +500,7 @@ func (r *Runner) InitWorkspaceForTask(t *task.Task, outputDir string) error {
500500
return fmt.Errorf("directory is not empty: %s", absDir)
501501
}
502502

503-
return r.copyTaskFiles(t, absDir, t.AllFiles())
503+
return r.copyTaskFiles(t, absDir, t.VisibleFiles())
504504
}
505505

506506
// ListTasks returns all available tasks.

internal/task/task.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ type Validation struct {
6969
Args []string `json:"args" toml:"args"`
7070
}
7171

72+
// VisibleFiles returns the files the agent may see from the start: the stub, test, and support files. Hidden tests are excluded.
73+
func (t *Task) VisibleFiles() []string {
74+
files := make([]string, 0, len(t.Files.Stub)+len(t.Files.Test)+len(t.Files.Support))
75+
files = append(files, t.Files.Stub...)
76+
files = append(files, t.Files.Test...)
77+
files = append(files, t.Files.Support...)
78+
return files
79+
}
80+
7281
// AllFiles returns all files associated with this task, including hidden tests.
7382
func (t *Task) AllFiles() []string {
7483
files := make([]string, 0, len(t.Files.Stub)+len(t.Files.Test)+len(t.Files.HiddenTest)+len(t.Files.Support))

0 commit comments

Comments
 (0)