Skip to content

Commit ce3e4e8

Browse files
authored
feat(evals): add UI and backend support for importing and injecting f… (#3606)
1 parent 1e5783d commit ce3e4e8

File tree

4 files changed

+52
-9
lines changed

4 files changed

+52
-9
lines changed

evals/apps/cli/src/index.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,15 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
178178
const workspacePath = path.resolve(exercisesPath, language, exercise)
179179
const taskSocketPath = path.resolve(dirname, `${dirname}/task-${task.id}.sock`)
180180

181+
// Inject foot gun system prompt if present
182+
if (process.env.FOOTGUN_SYSTEM_PROMPT) {
183+
const rooDir = path.join(workspacePath, ".roo")
184+
if (!fs.existsSync(rooDir)) {
185+
fs.mkdirSync(rooDir, { recursive: true })
186+
}
187+
fs.writeFileSync(path.join(rooDir, "system-prompt-code"), process.env.FOOTGUN_SYSTEM_PROMPT)
188+
}
189+
181190
// If debugging:
182191
// Use --wait --log trace or --verbose.
183192
// Don't await execa and store result as subprocess.

evals/apps/web/src/app/runs/new/new-run.tsx

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ import { zodResolver } from "@hookform/resolvers/zod"
88
import fuzzysort from "fuzzysort"
99
import { toast } from "sonner"
1010
import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "lucide-react"
11+
import { Dialog, DialogContent, DialogTitle, DialogFooter } from "@/components/ui/dialog"
1112

1213
import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"
1314

@@ -83,6 +84,10 @@ export function NewRun() {
8384

8485
const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])
8586

87+
const [systemPromptDialogOpen, setSystemPromptDialogOpen] = useState(false)
88+
const [systemPrompt, setSystemPrompt] = useState("")
89+
const systemPromptRef = useRef<HTMLTextAreaElement>(null)
90+
8691
const onSubmit = useCallback(
8792
async (values: FormValues) => {
8893
try {
@@ -97,13 +102,13 @@ export function NewRun() {
97102
values.settings = { ...(values.settings || {}), openRouterModelId }
98103
}
99104

100-
const { id } = await createRun(values)
105+
const { id } = await createRun({ ...values, systemPrompt })
101106
router.push(`/runs/${id}`)
102107
} catch (e) {
103108
toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
104109
}
105110
},
106-
[mode, model, models.data, router],
111+
[mode, model, models.data, router, systemPrompt],
107112
)
108113

109114
const onFilterModels = useCallback(
@@ -313,6 +318,10 @@ export function NewRun() {
313318
)}
314319
<FormMessage />
315320
</FormItem>
321+
322+
<Button type="button" variant="secondary" onClick={() => setSystemPromptDialogOpen(true)}>
323+
Import Foot Gun System Prompt
324+
</Button>
316325
</div>
317326

318327
<FormField
@@ -394,6 +403,21 @@ export function NewRun() {
394403
onClick={() => router.push("/")}>
395404
<X className="size-6" />
396405
</Button>
406+
<Dialog open={systemPromptDialogOpen} onOpenChange={setSystemPromptDialogOpen}>
407+
<DialogContent>
408+
<DialogTitle>Import Foot Gun System Prompt</DialogTitle>
409+
<textarea
410+
ref={systemPromptRef}
411+
value={systemPrompt}
412+
onChange={(e) => setSystemPrompt(e.target.value)}
413+
placeholder="Paste or type your system prompt here..."
414+
className="w-full min-h-[120px] border rounded p-2"
415+
/>
416+
<DialogFooter>
417+
<Button onClick={() => setSystemPromptDialogOpen(false)}>Done</Button>
418+
</DialogFooter>
419+
</DialogContent>
420+
</Dialog>
397421
</>
398422
)
399423
}

evals/apps/web/src/lib/schemas.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ export const createRunSchema = z
1818
exercises: z.array(z.string()).optional(),
1919
settings: rooCodeSettingsSchema.optional(),
2020
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
21+
systemPrompt: z.string().optional(),
2122
})
2223
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
2324
message: "Exercises are required when running a partial suite.",

evals/apps/web/src/lib/server/runs.ts

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ import * as db from "@evals/db"
1414
import { CreateRun } from "@/lib/schemas"
1515
import { getExercisesForLanguage } from "./exercises"
1616

17-
export async function createRun({ suite, exercises = [], ...values }: CreateRun) {
17+
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
1818
const run = await db.createRun({
1919
...values,
2020
socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
@@ -45,13 +45,22 @@ export async function createRun({ suite, exercises = [], ...values }: CreateRun)
4545
try {
4646
const logFile = fs.openSync(`/tmp/roo-code-evals-${run.id}.log`, "a")
4747

48-
const process = spawn("pnpm", ["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()], {
49-
detached: true,
50-
stdio: ["ignore", logFile, logFile],
51-
})
48+
const env: NodeJS.ProcessEnv = systemPrompt
49+
? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
50+
: process.env
5251

53-
process.unref()
54-
await db.updateRun(run.id, { pid: process.pid })
52+
const childProcess = spawn(
53+
"pnpm",
54+
["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()],
55+
{
56+
detached: true,
57+
stdio: ["ignore", logFile, logFile],
58+
env,
59+
},
60+
)
61+
62+
childProcess.unref()
63+
await db.updateRun(run.id, { pid: childProcess.pid })
5564
} catch (error) {
5665
console.error(error)
5766
}

0 commit comments

Comments
 (0)