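More tweaks: fix the runUnitTest log tag, rename the web title to "Roo Code Evals", add a commented-out OpenAI-compatible provider example to the defaults, and drop the multi_search_and_replace experiment default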

cte · cte · commit 326b647baf62 · 2025-04-04T00:06:13.000-07:00
diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts
@@ -356,7 +356,7 @@ const runUnitTest = async ({ task }: { task: Task }) => {
 				break
 			}
 		} catch (error) {
-			console.log("[cli#run] execa error =", error)
+			console.log("[cli#runUnitTest] execa error =", error)
 			passed = false
 			break
 		}
diff --git a/evals/apps/web/src/app/layout.tsx b/evals/apps/web/src/app/layout.tsx
@@ -11,7 +11,7 @@ const fontSans = Geist({ variable: "--font-sans", subsets: ["latin"] })
 const fontMono = Geist_Mono({ variable: "--font-mono", subsets: ["latin"] })
 
 export const metadata: Metadata = {
-	title: "Roo Code Benchmarks",
+	title: "Roo Code Evals",
 }
 
 export default function RootLayout({
diff --git a/evals/packages/types/src/roo-code-defaults.ts b/evals/packages/types/src/roo-code-defaults.ts
@@ -4,6 +4,22 @@ export const rooCodeDefaults: RooCodeSettings = {
 	apiProvider: "openrouter",
 	openRouterModelId: "google/gemini-2.0-flash-001", // "anthropic/claude-3.7-sonnet",
 
+	// apiProvider: "openai",
+	// openAiBaseUrl: "http://hrudolph.duckdns.org:4269/api/v1",
+	// openAiApiKey: process.env.OPENAI_API_KEY,
+	// openAiModelId: "models/gemini-2.5-pro-exp-03-25",
+	// openAiCustomModelInfo: {
+	// 	maxTokens: 65536,
+	// 	contextWindow: 1000000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 0,
+	// 	outputPrice: 0,
+	// 	description:
+	// 		"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.",
+	// 	thinking: false,
+	// },
+
 	pinnedApiConfigs: {},
 	lastShownAnnouncementId: "mar-20-2025-3-10",
 
@@ -47,7 +63,6 @@ export const rooCodeDefaults: RooCodeSettings = {
 	diffEnabled: true,
 	fuzzyMatchThreshold: 1.0,
 	experiments: {
-		multi_search_and_replace: false,
 		search_and_replace: true,
 		insert_content: false,
 		powerSteering: false,

Original file line number	Diff line number	Diff line change
`@@ -356,7 +356,7 @@ const runUnitTest = async ({ task }: { task: Task }) => {`
`356`	`356`	`break`
`357`	`357`	`}`
`358`	`358`	`} catch (error) {`
`359`		`- console.log("[cli#run] execa error =", error)`
	`359`	`+ console.log("[cli#runUnitTest] execa error =", error)`
`360`	`360`	`passed = false`
`361`	`361`	`break`
`362`	`362`	`}`
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@ const fontSans = Geist({ variable: "--font-sans", subsets: ["latin"] })`
`11`	`11`	`const fontMono = Geist_Mono({ variable: "--font-mono", subsets: ["latin"] })`
`12`	`12`
`13`	`13`	`export const metadata: Metadata = {`
`14`		`- title: "Roo Code Benchmarks",`
	`14`	`+ title: "Roo Code Evals",`
`15`	`15`	`}`
`16`	`16`
`17`	`17`	`export default function RootLayout({`