Skip to content

Commit c46da5b

Browse files
committed
SWE-bench
1 parent f813ed8 commit c46da5b

File tree

11 files changed

+713
-3
lines changed

11 files changed

+713
-3
lines changed

evals/apps/swe-bench/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.venv
2+
3+
datasets
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import { config } from "@evals/eslint-config/base"
2+
3+
/** @type {import("eslint").Linter.Config} */
4+
export default [...config]

evals/apps/swe-bench/package.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "@evals/swe-bench",
3+
"private": true,
4+
"type": "module",
5+
"scripts": {
6+
"dev": "tsx src/index.ts",
7+
"dataset": "uv run scripts/dataset.py"
8+
},
9+
"dependencies": {},
10+
"devDependencies": {
11+
"@evals/eslint-config": "workspace:^",
12+
"@evals/typescript-config": "workspace:^"
13+
}
14+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[project]
2+
name = "swe-bench"
3+
version = "0.1.0"
4+
requires-python = ">=3.13"
5+
dependencies = [
6+
"datasets>=3.5.1",
7+
]
evals/apps/swe-bench/scripts/dataset.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from datasets import Dataset, load_dataset
2+
from typing import Any, Dict, cast
3+
import json
4+
import os
5+
6+
7+
def main():
    """Export the SWE-bench ``test`` split to ``datasets/swebench_test.jsonl``.

    Each dataset row is written as one JSON object per line. If the output
    file already exists the function prints a notice and returns without
    loading the dataset.

    The ``datasets`` directory is resolved relative to the current working
    directory.
    """
    datasets_dir = "datasets"
    data_file = os.path.join(datasets_dir, "swebench_test.jsonl")

    # Check for an existing export *before* loading the dataset, so a re-run
    # does not pay for the dataset load just to discover it has nothing to do.
    if os.path.exists(data_file):
        print(f"dataset already exists in {data_file}")
        return

    swebench = cast(Dataset, load_dataset("princeton-nlp/SWE-bench", split="test"))

    # print(f"Count: {len(swebench)}")
    # print(f"Features: {swebench.features}")
    # print(f"Repos: {swebench.unique('repo')}")

    os.makedirs(datasets_dir, exist_ok=True)

    # Write to a temporary file and rename it into place once complete.
    # Otherwise an interrupted run would leave a truncated file behind that
    # the existence check above would treat as a finished export.
    tmp_file = data_file + ".tmp"

    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            for i, row in enumerate(swebench):
                example = dict(cast(Dict[str, Any], row))
                f.write(json.dumps(example) + "\n")
                if i % 100 == 0:
                    print(f"Processed {i} rows...")

        os.replace(tmp_file, data_file)
    finally:
        # After a successful os.replace the temp path no longer exists; this
        # only removes a partial file left by a failed or interrupted write.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

    print(f"dataset successfully written to {data_file}")


if __name__ == "__main__":
    main()

evals/apps/swe-bench/src/index.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import fs from "fs"
2+
import path from "path"
3+
import readline from "readline"
4+
import { fileURLToPath } from "url"
5+
6+
const __filename = fileURLToPath(import.meta.url)
7+
const __dirname = path.dirname(__filename)
8+
9+
/**
 * Streams `../datasets/swebench_test.jsonl` (relative to this module's
 * directory) line by line and prints each line to stdout.
 *
 * Errors raised while reading are logged and recorded as a non-zero process
 * exit code; they are not re-thrown.
 */
async function main(): Promise<void> {
	const datasetPath = path.join(__dirname, "..", "datasets", "swebench_test.jsonl")

	const input = fs.createReadStream(datasetPath, { encoding: "utf-8" })
	// crlfDelay: Infinity makes "\r\n" always count as a single line break.
	const rl = readline.createInterface({ input, crlfDelay: Infinity })

	try {
		for await (const line of rl) {
			console.log(`Line: ${line}`)
			// console.log(JSON.parse(line))
			// break
		}
	} catch (error) {
		console.error("Error reading file:", error)
		// Report the failure to the caller (shell, CI) instead of exiting 0.
		process.exitCode = 1
	} finally {
		// Release the interface and the underlying stream on every path,
		// including when the loop body throws.
		rl.close()
		input.close()
	}
}

// main() handles errors from its own read loop, so this handler only fires
// for failures raised outside that try block.
main().catch((error) => {
	console.error("Unhandled error:", error)
	process.exitCode = 1
})

evals/apps/swe-bench/tsconfig.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"extends": "@evals/typescript-config/base.json",
3+
"include": ["src"],
4+
"exclude": ["node_modules"]
5+
}

evals/apps/swe-bench/uv.lock

Lines changed: 606 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

evals/packages/types/src/roo-code.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,6 @@ export const providerSettingsSchema = z.object({
399399
// OpenAI Native
400400
openAiNativeApiKey: z.string().optional(),
401401
openAiNativeBaseUrl: z.string().optional(),
402-
// XAI
403-
xaiApiKey: z.string().optional(),
404402
// Mistral
405403
mistralApiKey: z.string().optional(),
406404
mistralCodestralUrl: z.string().optional(),

0 commit comments

Comments
 (0)