Skip to content

Commit 371dabd

Browse files
chore: add swe bench example fetcher (#550)
# Motivation
<!-- Why is this change necessary? -->

# Content
<!-- Please include a summary of the change -->

# Testing
<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review
- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed

Co-authored-by: jemeza-codegen <[email protected]>
1 parent b318ee8 commit 371dabd

File tree

1 file changed

+67
-0
lines changed
  • src/codegen/extensions/langchain

1 file changed

+67
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Utilities for working with language models and datasets."""
2+
3+
from dataclasses import dataclass
4+
from typing import Optional
5+
6+
import requests
7+
8+
9+
@dataclass
class SweBenchExample:
    """One row of the SWE-bench dataset, held as a plain typed record.

    Attribute names follow the dataset's column names, lower-cased where the
    dataset spells them in upper case (``FAIL_TO_PASS`` / ``PASS_TO_PASS``).

    Field order is part of the interface: the generated ``__init__`` accepts
    the values positionally in exactly the order declared below, and none of
    the fields has a default, so all twelve must be supplied.
    """

    # --- Where the task lives -------------------------------------------
    repo: str
    instance_id: str
    base_commit: str

    # --- Patches --------------------------------------------------------
    # NOTE(review): presumably unified diffs (reference fix / added tests);
    # confirm against the dataset card.
    patch: str
    test_patch: str

    # --- Task description -----------------------------------------------
    problem_statement: str
    hints_text: Optional[str]  # None when the row carries no hints

    # --- Metadata -------------------------------------------------------
    created_at: str
    version: str

    # --- Test selections ------------------------------------------------
    # Stored as the raw strings delivered by the dataset; presumably encoded
    # lists of test identifiers -- TODO confirm before parsing.
    fail_to_pass: str
    pass_to_pass: Optional[str]

    # --- Environment ----------------------------------------------------
    environment_setup_commit: Optional[str]
25+
26+
27+
def get_swe_bench_examples(
    *,
    split: str = "dev",
    offset: int = 0,
    length: int = 100,
    timeout: Optional[float] = 30.0,
) -> list[SweBenchExample]:
    """Fetch one page of examples from the SWE-bench dataset.

    Queries the Hugging Face datasets-server ``/rows`` endpoint for the
    ``princeton-nlp/SWE-bench`` dataset (``default`` config) and converts
    every returned row into a ``SweBenchExample``. Called with no arguments
    it requests the first 100 rows of the ``dev`` split.

    Args:
        split: Dataset split to read from.
        offset: Zero-based index of the first row to return.
        length: Number of rows to request. NOTE(review): the rows endpoint
            is documented to cap this at 100 per request -- page with
            ``offset`` to read further.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``. ``None`` disables the timeout.

    Returns:
        List of SweBenchExample objects, in the order the server returned
        the rows.

    Raises:
        requests.RequestException: If the API request fails, times out, or
            the server answers with an HTTP error status.
        KeyError: If the response, or one of its rows, lacks a required
            field.
    """
    url = "https://datasets-server.huggingface.co/rows"
    params = {
        "dataset": "princeton-nlp/SWE-bench",
        "config": "default",
        "split": split,
        "offset": offset,
        "length": length,
    }

    # Without an explicit timeout, requests waits forever on a stalled
    # connection; bound the wait so callers cannot hang indefinitely.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    examples = []
    for entry in payload["rows"]:
        # Each element wraps the actual column values under the "row" key.
        fields = entry["row"]
        examples.append(
            SweBenchExample(
                repo=fields["repo"],
                instance_id=fields["instance_id"],
                base_commit=fields["base_commit"],
                patch=fields["patch"],
                test_patch=fields["test_patch"],
                problem_statement=fields["problem_statement"],
                # Optional columns: a missing key becomes None, not an error.
                hints_text=fields.get("hints_text"),
                created_at=fields["created_at"],
                version=fields["version"],
                # The dataset spells the two test-list columns in upper case.
                fail_to_pass=fields["FAIL_TO_PASS"],
                pass_to_pass=fields.get("PASS_TO_PASS"),
                environment_setup_commit=fields.get("environment_setup_commit"),
            )
        )

    return examples

0 commit comments

Comments
 (0)