Skip to content

Commit 77150a8

Browse files
chore: improved swebench dataset access (#675)
# Motivation
<!-- Why is this change necessary? -->

# Content
<!-- Please include a summary of the change -->

# Testing
<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review
- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
1 parent 3e2e6e1 commit 77150a8

File tree

3 files changed

+60
-86
lines changed

3 files changed

+60
-86
lines changed

codegen-examples/examples/swebench_agent_run/run_eval.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import modal
77
import click
88
from datetime import datetime
9-
from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
9+
from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_examples
1010
from codegen.extensions.swebench.report import generate_report
1111

1212
PREDS_DNAME = Path(__file__).parent / "predictions"
@@ -92,10 +92,7 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in
9292
run_id = use_existing_preds or str(uuid.uuid4())
9393
predictions_dir = PREDS_DNAME / f"results_{run_id}"
9494
dataset = SWEBenchDataset(dataset)
95-
if instance_id:
96-
examples = [get_swe_bench_example(instance_id, dataset=dataset)]
97-
else:
98-
examples = get_swe_bench_examples(dataset=dataset, length=length)
95+
examples = get_swe_bench_examples(dataset=dataset, length=length, instance_id=instance_id)
9996

10097
try:
10198
if use_existing_preds is None:

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ dependencies = [
7474
"httpx>=0.28.1",
7575
"docker>=6.1.3",
7676
"urllib3>=2.0.0",
77+
"datasets",
7778
]
7879

7980
license = { text = "Apache-2.0" }

src/codegen/extensions/swebench/utils.py

Lines changed: 57 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
from pprint import pprint
66
from typing import Literal, Optional
77

8-
import requests
8+
from datasets import load_dataset
9+
10+
# Local cache directory passed to load_dataset for SWE-bench data
11+
CACHE_DIR = Path.home() / ".cache" / "swebench"
912

1013

1114
class SWEBenchDataset(Enum):
@@ -64,93 +67,66 @@ def load_predictions(paths):
6467
return predictions
6568

6669

67-
def get_swe_bench_examples(dataset: SWEBenchDataset = SWEBenchDataset.LITE, split: Literal["train", "dev", "test"] = "test", offset: int = 0, length: int = 100) -> list[SweBenchExample]:
68-
"""Fetch examples from the SWE-bench dataset.
70+
def get_swe_bench_examples(
71+
dataset: SWEBenchDataset = SWEBenchDataset.LITE,
72+
split: Literal["train", "dev", "test"] = "test",
73+
offset: int = 0,
74+
length: int = 100,
75+
instance_id: str | None = None,
76+
) -> list[SweBenchExample]:
77+
"""Fetch examples from the SWE-bench dataset using the datasets library.
78+
79+
Args:
80+
dataset: The dataset to use (LITE, FULL, or VERIFIED)
81+
split: The dataset split to use
82+
offset: Starting index for examples
83+
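length: Number of examples to fetch (offset and length are ignored when instance_id is given; the whole split is scanned and only rows whose instance_id matches are returned)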
6984
7085
Returns:
7186
List of SweBenchExample objects
72-
73-
Raises:
74-
requests.RequestException: If the API request fails
7587
"""
76-
url = "https://datasets-server.huggingface.co/rows"
77-
params = {
78-
"dataset": dataset.value,
79-
"config": "default",
80-
"split": split,
81-
"offset": offset,
82-
"length": length,
83-
}
84-
85-
response = requests.get(url, params=params)
86-
response.raise_for_status()
87-
data = response.json()
88-
88+
# Ensure cache directory exists
89+
CACHE_DIR.mkdir(parents=True, exist_ok=True)
90+
91+
# Load the dataset with caching enabled
92+
dataset_name = dataset.value
93+
swe_bench_dataset = load_dataset(dataset_name, cache_dir=str(CACHE_DIR), download_mode="reuse_dataset_if_exists")
94+
95+
# Get the requested split
96+
split_data = swe_bench_dataset[split]
97+
98+
# Apply offset and length
99+
if instance_id:
100+
offset = 0
101+
end_idx = len(split_data)
102+
else:
103+
end_idx = min(offset + length, len(split_data))
104+
if offset >= len(split_data):
105+
return []
106+
107+
# Use the select method instead of slicing
108+
# This ensures we get dictionary-like objects
109+
selected_rows = split_data.select(range(offset, end_idx))
110+
111+
# Convert to SweBenchExample objects
89112
examples = []
90-
for row in data["rows"]:
113+
for row in selected_rows:
114+
if instance_id and row["instance_id"] != instance_id:
115+
continue
91116
example = SweBenchExample(
92-
repo=row["row"]["repo"],
93-
instance_id=row["row"]["instance_id"],
94-
base_commit=row["row"]["base_commit"],
95-
patch=row["row"]["patch"],
96-
test_patch=row["row"]["test_patch"],
97-
problem_statement=row["row"]["problem_statement"],
98-
hints_text=row["row"].get("hints_text"),
99-
created_at=row["row"]["created_at"],
100-
version=row["row"]["version"],
101-
fail_to_pass=row["row"]["FAIL_TO_PASS"],
102-
pass_to_pass=row["row"].get("PASS_TO_PASS"),
103-
environment_setup_commit=row["row"].get("environment_setup_commit"),
117+
repo=row["repo"],
118+
instance_id=row["instance_id"],
119+
base_commit=row["base_commit"],
120+
patch=row["patch"],
121+
test_patch=row["test_patch"],
122+
problem_statement=row["problem_statement"],
123+
hints_text=row.get("hints_text"),
124+
created_at=row["created_at"],
125+
version=row["version"],
126+
fail_to_pass=row["FAIL_TO_PASS"],
127+
pass_to_pass=row.get("PASS_TO_PASS"),
128+
environment_setup_commit=row.get("environment_setup_commit"),
104129
)
105130
examples.append(example)
106131

107132
return examples
108-
109-
110-
def get_swe_bench_example(
111-
instance_id: str,
112-
dataset: SWEBenchDataset = SWEBenchDataset.LITE,
113-
) -> SweBenchExample:
114-
"""Fetch a single example from the SWE-bench dataset by its instance ID.
115-
116-
Args:
117-
instance_id: The unique identifier of the example to fetch
118-
119-
Returns:
120-
SweBenchExample object
121-
122-
Raises:
123-
ValueError: If no example found with the given ID
124-
requests.RequestException: If the API request fails
125-
"""
126-
url = "https://datasets-server.huggingface.co/filter"
127-
params = {
128-
"dataset": dataset.value,
129-
"config": "default",
130-
"split": "dev",
131-
"where": f"instance_id='{instance_id}'",
132-
}
133-
134-
response = requests.get(url, params=params)
135-
response.raise_for_status()
136-
data = response.json()
137-
138-
if not data["rows"]:
139-
msg = f"No example found with instance_id: {instance_id}"
140-
raise ValueError(msg)
141-
142-
row = data["rows"][0]["row"]
143-
return SweBenchExample(
144-
repo=row["repo"],
145-
instance_id=row["instance_id"],
146-
base_commit=row["base_commit"],
147-
patch=row["patch"],
148-
test_patch=row["test_patch"],
149-
problem_statement=row["problem_statement"],
150-
hints_text=row.get("hints_text"),
151-
created_at=row["created_at"],
152-
version=row["version"],
153-
fail_to_pass=row["FAIL_TO_PASS"],
154-
pass_to_pass=row.get("PASS_TO_PASS"),
155-
environment_setup_commit=row.get("environment_setup_commit"),
156-
)

0 commit comments

Comments
 (0)