Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ dependencies = [
"urllib3>=2.0.0",
"datasets",
"colorlog>=6.9.0",
"langsmith",
]

license = { text = "Apache-2.0" }
Expand Down
24 changes: 13 additions & 11 deletions src/codegen/extensions/swebench/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

from datasets import load_dataset

# Add constant for cache directory
CACHE_DIR = Path.home() / ".cache" / "swebench"


class SWEBenchDataset(Enum):
LITE = "princeton-nlp/SWE-bench_Lite"
Expand Down Expand Up @@ -68,35 +65,38 @@ def load_predictions(paths):


def get_swe_bench_examples(
dataset: SWEBenchDataset = SWEBenchDataset.LITE,
dataset: Literal["lite", "full", "verified"] = "lite",
split: Literal["train", "dev", "test"] = "test",
offset: int = 0,
length: int = 100,
instance_id: str | None = None,
repo: str | None = None,
) -> list[SweBenchExample]:
"""Fetch examples from the SWE-bench dataset using the datasets library.

Args:
dataset: The dataset to use (LITE, FULL, or VERIFIED)
dataset: The dataset to use ("lite", "full", or "verified")
split: The dataset split to use
offset: Starting index for examples
length: Number of examples to fetch
instance_id: Optional specific instance ID to fetch
        repo: Optional repository name to filter examples by

Returns:
List of SweBenchExample objects
"""
# Ensure cache directory exists
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# Convert string dataset name to enum
dataset_map = {"lite": SWEBenchDataset.LITE, "full": SWEBenchDataset.FULL, "verified": SWEBenchDataset.VERIFIED}
dataset_enum = dataset_map[dataset.lower()]

# Load the dataset with caching enabled
dataset_name = dataset.value
swe_bench_dataset = load_dataset(dataset_name, cache_dir=str(CACHE_DIR), download_mode="reuse_dataset_if_exists")
dataset_name = dataset_enum.value
swe_bench_dataset = load_dataset(dataset_name, download_mode="reuse_dataset_if_exists")

# Get the requested split
split_data = swe_bench_dataset[split]

# Apply offset and length
if instance_id:
if instance_id or repo:
offset = 0
end_idx = len(split_data)
else:
Expand All @@ -113,6 +113,8 @@ def get_swe_bench_examples(
for row in selected_rows:
if instance_id and row["instance_id"] != instance_id:
continue
if repo and row["repo"] != repo:
continue
example = SweBenchExample(
repo=row["repo"],
instance_id=row["instance_id"],
Expand All @@ -129,4 +131,4 @@ def get_swe_bench_examples(
)
examples.append(example)

return examples
return examples[:length]
19 changes: 0 additions & 19 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading