|
5 | 5 | from pprint import pprint |
6 | 6 | from typing import Literal, Optional |
7 | 7 |
|
8 | | -import requests |
| 8 | +from datasets import load_dataset |
| 9 | + |
| 10 | +# Add constant for cache directory |
| 11 | +CACHE_DIR = Path.home() / ".cache" / "swebench" |
9 | 12 |
|
10 | 13 |
|
11 | 14 | class SWEBenchDataset(Enum): |
@@ -64,93 +67,66 @@ def load_predictions(paths): |
64 | 67 | return predictions |
65 | 68 |
|
66 | 69 |
|
67 | | -def get_swe_bench_examples(dataset: SWEBenchDataset = SWEBenchDataset.LITE, split: Literal["train", "dev", "test"] = "test", offset: int = 0, length: int = 100) -> list[SweBenchExample]: |
68 | | - """Fetch examples from the SWE-bench dataset. |
| 70 | +def get_swe_bench_examples( |
| 71 | + dataset: SWEBenchDataset = SWEBenchDataset.LITE, |
| 72 | + split: Literal["train", "dev", "test"] = "test", |
| 73 | + offset: int = 0, |
| 74 | + length: int = 100, |
| 75 | + instance_id: str | None = None, |
| 76 | +) -> list[SweBenchExample]: |
| 77 | + """Fetch examples from the SWE-bench dataset using the datasets library. |
| 78 | +
|
| 79 | + Args: |
| 80 | + dataset: The dataset to use (LITE, FULL, or VERIFIED) |
| 81 | + split: The dataset split to use |
| 82 | + offset: Starting index for examples |
| 83 | + length: Number of examples to fetch |
69 | 84 |
|
70 | 85 | Returns: |
71 | 86 | List of SweBenchExample objects |
72 | | -
|
73 | | - Raises: |
74 | | - requests.RequestException: If the API request fails |
75 | 87 | """ |
76 | | - url = "https://datasets-server.huggingface.co/rows" |
77 | | - params = { |
78 | | - "dataset": dataset.value, |
79 | | - "config": "default", |
80 | | - "split": split, |
81 | | - "offset": offset, |
82 | | - "length": length, |
83 | | - } |
84 | | - |
85 | | - response = requests.get(url, params=params) |
86 | | - response.raise_for_status() |
87 | | - data = response.json() |
88 | | - |
| 88 | + # Ensure cache directory exists |
| 89 | + CACHE_DIR.mkdir(parents=True, exist_ok=True) |
| 90 | + |
| 91 | + # Load the dataset with caching enabled |
| 92 | + dataset_name = dataset.value |
| 93 | + swe_bench_dataset = load_dataset(dataset_name, cache_dir=str(CACHE_DIR), download_mode="reuse_dataset_if_exists") |
| 94 | + |
| 95 | + # Get the requested split |
| 96 | + split_data = swe_bench_dataset[split] |
| 97 | + |
| 98 | + # Apply offset and length |
| 99 | + if instance_id: |
| 100 | + offset = 0 |
| 101 | + end_idx = len(split_data) |
| 102 | + else: |
| 103 | + end_idx = min(offset + length, len(split_data)) |
| 104 | + if offset >= len(split_data): |
| 105 | + return [] |
| 106 | + |
| 107 | + # Use the select method instead of slicing |
| 108 | + # This ensures we get dictionary-like objects |
| 109 | + selected_rows = split_data.select(range(offset, end_idx)) |
| 110 | + |
| 111 | + # Convert to SweBenchExample objects |
89 | 112 | examples = [] |
90 | | - for row in data["rows"]: |
| 113 | + for row in selected_rows: |
| 114 | + if instance_id and row["instance_id"] != instance_id: |
| 115 | + continue |
91 | 116 | example = SweBenchExample( |
92 | | - repo=row["row"]["repo"], |
93 | | - instance_id=row["row"]["instance_id"], |
94 | | - base_commit=row["row"]["base_commit"], |
95 | | - patch=row["row"]["patch"], |
96 | | - test_patch=row["row"]["test_patch"], |
97 | | - problem_statement=row["row"]["problem_statement"], |
98 | | - hints_text=row["row"].get("hints_text"), |
99 | | - created_at=row["row"]["created_at"], |
100 | | - version=row["row"]["version"], |
101 | | - fail_to_pass=row["row"]["FAIL_TO_PASS"], |
102 | | - pass_to_pass=row["row"].get("PASS_TO_PASS"), |
103 | | - environment_setup_commit=row["row"].get("environment_setup_commit"), |
| 117 | + repo=row["repo"], |
| 118 | + instance_id=row["instance_id"], |
| 119 | + base_commit=row["base_commit"], |
| 120 | + patch=row["patch"], |
| 121 | + test_patch=row["test_patch"], |
| 122 | + problem_statement=row["problem_statement"], |
| 123 | + hints_text=row.get("hints_text"), |
| 124 | + created_at=row["created_at"], |
| 125 | + version=row["version"], |
| 126 | + fail_to_pass=row["FAIL_TO_PASS"], |
| 127 | + pass_to_pass=row.get("PASS_TO_PASS"), |
| 128 | + environment_setup_commit=row.get("environment_setup_commit"), |
104 | 129 | ) |
105 | 130 | examples.append(example) |
106 | 131 |
|
107 | 132 | return examples |
108 | | - |
109 | | - |
110 | | -def get_swe_bench_example( |
111 | | - instance_id: str, |
112 | | - dataset: SWEBenchDataset = SWEBenchDataset.LITE, |
113 | | -) -> SweBenchExample: |
114 | | - """Fetch a single example from the SWE-bench dataset by its instance ID. |
115 | | -
|
116 | | - Args: |
117 | | - instance_id: The unique identifier of the example to fetch |
118 | | -
|
119 | | - Returns: |
120 | | - SweBenchExample object |
121 | | -
|
122 | | - Raises: |
123 | | - ValueError: If no example found with the given ID |
124 | | - requests.RequestException: If the API request fails |
125 | | - """ |
126 | | - url = "https://datasets-server.huggingface.co/filter" |
127 | | - params = { |
128 | | - "dataset": dataset.value, |
129 | | - "config": "default", |
130 | | - "split": "dev", |
131 | | - "where": f"instance_id='{instance_id}'", |
132 | | - } |
133 | | - |
134 | | - response = requests.get(url, params=params) |
135 | | - response.raise_for_status() |
136 | | - data = response.json() |
137 | | - |
138 | | - if not data["rows"]: |
139 | | - msg = f"No example found with instance_id: {instance_id}" |
140 | | - raise ValueError(msg) |
141 | | - |
142 | | - row = data["rows"][0]["row"] |
143 | | - return SweBenchExample( |
144 | | - repo=row["repo"], |
145 | | - instance_id=row["instance_id"], |
146 | | - base_commit=row["base_commit"], |
147 | | - patch=row["patch"], |
148 | | - test_patch=row["test_patch"], |
149 | | - problem_statement=row["problem_statement"], |
150 | | - hints_text=row.get("hints_text"), |
151 | | - created_at=row["created_at"], |
152 | | - version=row["version"], |
153 | | - fail_to_pass=row["FAIL_TO_PASS"], |
154 | | - pass_to_pass=row.get("PASS_TO_PASS"), |
155 | | - environment_setup_commit=row.get("environment_setup_commit"), |
156 | | - ) |
0 commit comments