Skip to content

Commit ebfd451

Browse files
authored
feat(consume): add --extract-to parameter for direct fixture extraction (#1861)
Add a new `--extract-to` flag to the `consume cache` command that extracts fixtures directly to a specified directory, bypassing the normal cache structure. This replaces the need for separate download scripts like 'download-and-extract-fixtures.sh' by integrating the functionality into the existing consume tooling. Usage: `uvx --from git+https://github.com/ethereum/execution-spec-tests consume cache --input=<release>@<version> --extract-to=./zkevm-fixtures`
1 parent 2920953 commit ebfd451

File tree

4 files changed

+131
-44
lines changed

4 files changed

+131
-44
lines changed

docs/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ Users can select any of the artifacts depending on their testing needs for their
7777

7878
#### `consume`
7979

80+
- ✨ Add `--extract-to` parameter to `consume cache` command for direct fixture extraction to specified directory, replacing the need for separate download scripts. ([#1861](https://github.com/ethereum/execution-spec-tests/pull/1861)).
8081
- 🐞 Fix `consume cache --cache-folder` parameter being ignored, now properly caches fixtures in the specified directory instead of always using the default system cache location.
8182
- 🔀 `consume` now automatically avoids GitHub API calls when using direct release URLs (better for CI environments), while release specifiers like `stable@latest` continue to use the API for version resolution ([#1788](https://github.com/ethereum/execution-spec-tests/pull/1788)).
8283
- 🔀 Refactor consume simulator architecture to use explicit pytest plugin structure with forward-looking architecture ([#1801](https://github.com/ethereum/execution-spec-tests/pull/1801)).

docs/running_tests/consume/cache.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ You can override this location with the `--cache-folder` flag:
114114
uv run consume cache --input stable@latest --cache-folder /path/to/custom/cache
115115
```
116116

117+
Or extract directly to a specific directory (bypasses cache structure):
118+
119+
```bash
120+
uv run consume cache --input <release>@<version> --extract-to ./benchmark-fixtures
121+
```
122+
117123
**Cache structure:**
118124

119125
```text

src/pytest_plugins/consume/consume.py

Lines changed: 103 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,59 +46,61 @@ def default_html_report_file_path() -> str:
4646
class FixtureDownloader:
4747
"""Handles downloading and extracting fixture archives."""
4848

49-
def __init__(self, url: str, base_directory: Path): # noqa: D107
49+
def __init__(self, url: str, destination_folder: Path): # noqa: D107
5050
self.url = url
51-
self.base_directory = base_directory
51+
self.destination_folder = destination_folder
5252
self.parsed_url = urlparse(url)
5353
self.archive_name = self.strip_archive_extension(Path(self.parsed_url.path).name)
5454

55-
@property
56-
def extract_to(self) -> Path:
57-
"""Path to the directory where the archive will be extracted."""
58-
if is_release_url(self.url):
59-
version = Path(self.parsed_url.path).parts[-2]
60-
self.org_repo = self.extract_github_repo()
61-
return self.base_directory / self.org_repo / version / self.archive_name
62-
return self.base_directory / "other" / self.archive_name
63-
6455
def download_and_extract(self) -> Tuple[bool, Path]:
6556
"""Download the URL and extract it locally if it hasn't already been downloaded."""
66-
if self.extract_to.exists():
57+
if self.destination_folder.exists():
6758
return True, self.detect_extracted_directory()
6859

6960
return False, self.fetch_and_extract()
7061

71-
def extract_github_repo(self) -> str:
72-
"""Extract <username>/<repo> from GitHub URLs, otherwise return 'other'."""
73-
parts = self.parsed_url.path.strip("/").split("/")
74-
return (
75-
f"{parts[0]}/{parts[1]}"
76-
if self.parsed_url.netloc == "github.com" and len(parts) >= 2
77-
else "other"
78-
)
79-
8062
@staticmethod
8163
def strip_archive_extension(filename: str) -> str:
8264
"""Remove .tar.gz or .tgz extensions from filename."""
8365
return filename.removesuffix(".tar.gz").removesuffix(".tgz")
8466

67+
@staticmethod
68+
def get_cache_path(url: str, cache_folder: Path) -> Path:
69+
"""Get the appropriate cache path for a given URL."""
70+
parsed_url = urlparse(url)
71+
archive_name = FixtureDownloader.strip_archive_extension(Path(parsed_url.path).name)
72+
73+
if is_release_url(url):
74+
version = Path(parsed_url.path).parts[-2]
75+
parts = parsed_url.path.strip("/").split("/")
76+
org_repo = (
77+
f"{parts[0]}/{parts[1]}"
78+
if parsed_url.netloc == "github.com" and len(parts) >= 2
79+
else "other"
80+
)
81+
return cache_folder / org_repo / version / archive_name
82+
return cache_folder / "other" / archive_name
83+
8584
def fetch_and_extract(self) -> Path:
8685
"""Download and extract an archive from the given URL."""
87-
self.extract_to.mkdir(parents=True, exist_ok=False)
86+
self.destination_folder.mkdir(parents=True, exist_ok=True)
8887
response = requests.get(self.url)
8988
response.raise_for_status()
9089

9190
with tarfile.open(fileobj=BytesIO(response.content), mode="r:gz") as tar:
92-
tar.extractall(path=self.extract_to, filter="data")
91+
tar.extractall(path=self.destination_folder, filter="data")
9392

9493
return self.detect_extracted_directory()
9594

9695
def detect_extracted_directory(self) -> Path:
9796
"""
98-
Detect a single top-level dir within the extracted archive, otherwise return extract_to.
97+
Detect a single top-level dir within the extracted archive, otherwise return
98+
destination_folder.
9999
""" # noqa: D200
100-
extracted_dirs = [d for d in self.extract_to.iterdir() if d.is_dir() and d.name != ".meta"]
101-
return extracted_dirs[0] if len(extracted_dirs) == 1 else self.extract_to
100+
extracted_dirs = [
101+
d for d in self.destination_folder.iterdir() if d.is_dir() and d.name != ".meta"
102+
]
103+
return extracted_dirs[0] if len(extracted_dirs) == 1 else self.destination_folder
102104

103105

104106
@dataclass
@@ -112,31 +114,45 @@ class FixturesSource:
112114
is_local: bool = True
113115
is_stdin: bool = False
114116
was_cached: bool = False
117+
extract_to_local_path: bool = False
115118

116119
@classmethod
117120
def from_input(
118-
cls, input_source: str, cache_folder: Optional[Path] = None
121+
cls,
122+
input_source: str,
123+
cache_folder: Optional[Path] = None,
124+
extract_to: Optional[Path] = None,
119125
) -> "FixturesSource":
120126
"""Determine the fixture source type and return an instance."""
121127
if cache_folder is None:
122128
cache_folder = CACHED_DOWNLOADS_DIRECTORY
123129
if input_source == "stdin":
124130
return cls(input_option=input_source, path=Path(), is_local=False, is_stdin=True)
125131
if is_release_url(input_source):
126-
return cls.from_release_url(input_source, cache_folder)
132+
return cls.from_release_url(input_source, cache_folder, extract_to)
127133
if is_url(input_source):
128-
return cls.from_url(input_source, cache_folder)
134+
return cls.from_url(input_source, cache_folder, extract_to)
129135
if ReleaseTag.is_release_string(input_source):
130-
return cls.from_release_spec(input_source, cache_folder)
136+
return cls.from_release_spec(input_source, cache_folder, extract_to)
131137
return cls.validate_local_path(Path(input_source))
132138

133139
@classmethod
134-
def from_release_url(cls, url: str, cache_folder: Optional[Path] = None) -> "FixturesSource":
140+
def from_release_url(
141+
cls, url: str, cache_folder: Optional[Path] = None, extract_to: Optional[Path] = None
142+
) -> "FixturesSource":
135143
"""Create a fixture source from a supported github repo release URL."""
136144
if cache_folder is None:
137145
cache_folder = CACHED_DOWNLOADS_DIRECTORY
138-
downloader = FixtureDownloader(url, cache_folder)
139-
was_cached, path = downloader.download_and_extract()
146+
147+
destination_folder = extract_to or FixtureDownloader.get_cache_path(url, cache_folder)
148+
downloader = FixtureDownloader(url, destination_folder)
149+
150+
# Skip cache check for extract_to (always download fresh)
151+
if extract_to is not None:
152+
was_cached = False
153+
path = downloader.fetch_and_extract()
154+
else:
155+
was_cached, path = downloader.download_and_extract()
140156

141157
return cls(
142158
input_option=url,
@@ -145,40 +161,65 @@ def from_release_url(cls, url: str, cache_folder: Optional[Path] = None) -> "Fix
145161
release_page="",
146162
is_local=False,
147163
was_cached=was_cached,
164+
extract_to_local_path=extract_to is not None,
148165
)
149166

150167
@classmethod
151-
def from_url(cls, url: str, cache_folder: Optional[Path] = None) -> "FixturesSource":
168+
def from_url(
169+
cls, url: str, cache_folder: Optional[Path] = None, extract_to: Optional[Path] = None
170+
) -> "FixturesSource":
152171
"""Create a fixture source from a direct URL."""
153172
if cache_folder is None:
154173
cache_folder = CACHED_DOWNLOADS_DIRECTORY
155-
downloader = FixtureDownloader(url, cache_folder)
156-
was_cached, path = downloader.download_and_extract()
174+
175+
destination_folder = extract_to or FixtureDownloader.get_cache_path(url, cache_folder)
176+
downloader = FixtureDownloader(url, destination_folder)
177+
178+
# Skip cache check for extract_to (always download fresh)
179+
if extract_to is not None:
180+
was_cached = False
181+
path = downloader.fetch_and_extract()
182+
else:
183+
was_cached, path = downloader.download_and_extract()
184+
157185
return cls(
158186
input_option=url,
159187
path=path,
160188
url=url,
161189
release_page="",
162190
is_local=False,
163191
was_cached=was_cached,
192+
extract_to_local_path=extract_to is not None,
164193
)
165194

166195
@classmethod
167-
def from_release_spec(cls, spec: str, cache_folder: Optional[Path] = None) -> "FixturesSource":
196+
def from_release_spec(
197+
cls, spec: str, cache_folder: Optional[Path] = None, extract_to: Optional[Path] = None
198+
) -> "FixturesSource":
168199
"""Create a fixture source from a release spec (e.g., develop@latest)."""
169200
if cache_folder is None:
170201
cache_folder = CACHED_DOWNLOADS_DIRECTORY
171202
url = get_release_url(spec)
172203
release_page = get_release_page_url(url)
173-
downloader = FixtureDownloader(url, cache_folder)
174-
was_cached, path = downloader.download_and_extract()
204+
205+
destination_folder = extract_to or FixtureDownloader.get_cache_path(url, cache_folder)
206+
downloader = FixtureDownloader(url, destination_folder)
207+
208+
# Skip cache check for extract_to (always download fresh)
209+
if extract_to is not None:
210+
was_cached = False
211+
path = downloader.fetch_and_extract()
212+
else:
213+
was_cached, path = downloader.download_and_extract()
214+
175215
return cls(
176216
input_option=spec,
177217
path=path,
178218
url=url,
179219
release_page=release_page,
180220
is_local=False,
181221
was_cached=was_cached,
222+
extract_to_local_path=extract_to is not None,
182223
)
183224

184225
@staticmethod
@@ -268,6 +309,17 @@ def pytest_addoption(parser): # noqa: D103
268309
f"Defaults to the following directory: '{CACHED_DOWNLOADS_DIRECTORY}'."
269310
),
270311
)
312+
consume_group.addoption(
313+
"--extract-to",
314+
action="store",
315+
dest="extract_to_folder",
316+
default=None,
317+
help=(
318+
"Extract downloaded fixtures to the specified directory. Only valid with 'cache' "
319+
"command. When used, fixtures are extracted directly to this path instead of the "
320+
"user's execution-spec-tests cache directory."
321+
),
322+
)
271323
if "cache" in sys.argv:
272324
return
273325
consume_group.addoption(
@@ -308,6 +360,10 @@ def pytest_configure(config): # noqa: D103
308360
called before the pytest-html plugin's pytest_configure to ensure that
309361
it uses the modified `htmlpath` option.
310362
"""
363+
# Validate --extract-to usage
364+
if config.option.extract_to_folder is not None and "cache" not in sys.argv:
365+
pytest.exit("The --extract-to flag is only valid with the 'cache' command.")
366+
311367
if config.option.fixtures_source is None:
312368
# NOTE: Setting the default value here is necessary for correct stdin/piping behavior.
313369
config.fixtures_source = FixturesSource(
@@ -318,7 +374,11 @@ def pytest_configure(config): # noqa: D103
318374
# be evaluated twice which breaks the result of `was_cached`; the work-around is to call it
319375
# manually here.
320376
config.fixtures_source = FixturesSource.from_input(
321-
config.option.fixtures_source, Path(config.option.fixture_cache_folder)
377+
config.option.fixtures_source,
378+
Path(config.option.fixture_cache_folder),
379+
Path(config.option.extract_to_folder)
380+
if config.option.extract_to_folder is not None
381+
else None,
322382
)
323383
config.fixture_source_flags = ["--input", config.fixtures_source.input_option]
324384

@@ -327,7 +387,9 @@ def pytest_configure(config): # noqa: D103
327387

328388
if "cache" in sys.argv:
329389
reason = ""
330-
if config.fixtures_source.was_cached:
390+
if config.fixtures_source.extract_to_local_path:
391+
reason += "Fixtures downloaded and extracted to specified directory."
392+
elif config.fixtures_source.was_cached:
331393
reason += "Fixtures already cached."
332394
elif not config.fixtures_source.is_local:
333395
reason += "Fixtures downloaded and cached."

src/pytest_plugins/consume/tests/test_fixtures_source_input_types.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ def test_from_input_handles_release_url(self):
141141

142142
FixturesSource.from_input(test_url)
143143

144-
mock_from_release_url.assert_called_once_with(test_url, CACHED_DOWNLOADS_DIRECTORY)
144+
mock_from_release_url.assert_called_once_with(
145+
test_url, CACHED_DOWNLOADS_DIRECTORY, None
146+
)
145147

146148
def test_from_input_handles_release_spec(self):
147149
"""Test that from_input properly handles release specs."""
@@ -152,7 +154,9 @@ def test_from_input_handles_release_spec(self):
152154

153155
FixturesSource.from_input(test_spec)
154156

155-
mock_from_release_spec.assert_called_once_with(test_spec, CACHED_DOWNLOADS_DIRECTORY)
157+
mock_from_release_spec.assert_called_once_with(
158+
test_spec, CACHED_DOWNLOADS_DIRECTORY, None
159+
)
156160

157161
def test_from_input_handles_regular_url(self):
158162
"""Test that from_input properly handles regular URLs."""
@@ -163,4 +167,18 @@ def test_from_input_handles_regular_url(self):
163167

164168
FixturesSource.from_input(test_url)
165169

166-
mock_from_url.assert_called_once_with(test_url, CACHED_DOWNLOADS_DIRECTORY)
170+
mock_from_url.assert_called_once_with(test_url, CACHED_DOWNLOADS_DIRECTORY, None)
171+
172+
def test_from_input_handles_extract_to_parameter(self):
173+
"""Test that from_input properly passes extract_to parameter."""
174+
test_url = "https://github.com/ethereum/execution-spec-tests/releases/download/v3.0.0/fixtures_develop.tar.gz"
175+
extract_to_path = Path("/custom/extract/path")
176+
177+
with patch.object(FixturesSource, "from_release_url") as mock_from_release_url:
178+
mock_from_release_url.return_value = MagicMock()
179+
180+
FixturesSource.from_input(test_url, extract_to=extract_to_path)
181+
182+
mock_from_release_url.assert_called_once_with(
183+
test_url, CACHED_DOWNLOADS_DIRECTORY, extract_to_path
184+
)

0 commit comments

Comments
 (0)