Commit c166087

Adds integration test for invocation export dataset flags
Adds an integration test to verify the correct inclusion and exclusion of hidden and deleted datasets during export of workflow invocations.
1 parent ce14fe5 commit c166087
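
For reference, the flags exercised by this test map directly onto Galaxy's invocation export API. Below is a minimal sketch of the same calls made with plain requests outside the test framework; the GALAXY_URL value, the API key header, and the exact shapes of the storage_request_id and readiness responses are assumptions inferred from the populator helpers used in the diff below, not part of this commit.

import time

import requests

GALAXY_URL = "http://localhost:8080"  # hypothetical Galaxy instance
HEADERS = {"x-api-key": "YOUR_API_KEY"}  # hypothetical API key header


def export_invocation_ro_crate(invocation_id: str, include_hidden: bool, include_deleted: bool) -> bytes:
    # Ask Galaxy to prepare an RO-Crate export of the invocation with the dataset flags under test.
    response = requests.post(
        f"{GALAXY_URL}/api/invocations/{invocation_id}/prepare_store_download",
        headers=HEADERS,
        json={
            "include_files": True,
            "include_hidden": include_hidden,
            "include_deleted": include_deleted,
            "model_store_format": "rocrate.zip",
        },
    )
    response.raise_for_status()
    # "storage_request_id" is assumed to be the field that assert_download_request_ok() reads.
    storage_request_id = response.json()["storage_request_id"]
    # Poll until the short-term storage download is ready
    # (readiness endpoint assumed from DatasetPopulator.wait_for_download_ready).
    while not requests.get(
        f"{GALAXY_URL}/api/short_term_storage/{storage_request_id}/ready", headers=HEADERS
    ).json():
        time.sleep(1)
    # Download the rocrate.zip archive itself.
    return requests.get(f"{GALAXY_URL}/api/short_term_storage/{storage_request_id}", headers=HEADERS).content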

File tree

1 file changed: +137 -0 lines changed
test/integration/test_workflow_tasks.py

Lines changed: 137 additions & 0 deletions
@@ -6,6 +6,7 @@

 import json
 import os
+import zipfile
 from typing import (
     Any,
     cast,
@@ -255,3 +256,139 @@ def _run_workflow(self, has_workflow, history_id: str, **kwds) -> RunJobsSummary
         assert "expected_response" not in kwds
         run_summary = self.workflow_populator.run_workflow(has_workflow, history_id=history_id, **kwds)
         return cast(RunJobsSummary, run_summary)
+
+    def test_export_ro_crate_with_hidden_and_deleted_datasets(self):
+        """Test that hidden and deleted datasets are properly included/excluded based on export flags.
+
+        This test creates a workflow with three outputs, hides one, deletes another, and then
+        exports the invocation with different combinations of include_hidden and include_deleted
+        flags to verify correct behavior.
+        """
+        with self.dataset_populator.test_history() as history_id:
+            # Run a workflow that produces three outputs
+            test_data = """
+input_1:
+  value: 1.bed
+  type: File
+"""
+            summary = self._run_workflow(
+                """
+class: GalaxyWorkflow
+inputs:
+  input_1: data
+outputs:
+  output_1:
+    outputSource: first_cat/out_file1
+  output_2:
+    outputSource: second_cat/out_file1
+  output_3:
+    outputSource: third_cat/out_file1
+steps:
+  first_cat:
+    tool_id: cat
+    in:
+      input1: input_1
+  second_cat:
+    tool_id: cat
+    in:
+      input1: input_1
+  third_cat:
+    tool_id: cat
+    in:
+      input1: input_1
+""",
+                test_data=test_data,
+                history_id=history_id,
+            )
+            invocation_id = summary.invocation_id
+
+            # Get the invocation details to find output datasets
+            invocation = self.workflow_populator.get_invocation(invocation_id)
+            outputs = invocation["outputs"]
+
+            # Hide output_1
+            hidden_dataset_id = outputs["output_1"]["id"]
+            self.dataset_populator.update_dataset(hidden_dataset_id, {"visible": False})
+
+            # Delete output_2 (but don't purge it)
+            deleted_dataset_id = outputs["output_2"]["id"]
+            self.dataset_populator.delete_dataset(history_id, deleted_dataset_id, purge=False)
+
+            # Verify the datasets have the expected states
+            hidden_dataset = self.dataset_populator.get_history_dataset_details(
+                history_id, dataset_id=hidden_dataset_id
+            )
+            assert hidden_dataset["visible"] is False
+
+            deleted_dataset = self.dataset_populator.get_history_dataset_details(
+                history_id, dataset_id=deleted_dataset_id
+            )
+            assert deleted_dataset["deleted"] is True
+
+            # Test 1: Export with include_hidden=False, include_deleted=False
+            # Expected: 2 datasets (input_1 + output_3)
+            dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=False, include_deleted=False)
+            assert len(dataset_files) == 2, (
+                f"Test 1 (hidden=False, deleted=False): Expected 2 datasets, found {len(dataset_files)}: "
+                f"{dataset_files}"
+            )
+
+            # Test 2: Export with include_hidden=True, include_deleted=False
+            # Expected: 3 datasets (input_1 + output_1[hidden] + output_3)
+            dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=True, include_deleted=False)
+            assert len(dataset_files) == 3, (
+                f"Test 2 (hidden=True, deleted=False): Expected 3 datasets, found {len(dataset_files)}: "
+                f"{dataset_files}"
+            )
+
+            # Test 3: Export with include_hidden=False, include_deleted=True
+            # Expected: 3 datasets (input_1 + output_2[deleted] + output_3)
+            dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=False, include_deleted=True)
+            assert len(dataset_files) == 3, (
+                f"Test 3 (hidden=False, deleted=True): Expected 3 datasets, found {len(dataset_files)}: "
+                f"{dataset_files}"
+            )
+
+            # Test 4: Export with include_hidden=True, include_deleted=True
+            # Expected: 4 datasets (input_1 + output_1[hidden] + output_2[deleted] + output_3)
+            dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=True, include_deleted=True)
+            assert len(dataset_files) == 4, (
+                f"Test 4 (hidden=True, deleted=True): Expected 4 datasets, found {len(dataset_files)}: "
+                f"{dataset_files}"
+            )
+
+    def _export_and_get_datasets(self, invocation_id: str, include_hidden: bool, include_deleted: bool) -> list[str]:
+        """Helper method to export an invocation and return the list of dataset files in the archive."""
+        url = f"invocations/{invocation_id}/prepare_store_download"
+        download_response = self.workflow_populator._post(
+            url,
+            dict(
+                include_files=True,
+                include_hidden=include_hidden,
+                include_deleted=include_deleted,
+                model_store_format="rocrate.zip",
+            ),
+            json=True,
+        )
+        storage_request_id = self.dataset_populator.assert_download_request_ok(download_response)
+        self.dataset_populator.wait_for_download_ready(storage_request_id)
+        ro_crate_path = self.workflow_populator._get_to_tempfile(f"short_term_storage/{storage_request_id}")
+        return self._get_dataset_files_in_archive(ro_crate_path)
+
+    def _get_dataset_files_in_archive(self, archive_path: str) -> list[str]:
+        """Extract dataset files from a rocrate.zip archive, excluding metadata files.
+
+        Dataset files are typically stored in a 'datasets/' folder within the archive.
+        """
+        dataset_files = []
+
+        with zipfile.ZipFile(archive_path, "r") as zf:
+            for name in zf.namelist():
+                # Skip directories
+                if name.endswith("/"):
+                    continue
+                # Only count files in the datasets/ folder
+                if name.startswith("datasets/"):
+                    dataset_files.append(name)
+
+        return dataset_files
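
As a possible follow-up check (not part of this commit), the file count under datasets/ could be cross-referenced against the crate's metadata document. A sketch, assuming the archive carries a standard top-level ro-crate-metadata.json and that Galaxy registers exported datasets as File entities whose identifiers point into datasets/:

import json
import zipfile


def dataset_entities_in_crate(archive_path: str) -> list[str]:
    # Read the RO-Crate metadata document from the archive root
    # (ro-crate-metadata.json is the filename defined by the RO-Crate spec).
    with zipfile.ZipFile(archive_path) as zf:
        metadata = json.loads(zf.read("ro-crate-metadata.json"))
    entities = []
    for entity in metadata.get("@graph", []):
        # @type may be a string or a list in JSON-LD; normalize to a list.
        types = entity.get("@type", [])
        if isinstance(types, str):
            types = [types]
        # Assumption: exported datasets appear as File entities under datasets/.
        if "File" in types and entity.get("@id", "").startswith("datasets/"):
            entities.append(entity["@id"])
    return entities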
