6 | 6 | |
7 | 7 | import json |
8 | 8 | import os |
| 9 | +import zipfile |
9 | 10 | from typing import ( |
10 | 11 | Any, |
11 | 12 | cast, |
@@ -255,3 +256,139 @@ def _run_workflow(self, has_workflow, history_id: str, **kwds) -> RunJobsSummary |
255 | 256 | assert "expected_response" not in kwds |
256 | 257 | run_summary = self.workflow_populator.run_workflow(has_workflow, history_id=history_id, **kwds) |
257 | 258 | return cast(RunJobsSummary, run_summary) |
| 259 | + |
| 260 | + def test_export_ro_crate_with_hidden_and_deleted_datasets(self): |
| 261 | + """Test that hidden and deleted datasets are properly included/excluded based on export flags. |
| 262 | + |
| 263 | + This test runs a workflow that produces three outputs, hides one, deletes another, and then |
| 264 | + exports the invocation with every combination of the include_hidden and include_deleted |
| 265 | + flags, asserting the expected number of dataset files in each exported archive. |
| 266 | + """ |
| 267 | + with self.dataset_populator.test_history() as history_id: |
| 268 | + # Run a workflow that produces three outputs |
| 269 | + test_data = """ |
| 270 | +input_1: |
| 271 | + value: 1.bed |
| 272 | + type: File |
| 273 | +""" |
| 274 | + summary = self._run_workflow( |
| 275 | + """ |
| 276 | +class: GalaxyWorkflow |
| 277 | +inputs: |
| 278 | + input_1: data |
| 279 | +outputs: |
| 280 | + output_1: |
| 281 | + outputSource: first_cat/out_file1 |
| 282 | + output_2: |
| 283 | + outputSource: second_cat/out_file1 |
| 284 | + output_3: |
| 285 | + outputSource: third_cat/out_file1 |
| 286 | +steps: |
| 287 | + first_cat: |
| 288 | + tool_id: cat |
| 289 | + in: |
| 290 | + input1: input_1 |
| 291 | + second_cat: |
| 292 | + tool_id: cat |
| 293 | + in: |
| 294 | + input1: input_1 |
| 295 | + third_cat: |
| 296 | + tool_id: cat |
| 297 | + in: |
| 298 | + input1: input_1 |
| 299 | +""", |
| 300 | + test_data=test_data, |
| 301 | + history_id=history_id, |
| 302 | + ) |
| 303 | + invocation_id = summary.invocation_id |
| 304 | + |
| 305 | + # Get the invocation details to find output datasets |
| 306 | + invocation = self.workflow_populator.get_invocation(invocation_id) |
| 307 | + outputs = invocation["outputs"] |
| 308 | + |
| 309 | + # Hide output_1 |
| 310 | + hidden_dataset_id = outputs["output_1"]["id"] |
| 311 | + self.dataset_populator.update_dataset(hidden_dataset_id, {"visible": False}) |
| 312 | + |
| 313 | + # Delete output_2 (but don't purge it) |
| 314 | + deleted_dataset_id = outputs["output_2"]["id"] |
| 315 | + self.dataset_populator.delete_dataset(history_id, deleted_dataset_id, purge=False) |
| 316 | + |
| 317 | + # Verify the datasets have the expected states |
| 318 | + hidden_dataset = self.dataset_populator.get_history_dataset_details( |
| 319 | + history_id, dataset_id=hidden_dataset_id |
| 320 | + ) |
| 321 | + assert hidden_dataset["visible"] is False |
| 322 | + |
| 323 | + deleted_dataset = self.dataset_populator.get_history_dataset_details( |
| 324 | + history_id, dataset_id=deleted_dataset_id |
| 325 | + ) |
| 326 | + assert deleted_dataset["deleted"] is True |
| 327 | + |
| 328 | + # Test 1: Export with include_hidden=False, include_deleted=False |
| 329 | + # Expected: 2 datasets (input_1 + output_3) |
| 330 | + dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=False, include_deleted=False) |
| 331 | + assert len(dataset_files) == 2, ( |
| 332 | + f"Test 1 (hidden=False, deleted=False): Expected 2 datasets, found {len(dataset_files)}: " |
| 333 | + f"{dataset_files}" |
| 334 | + ) |
| 335 | + |
| 336 | + # Test 2: Export with include_hidden=True, include_deleted=False |
| 337 | + # Expected: 3 datasets (input_1 + output_1[hidden] + output_3) |
| 338 | + dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=True, include_deleted=False) |
| 339 | + assert len(dataset_files) == 3, ( |
| 340 | + f"Test 2 (hidden=True, deleted=False): Expected 3 datasets, found {len(dataset_files)}: " |
| 341 | + f"{dataset_files}" |
| 342 | + ) |
| 343 | + |
| 344 | + # Test 3: Export with include_hidden=False, include_deleted=True |
| 345 | + # Expected: 3 datasets (input_1 + output_2[deleted] + output_3) |
| 346 | + dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=False, include_deleted=True) |
| 347 | + assert len(dataset_files) == 3, ( |
| 348 | + f"Test 3 (hidden=False, deleted=True): Expected 3 datasets, found {len(dataset_files)}: " |
| 349 | + f"{dataset_files}" |
| 350 | + ) |
| 351 | + |
| 352 | + # Test 4: Export with include_hidden=True, include_deleted=True |
| 353 | + # Expected: 4 datasets (input_1 + output_1[hidden] + output_2[deleted] + output_3) |
| 354 | + dataset_files = self._export_and_get_datasets(invocation_id, include_hidden=True, include_deleted=True) |
| 355 | + assert len(dataset_files) == 4, ( |
| 356 | + f"Test 4 (hidden=True, deleted=True): Expected 4 datasets, found {len(dataset_files)}: " |
| 357 | + f"{dataset_files}" |
| 358 | + ) |
| 359 | + |
| 360 | + def _export_and_get_datasets(self, invocation_id: str, include_hidden: bool, include_deleted: bool) -> list[str]: |
| 361 | + """Helper method to export an invocation and return the list of dataset files in the archive.""" |
| 362 | + url = f"invocations/{invocation_id}/prepare_store_download" |
| 363 | + download_response = self.workflow_populator._post( |
| 364 | + url, |
| 365 | + dict( |
| 366 | + include_files=True, |
| 367 | + include_hidden=include_hidden, |
| 368 | + include_deleted=include_deleted, |
| 369 | + model_store_format="rocrate.zip", |
| 370 | + ), |
| 371 | + json=True, |
| 372 | + ) |
| 373 | + storage_request_id = self.dataset_populator.assert_download_request_ok(download_response) |
| 374 | + self.dataset_populator.wait_for_download_ready(storage_request_id) |
| 375 | + ro_crate_path = self.workflow_populator._get_to_tempfile(f"short_term_storage/{storage_request_id}") |
| 376 | + return self._get_dataset_files_in_archive(ro_crate_path) |
| 377 | + |
| 378 | + def _get_dataset_files_in_archive(self, archive_path: str) -> list[str]: |
| 379 | + """Extract dataset files from a rocrate.zip archive, excluding metadata files. |
| 380 | + |
| 381 | + Dataset files are typically stored in a 'datasets/' folder within the archive. |
| 382 | + """ |
| 383 | + dataset_files = [] |
| 384 | + |
| 385 | + with zipfile.ZipFile(archive_path, "r") as zf: |
| 386 | + for name in zf.namelist(): |
| 387 | + # Skip directories |
| 388 | + if name.endswith("/"): |
| 389 | + continue |
| 390 | + # Only count files in the datasets/ folder |
| 391 | + if name.startswith("datasets/"): |
| 392 | + dataset_files.append(name) |
| 393 | + |
| 394 | + return dataset_files |
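
The four export checks differ only in the flag values and the expected dataset count. If more combinations are added later, a table-driven loop would keep the assertions in sync. A minimal sketch of the same checks as they would sit inside the test method above; no new API is assumed, and `self` and `invocation_id` come from the surrounding test:

```python
# Same coverage as Tests 1-4, driven from a table of flag combinations.
expected_counts = {
    # (include_hidden, include_deleted): expected number of dataset files
    (False, False): 2,  # input_1 + output_3
    (True, False): 3,   # adds output_1 (hidden)
    (False, True): 3,   # adds output_2 (deleted)
    (True, True): 4,    # adds both
}
for (include_hidden, include_deleted), expected in expected_counts.items():
    dataset_files = self._export_and_get_datasets(
        invocation_id, include_hidden=include_hidden, include_deleted=include_deleted
    )
    assert len(dataset_files) == expected, (
        f"hidden={include_hidden}, deleted={include_deleted}: "
        f"expected {expected} datasets, found {len(dataset_files)}: {dataset_files}"
    )
```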
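`_export_and_get_datasets` leans on populator internals (`_post`, `_get_to_tempfile`), but the underlying flow is plain HTTP against Galaxy's API. A sketch of that flow using the `requests` library, with a hypothetical server URL and API key; it assumes, as the populator helpers suggest, that the `prepare_store_download` response carries the id under `storage_request_id` and that `/api/short_term_storage/{id}/ready` returns a JSON boolean:

```python
import time

import requests

GALAXY_URL = "https://galaxy.example.org"  # hypothetical server
HEADERS = {"x-api-key": "YOUR_API_KEY"}    # hypothetical API key


def export_invocation_rocrate(
    invocation_id: str, dest_path: str, include_hidden: bool, include_deleted: bool
) -> None:
    # Ask Galaxy to build the RO-Crate archive in short-term storage.
    response = requests.post(
        f"{GALAXY_URL}/api/invocations/{invocation_id}/prepare_store_download",
        json={
            "include_files": True,
            "include_hidden": include_hidden,
            "include_deleted": include_deleted,
            "model_store_format": "rocrate.zip",
        },
        headers=HEADERS,
    )
    response.raise_for_status()
    storage_request_id = response.json()["storage_request_id"]  # assumed response key

    # Poll until the archive is ready, then write it to disk.
    ready_url = f"{GALAXY_URL}/api/short_term_storage/{storage_request_id}/ready"
    while not requests.get(ready_url, headers=HEADERS).json():
        time.sleep(1)
    download = requests.get(
        f"{GALAXY_URL}/api/short_term_storage/{storage_request_id}", headers=HEADERS
    )
    download.raise_for_status()
    with open(dest_path, "wb") as fh:
        fh.write(download.content)
```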
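Counting names under `datasets/` validates the payload but ignores the crate's own inventory. A complementary check could parse `ro-crate-metadata.json`, the metadata descriptor that the RO-Crate 1.1 spec places at the archive root, and compare its `File` entities against the names found in the zip. A sketch; the helper name is illustrative, and it assumes the exporter records dataset payloads as `File` entities whose `@id` starts with `datasets/`:

```python
import json
import zipfile


def _get_dataset_entities_in_crate(archive_path: str) -> list[str]:
    """Return @graph File entities whose @id points into datasets/ (illustrative)."""
    with zipfile.ZipFile(archive_path, "r") as zf:
        # RO-Crate 1.1 keeps the metadata descriptor at the archive root.
        with zf.open("ro-crate-metadata.json") as fh:
            metadata = json.load(fh)
    entities = []
    for entity in metadata.get("@graph", []):
        # @type may be a plain string or a list, depending on the generator.
        types = entity.get("@type", [])
        if isinstance(types, str):
            types = [types]
        if "File" in types and entity.get("@id", "").startswith("datasets/"):
            entities.append(entity["@id"])
    return entities
```

Comparing `sorted(_get_dataset_entities_in_crate(path))` with the sorted result of `_get_dataset_files_in_archive(path)` would then tie the crate metadata to the actual payload.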