Skip to content

Commit 3e7f220

Browse files
authored
Merge pull request #20916 from mvdbeek/from_work_dir_directory
Add support for picking ``from_work_dir`` directory
2 parents d50d35c + 8b8abd1 commit 3e7f220

File tree

17 files changed

+67
-15
lines changed

client/src/api/schema/schema.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14258,6 +14258,11 @@ export interface components {
1425814258
* @description Parameter name. Used when referencing parameter in workflows.
1425914259
*/
1426014260
name?: string | null;
14261+
/**
14262+
* precreate_directory
14263+
* @default false
14264+
*/
14265+
precreate_directory: boolean | null;
1426114266
/**
1426214267
* @description discriminator enum property added by openapi-typescript
1426314268
* @enum {string}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"permissions": {
3+
"allow": [
4+
"Read(//Users/mvandenb/src/galaxy/lib/galaxy/tool_util_models/**)"
5+
],
6+
"deny": [],
7+
"ask": []
8+
}
9+
}

client/src/components/Tool/ToolSourceSchema.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

lib/galaxy/dependencies/pinned-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ proto-plus==1.26.1
147147
protobuf==6.32.1
148148
prov==1.5.1
149149
psutil==7.1.0
150-
pulsar-galaxy-lib==0.15.10
150+
pulsar-galaxy-lib==0.15.11
151151
pyasn1==0.6.1
152152
pyasn1-modules==0.4.2
153153
pycparser==2.23 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy')

lib/galaxy/job_execution/setup.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,12 +288,18 @@ def compute_outputs(self) -> None:
288288
if da_false_path and not os.path.exists(da_false_path):
289289
with open(da_false_path, "ab"):
290290
pass
291+
real_path = da.dataset.get_file_name(sync_cache=False)
292+
false_extra_files_path = os.path.join(
293+
os.path.dirname(da_false_path or real_path), da.dataset.dataset.extra_files_path_name
294+
)
295+
291296
mutable = da.dataset.dataset.external_filename is None
292297
dataset_path = DatasetPath(
293298
da.dataset.dataset.id,
294-
da.dataset.get_file_name(sync_cache=False),
299+
real_path,
295300
false_path=da_false_path,
296301
mutable=mutable,
302+
false_extra_files_path=false_extra_files_path,
297303
)
298304
job_outputs.append(JobOutput(da.name, da.dataset, dataset_path))
299305

lib/galaxy/jobs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3015,7 +3015,7 @@ def setup_external_metadata(
30153015
def get_output_destination(self, output_path):
30163016
"""
30173017
Destination for outputs marked as from_work_dir. These must be copied with
3018-
the same basenme as the path for the ultimate output destination. This is
3018+
the same basename as the path for the ultimate output destination. This is
30193019
required in the task case so they can be merged.
30203020
"""
30213021
return os.path.join(self.working_directory, os.path.basename(output_path))

lib/galaxy/jobs/command_factory.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,13 +288,17 @@ def __handle_metadata(
288288

289289
def __copy_if_exists_command(work_dir_output):
290290
source_file, destination = work_dir_output
291+
is_directory = True if destination.endswith("_files") else False
292+
test_flag = "-d" if is_directory else "-f"
293+
recursive_flag = " -r" if is_directory else ""
294+
delete_destination_dir = f" rmdir {destination}; " if is_directory else ""
291295
if "?" in source_file or "*" in source_file:
292296
source_file = source_file.replace("*", '"*"').replace("?", '"?"')
293297
# Check if source and destination exist.
294298
# Users can purge outputs before the job completes,
295299
# in that case we don't want to copy the output to a purged path.
296300
# Static, non work_dir_output files are handled in job_finish code.
297-
return f'\nif [ -f "{source_file}" -a -f "{destination}" ] ; then cp "{source_file}" "{destination}" ; fi'
301+
return f'\nif [ {test_flag} "{source_file}" -a {test_flag} "{destination}" ] ; then{delete_destination_dir} cp{recursive_flag} "{source_file}" "{destination}" ; fi'
298302

299303

300304
class CommandsBuilder:

lib/galaxy/jobs/runners/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,13 @@ def get_work_dir_outputs(
373373
# Set up dict of dataset id --> output path; output path can be real or
374374
# false depending on outputs_to_working_directory
375375
output_paths = {}
376+
output_extra_paths = {}
376377
for dataset_path in job_wrapper.job_io.get_output_fnames():
377378
path = dataset_path.real_path
378379
if asbool(job_wrapper.get_destination_configuration("outputs_to_working_directory", False)):
379380
path = dataset_path.false_path
380381
output_paths[dataset_path.dataset_id] = path
382+
output_extra_paths[dataset_path.dataset_id] = dataset_path.false_extra_files_path
381383

382384
output_pairs = []
383385
# Walk job's output associations to find and use from_work_dir attributes.
@@ -397,9 +399,15 @@ def get_work_dir_outputs(
397399
# Copy from working dir to HDA.
398400
# TODO: move instead of copy to save time?
399401
source_file = os.path.join(tool_working_directory, hda_tool_output.from_work_dir)
400-
destination = job_wrapper.get_output_destination(output_paths[dataset.dataset_id])
402+
if hda_tool_output.precreate_directory:
403+
# precreate directory, allows using `-d` check to avoid copying data to purged outputs
404+
dataset.dataset.create_extra_files_path()
405+
output_path = output_extra_paths[dataset.dataset_id]
406+
os.makedirs(output_path, exist_ok=True)
407+
else:
408+
output_path = output_paths[dataset.dataset_id]
401409
if in_directory(source_file, tool_working_directory):
402-
output_pairs.append((source_file, destination))
410+
output_pairs.append((source_file, job_wrapper.get_output_destination(output_path)))
403411
else:
404412
# Security violation.
405413
log.exception(

lib/galaxy/tool_util/parser/output_objects.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class ToolOutputBase(Dictifiable):
5252
name: str
5353
label: Optional[str]
5454
hidden: bool
55+
precreate_directory: bool
5556

5657
def __init__(
5758
self,
@@ -70,6 +71,7 @@ def __init__(
7071
self.hidden = hidden
7172
self.collection = False
7273
self.from_expression = from_expression
74+
self.precreate_directory = False
7375

7476
def to_dict(self, view="collection", value_mapper=None, app=None):
7577
return super().to_dict(view=view, value_mapper=value_mapper)
@@ -131,6 +133,7 @@ def __init__(
131133
self.change_format: List[ChangeFormatModel] = []
132134
self.implicit = implicit
133135
self.from_work_dir: Optional[str] = None
136+
self.precreate_directory: bool = False
134137
self.dataset_collector_descriptions: List[DatasetCollectionDescription] = []
135138
self.default_identifier_source: Optional[str] = None
136139
self.count: Optional[int] = None
@@ -178,6 +181,7 @@ def to_model(self) -> ToolOutputDataModel:
178181
metadata_source=self.metadata_source,
179182
discover_datasets=[d.to_model() for d in self.dataset_collector_descriptions],
180183
from_work_dir=self.from_work_dir,
184+
precreate_directory=self.precreate_directory,
181185
)
182186

183187
@staticmethod
@@ -193,6 +197,7 @@ def from_dict(name: str, output_dict: Dict[str, Any], app: Optional[ToolOutputAc
193197
output.count = output_dict.get("count", 1)
194198
output.filters = []
195199
output.from_work_dir = output_dict.get("from_work_dir")
200+
output.precreate_directory = output_dict.get("precreate_directory") or False
196201
output.hidden = output_dict.get("hidden") or False
197202
# TODO: implement tool output action group fixes
198203
if app is not None:
@@ -223,6 +228,7 @@ def __init__(self, name, output_type, from_expression, label=None, filters=None,
223228
self.change_format = []
224229
self.implicit = False
225230
self.from_work_dir = None
231+
self.precreate_directory = False
226232

227233
self.dataset_collector_descriptions = []
228234

lib/galaxy/tool_util/parser/xml.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ def _parse_output(
589589
output.count = int(data_elem.get("count", 1))
590590
output.filters = data_elem.findall("filter")
591591
output.from_work_dir = data_elem.get("from_work_dir", None)
592+
output.precreate_directory = data_elem.get("precreate_directory") or False
592593
profile_version = Version(self.parse_profile())
593594
if output.from_work_dir and profile_version < Version("21.09"):
594595
# We started quoting from_work_dir outputs in 21.09.

0 commit comments

Comments
 (0)