Skip to content

Commit bb50f86

Browse files
authored
feat(service): add workflow export endpoint (#3212)
1 parent dacddde commit bb50f86

File tree

21 files changed

+548
-121
lines changed

21 files changed

+548
-121
lines changed

docs/spelling_wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ Postgresql
172172
powerline
173173
pre
174174
prepend
175+
prepended
175176
prepending
176177
preprocessed
177178
preprocessing

poetry.lock

Lines changed: 76 additions & 53 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ importlib-resources = { version = ">=5.4.0,<5.10.0", python = "<3.9.0" }
8383
inject = "<4.4.0,>=4.3.0"
8484
isort = { version = "<5.10.2,>=5.3.2", optional = true }
8585
jinja2 = { version = ">=2.11.3,<3.1.3" }
86-
marshmallow = { version = ">=3.14.0,<3.18.0", optional = true }
86+
marshmallow = { version = ">=3.18.0", optional = true }
8787
marshmallow-oneofschema = { version=">=3.0.1,<4.0.0", optional = true }
8888
mypy = {version = ">=0.942,<1.0", optional = true}
8989
networkx = "<2.7,>=2.6.0"
@@ -123,7 +123,7 @@ rdflib = "<7.0,>=6.0.0"
123123
redis = { version = ">=3.5.3,<4.2.0", optional = true }
124124
renku-sphinx-theme = { version = ">=0.2.0", optional = true }
125125
requests = ">=2.23.0,<2.28.2"
126-
responses = { version = ">=0.7.0,<0.22.0", optional = true }
126+
responses = { version = ">=0.7.0,<=0.22.0", optional = true }
127127
rich = ">=9.3.0,<12.6.0"
128128
rq = { version = "==1.11.0", optional = true }
129129
rq-scheduler = { version = "==0.11.0", optional = true }
@@ -343,6 +343,7 @@ files = [
343343
"renku/**/*.py",
344344
"tests/**/*.py"
345345
]
346+
implicit_optional = true
346347

347348
[[tool.mypy.overrides]]
348349
module = [

renku/command/command_builder/command.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,8 @@ def command(self, operation: Callable):
365365
def working_directory(self, directory: str) -> "Command":
366366
"""Set the working directory for the command.
367367
368+
WARNING: Should not be used in the core service.
369+
368370
Args:
369371
directory(str): The working directory to work in.
370372

renku/core/plugin/dataset_provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828

2929
@hookspec
30-
def dataset_provider() -> "Type[ProviderApi]":
30+
def dataset_provider() -> "Type[ProviderApi]": # type: ignore[empty-body]
3131
"""Plugin Hook for different dataset providers.
3232
3333
Returns:

renku/core/plugin/session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727

2828
@hookspec
29-
def session_provider() -> Tuple[ISessionProvider, str]:
29+
def session_provider() -> Tuple[ISessionProvider, str]: # type: ignore[empty-body]
3030
"""Plugin Hook for ``session`` sub-command.
3131
3232
Returns:

renku/core/plugin/workflow.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131

3232
@hookspec
33-
def workflow_format() -> Tuple[IWorkflowConverter, List[str]]: # type: ignore
33+
def workflow_format() -> Tuple[IWorkflowConverter, List[str]]: # type: ignore[empty-body]
3434
"""Plugin Hook for ``workflow export`` call.
3535
3636
Can be used to export renku workflows in different formats.
@@ -44,9 +44,14 @@ def workflow_format() -> Tuple[IWorkflowConverter, List[str]]: # type: ignore
4444

4545

4646
@hookspec(firstresult=True)
47-
def workflow_convert(
48-
workflow: Plan, basedir: Path, output: Optional[Path], output_format: Optional[str]
49-
) -> str: # type: ignore
47+
def workflow_convert( # type: ignore[empty-body]
48+
workflow: Plan,
49+
basedir: Path,
50+
output: Optional[Path],
51+
output_format: Optional[str],
52+
resolve_paths: bool,
53+
nest_workflows: bool,
54+
) -> str:
5055
"""Plugin Hook for ``workflow export`` call.
5156
5257
Can be used to export renku workflows in different formats.
@@ -82,7 +87,13 @@ class WorkflowConverterProtocol(Protocol):
8287
"""Typing protocol to specify type of the workflow converter hook."""
8388

8489
def __call__(
85-
self, workflow: Plan, basedir: Path, output: Optional[Path] = None, output_format: Optional[str] = None
90+
self,
91+
workflow: Plan,
92+
basedir: Path,
93+
output: Optional[Path] = None,
94+
output_format: Optional[str] = None,
95+
resolve_paths: Optional[bool] = None,
96+
nest_workflows: Optional[bool] = None,
8697
) -> str:
8798
"""Dummy method to let mypy know the type of the hook implementation."""
8899
raise NotImplementedError()

renku/core/util/git.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def get_full_repository_path(url: Optional[str]) -> str:
250250
Returns:
251251
The hostname plus path extracted from the URL.
252252
"""
253-
if not str:
253+
if str is None:
254254
return ""
255255

256256
parsed_url = parse_git_url(url)

renku/core/util/yaml.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ def write_yaml(path, data):
7272
yaml.dump(data, fp, default_flow_style=False, Dumper=Dumper)
7373

7474

75+
def dumps_yaml(data) -> str:
76+
"""Convert YAML data to a YAML string."""
77+
return yaml.dump(data)
78+
79+
7580
def load_yaml(data):
7681
"""Load YAML data and return its content as a dict."""
7782
return yaml.load(data, Loader=NoDatesSafeLoader) or {}

renku/core/workflow/converters/cwl.py

Lines changed: 86 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@
2121
import re
2222
import tempfile
2323
from pathlib import Path
24-
from typing import Any, Dict, Optional, Tuple, Union
24+
from typing import Any, Dict, List, Optional, Tuple, Union, cast
2525
from uuid import uuid4
2626

2727
import cwl_utils.parser.cwl_v1_2 as cwl
2828

2929
from renku.core import errors
3030
from renku.core.plugin import hookimpl
3131
from renku.core.plugin.provider import RENKU_ENV_PREFIX
32-
from renku.core.util.yaml import write_yaml
32+
from renku.core.util.yaml import dumps_yaml, write_yaml
3333
from renku.core.workflow.concrete_execution_graph import ExecutionGraph
3434
from renku.domain_model.workflow.composite_plan import CompositePlan
3535
from renku.domain_model.workflow.converters import IWorkflowConverter
@@ -94,38 +94,80 @@ def workflow_format(self):
9494

9595
@hookimpl
9696
def workflow_convert(
97-
self, workflow: Union[CompositePlan, Plan], basedir: Path, output: Optional[Path], output_format: Optional[str]
98-
):
99-
"""Converts the specified workflow to CWL format."""
97+
self,
98+
workflow: Union[CompositePlan, Plan],
99+
basedir: Path,
100+
output: Optional[Path],
101+
output_format: Optional[str],
102+
resolve_paths: Optional[bool],
103+
nest_workflows: Optional[bool],
104+
) -> str:
105+
"""Converts the specified workflow to CWL format.
106+
107+
Args:
108+
workflow(Union[CompositePlan, Plan]): The plan or composite plan to be converted to CWL.
109+
basedir(Path): The path of the base location used as a prefix for all workflow input and outputs.
110+
output(Optional[Path]): The file where the CWL specification should be saved,
111+
if None then no file is created.
112+
output_format(Optional[str]): Not used. Only YAML is generated, regardless of what is provided.
113+
resolve_paths(Optional[bool]): Whether to make all paths absolute and resolve all symlinks,
114+
True by default.
115+
nest_workflows(Optional[bool]): Whether nested CWL workflows should be used or each sub-workflow should be
116+
a separate file, False by default.
117+
118+
Returns:
119+
The contents of the CWL workflow as string. If nested workflows are used then only the parent
120+
specification is returned.
121+
"""
100122
filename = None
123+
124+
if resolve_paths is None:
125+
resolve_paths = True
126+
101127
if output:
102128
if output.is_dir():
103129
tmpdir = output
130+
filename = None
104131
else:
105132
tmpdir = output.parent
106133
filename = output
107134
else:
108135
tmpdir = Path(tempfile.mkdtemp())
109136

137+
cwl_workflow: Union[cwl.Workflow, CommandLineTool]
110138
if isinstance(workflow, CompositePlan):
111-
path = CWLExporter._convert_composite(
112-
workflow, tmpdir, basedir, filename=filename, output_format=output_format
113-
)
139+
cwl_workflow = CWLExporter._convert_composite(workflow, basedir, resolve_paths=resolve_paths)
140+
if nest_workflows:
141+
# INFO: There is only one parent workflow with all children embedded in it
142+
if cwl_workflow.requirements is None:
143+
cwl_workflow.requirements = []
144+
cwl_workflow.requirements.append(cwl.SubworkflowFeatureRequirement())
145+
else:
146+
# INFO: The parent composite workflow references other workflow files,
147+
# write the child workflows in separate files and reference them in parent
148+
for step in cast(List[WorkflowStep], cwl_workflow.steps):
149+
step_filename = Path(f"{uuid4()}.cwl")
150+
step_path = (tmpdir / step_filename).resolve()
151+
write_yaml(step_path, step.run.save())
152+
step.run = str(step_path)
153+
if filename is None:
154+
filename = Path(f"parent_{uuid4()}.cwl")
114155
else:
115-
_, path = CWLExporter._convert_step(
116-
workflow, tmpdir, basedir, filename=filename, output_format=output_format
117-
)
156+
cwl_workflow = CWLExporter._convert_step(workflow, basedir, resolve_paths=resolve_paths)
157+
if filename is None:
158+
filename = Path(f"{uuid4()}.cwl")
118159

119-
return path.read_text()
160+
cwl_workflow_dict: Dict[str, Any] = cwl_workflow.save()
161+
path = (tmpdir / filename).resolve()
162+
write_yaml(path, cwl_workflow_dict)
163+
return dumps_yaml(cwl_workflow_dict)
120164

121165
@staticmethod
122166
def _sanitize_id(id):
123167
return re.sub(r"/|-", "_", id)
124168

125169
@staticmethod
126-
def _convert_composite(
127-
workflow: CompositePlan, tmpdir: Path, basedir: Path, filename: Optional[Path], output_format: Optional[str]
128-
):
170+
def _convert_composite(workflow: CompositePlan, basedir: Path, resolve_paths: bool) -> cwl.Workflow:
129171
"""Converts a composite plan to a CWL file."""
130172
inputs: Dict[str, str] = {}
131173
arguments = {}
@@ -145,10 +187,8 @@ def _convert_composite(
145187
import networkx as nx
146188

147189
for i, wf in enumerate(nx.topological_sort(graph.workflow_graph)):
148-
cwl_workflow, path = CWLExporter._convert_step(
149-
workflow=wf, tmpdir=tmpdir, basedir=basedir, filename=None, output_format=output_format
150-
)
151-
step = WorkflowStep(in_=[], out=[], run=str(path), id="step_{}".format(i))
190+
step_clitool = CWLExporter._convert_step(workflow=wf, basedir=basedir, resolve_paths=resolve_paths)
191+
step = WorkflowStep(in_=[], out=[], run=step_clitool, id="step_{}".format(i))
152192

153193
for input in wf.inputs:
154194
input_path = input.actual_value
@@ -192,11 +232,17 @@ def _convert_composite(
192232
# check types of paths and add as top level inputs/outputs
193233
for path, id_ in inputs.items():
194234
type_ = "Directory" if os.path.isdir(path) else "File"
235+
location = Path(path)
236+
if resolve_paths:
237+
location = location.resolve()
238+
location_str = str(location.as_uri())
239+
else:
240+
location_str = str(location)
195241
workflow_object.inputs.append(
196242
cwl.WorkflowInputParameter(
197243
id=id_,
198244
type=type_,
199-
default={"location": Path(path).resolve().as_uri(), "class": type_},
245+
default={"location": location_str, "class": type_},
200246
)
201247
)
202248

@@ -211,19 +257,12 @@ def _convert_composite(
211257
id="output_{}".format(index), outputSource="{}/{}".format(step_id, id_), type=type_
212258
)
213259
)
214-
if filename is None:
215-
filename = Path("parent_{}.cwl".format(uuid4()))
216260

217-
output = workflow_object.save()
218-
path = (tmpdir / filename).resolve()
219-
write_yaml(path, output)
220-
return path
261+
return workflow_object
221262

222263
@staticmethod
223-
def _convert_step(
224-
workflow: Plan, tmpdir: Path, basedir: Path, filename: Optional[Path], output_format: Optional[str]
225-
):
226-
"""Converts a single workflow step to a CWL file."""
264+
def _convert_step(workflow: Plan, basedir: Path, resolve_paths: bool) -> CommandLineTool:
265+
"""Converts a single workflow step to a CWL CommandLineTool."""
227266
stdin, stdout, stderr = None, None, None
228267

229268
inputs = list(workflow.inputs)
@@ -276,7 +315,7 @@ def _convert_step(
276315
tool_object.inputs.append(arg)
277316

278317
for input_ in inputs:
279-
tool_input = CWLExporter._convert_input(input_, basedir)
318+
tool_input = CWLExporter._convert_input(input_, basedir, resolve_paths=resolve_paths)
280319

281320
workdir_req.listing.append(
282321
cwl.Dirent(entry="$(inputs.{})".format(tool_input.id), entryname=input_.actual_value, writable=False)
@@ -299,12 +338,18 @@ def _convert_step(
299338
workdir_req.listing.append(
300339
cwl.Dirent(entry="$(inputs.input_renku_metadata)", entryname=".renku", writable=False)
301340
)
341+
location = basedir / ".renku"
342+
if resolve_paths:
343+
location = location.resolve()
344+
location_str = location.as_uri()
345+
else:
346+
location_str = str(location)
302347
tool_object.inputs.append(
303348
cwl.CommandInputParameter(
304349
id="input_renku_metadata",
305350
type="Directory",
306351
inputBinding=None,
307-
default={"location": (basedir / ".renku").resolve().as_uri(), "class": "Directory"},
352+
default={"location": location_str, "class": "Directory"},
308353
)
309354
)
310355

@@ -315,12 +360,7 @@ def _convert_step(
315360
if environment_variables:
316361
tool_object.requirements.append(cwl.EnvVarRequirement(environment_variables)) # type: ignore
317362

318-
output = tool_object.save()
319-
if filename is None:
320-
filename = Path("{}.cwl".format(uuid4()))
321-
path = (tmpdir / filename).resolve()
322-
write_yaml(path, output)
323-
return output, path
363+
return tool_object
324364

325365
@staticmethod
326366
def _convert_parameter(parameter: CommandParameter):
@@ -347,7 +387,7 @@ def _convert_parameter(parameter: CommandParameter):
347387
)
348388

349389
@staticmethod
350-
def _convert_input(input: CommandInput, basedir: Path):
390+
def _convert_input(input: CommandInput, basedir: Path, resolve_paths: bool):
351391
"""Converts an input to a CWL input."""
352392
type_ = (
353393
"Directory"
@@ -371,13 +411,19 @@ def _convert_input(input: CommandInput, basedir: Path):
371411
prefix = prefix[:-1]
372412
separate = True
373413

414+
location = basedir / input.actual_value
415+
if resolve_paths:
416+
location = location.resolve()
417+
location_str = location.as_uri()
418+
else:
419+
location_str = str(location)
374420
return cwl.CommandInputParameter(
375421
id=sanitized_id,
376422
type=type_,
377423
inputBinding=cwl.CommandLineBinding(position=position, prefix=prefix, separate=separate)
378424
if position or prefix
379425
else None,
380-
default={"location": (basedir / input.actual_value).resolve().as_uri(), "class": type_},
426+
default={"location": location_str, "class": type_},
381427
)
382428

383429
@staticmethod

0 commit comments

Comments
 (0)