Skip to content

Commit 7d2094e

Browse files
fix(workflow): failure when re-/executing a subset of workflow file steps (#3263)
* fix(workflow): failure when executing a subset of workflow file steps
* fix(workflow): chaining error when re-executing workflow files
1 parent d81a487 commit 7d2094e

File tree

5 files changed

+82
-12
lines changed

5 files changed

+82
-12
lines changed

renku/core/workflow/model/workflow_file.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ def to_command_parameter(self, plan_id: str, index: int) -> CommandParameter:
275275
description=self.description,
276276
id=CommandParameter.generate_id(plan_id=plan_id, name=self.name, postfix=postfix),
277277
name=self.name,
278+
name_set_by_user=self.name_set_by_user,
278279
position=self.position,
279280
postfix=postfix,
280281
prefix=self.prefix,
@@ -312,6 +313,7 @@ def to_command_input(self, plan_id: str, index: int) -> CommandInput:
312313
id=CommandInput.generate_id(plan_id=plan_id, name=self.name, postfix=postfix),
313314
mapped_to=MappedIOStream.from_str(self.mapped_to) if self.mapped_to else None,
314315
name=self.name,
316+
name_set_by_user=self.name_set_by_user,
315317
position=self.position,
316318
postfix=postfix,
317319
prefix=self.prefix,
@@ -336,6 +338,7 @@ def to_command_output(self, plan_id: str, index: int) -> CommandOutput:
336338
id=CommandOutput.generate_id(plan_id=plan_id, name=self.name, postfix=postfix),
337339
mapped_to=MappedIOStream.from_str(self.mapped_to) if self.mapped_to else None,
338340
name=self.name,
341+
name_set_by_user=self.name_set_by_user,
339342
position=self.position,
340343
postfix=postfix,
341344
prefix=self.prefix,

renku/core/workflow/workflow_file.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ def filter_steps(workflow: WorkflowFileCompositePlan, steps: List[str]) -> List[
108108
return [s for s in workflow.plans if s.unqualified_name in selected_steps]
109109

110110

111-
def get_all_workflow_file_inputs_and_outputs(workflow_file: WorkflowFile) -> List[str]:
111+
def get_workflow_file_inputs_and_outputs(workflow_file: WorkflowFile, steps: List[str]) -> List[str]:
112112
"""Return a list of all inputs and outputs that must be committed."""
113-
return [io.path for step in workflow_file.steps for io in itertools.chain(step.inputs, step.outputs) if io.persist]
113+
selected_steps = [s for s in workflow_file.steps if s.name in steps] if steps else workflow_file.steps
114+
115+
return [io.path for step in selected_steps for io in itertools.chain(step.inputs, step.outputs) if io.persist]

renku/domain_model/workflow/parameter.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,18 @@ def generate_id(stream_type: str) -> str:
6868
class CommandParameterBase:
6969
"""Represents a parameter for a Plan."""
7070

71+
# NOTE: This attribute is only used by workflow-file machinery to check if plans are the same or not. We need it,
72+
# because names are generated randomly when not set by users, which makes the comparison return incorrect results.
73+
name_set_by_user: bool = False
74+
7175
def __init__(
7276
self,
7377
*,
7478
default_value: Any,
7579
description: Optional[str],
7680
id: str,
7781
name: Optional[str],
82+
name_set_by_user: bool = False,
7883
position: Optional[int] = None,
7984
prefix: Optional[str] = None,
8085
derived_from: Optional[str] = None,
@@ -90,6 +95,7 @@ def __init__(
9095
self.derived_from: Optional[str] = derived_from
9196
# NOTE: ``postfix`` is used only to generate a nicer ``id`` for a parameter. Its value isn't used anywhere else.
9297
self.postfix: Optional[str] = postfix
98+
self.name_set_by_user: bool = name_set_by_user
9399

94100
if name is not None:
95101
self.name: str = name
@@ -132,10 +138,13 @@ def role(self) -> str:
132138
@staticmethod
133139
def _get_equality_attributes() -> List[str]:
134140
"""Return a list of attributes values that determine if instances are equal."""
135-
return ["name", "description", "default_value", "prefix", "position"]
141+
# NOTE: ``name`` is excluded here; ``is_equal_to`` compares it separately, and only when it was set by the user
142+
return ["description", "default_value", "prefix", "position"]
136143

137144
def is_equal_to(self, other) -> bool:
138145
"""Return if attributes that cause a change in the parameter, are the same."""
146+
if self.name_set_by_user != other.name_set_by_user or (self.name_set_by_user and self.name != other.name):
147+
return False
139148
return all(getattr(self, a) == getattr(other, a) for a in self._get_equality_attributes())
140149

141150
def to_argv(self, quote_string: bool = True) -> List[Any]:
@@ -193,6 +202,7 @@ def __init__(
193202
description: str = None,
194203
id: str,
195204
name: str = None,
205+
name_set_by_user: bool = False,
196206
position: Optional[int] = None,
197207
prefix: str = None,
198208
derived_from: str = None,
@@ -203,6 +213,7 @@ def __init__(
203213
description=description,
204214
id=id,
205215
name=name,
216+
name_set_by_user=name_set_by_user,
206217
position=position,
207218
prefix=prefix,
208219
derived_from=derived_from,
@@ -245,6 +256,7 @@ def __init__(
245256
id: str,
246257
mapped_to: Optional[MappedIOStream] = None,
247258
name: Optional[str] = None,
259+
name_set_by_user: bool = False,
248260
position: Optional[int] = None,
249261
prefix: Optional[str] = None,
250262
encoding_format: Optional[List[str]] = None,
@@ -258,6 +270,7 @@ def __init__(
258270
description=description,
259271
id=id,
260272
name=name,
273+
name_set_by_user=name_set_by_user,
261274
position=position,
262275
prefix=prefix,
263276
derived_from=derived_from,
@@ -323,6 +336,7 @@ def __init__(
323336
id: str,
324337
mapped_to: Optional[MappedIOStream] = None,
325338
name: Optional[str] = None,
339+
name_set_by_user: bool = False,
326340
position: Optional[int] = None,
327341
prefix: Optional[str] = None,
328342
encoding_format: Optional[List[str]] = None,
@@ -336,6 +350,7 @@ def __init__(
336350
description=description,
337351
id=id,
338352
name=name,
353+
name_set_by_user=name_set_by_user,
339354
position=position,
340355
prefix=prefix,
341356
derived_from=derived_from,
@@ -381,7 +396,8 @@ def is_equal_to(self, other) -> bool:
381396
@staticmethod
382397
def _get_equality_attributes() -> List[str]:
383398
"""Return a list of attributes values that determine if instances are equal."""
384-
return CommandParameterBase._get_equality_attributes() + ["encoding_format", "create_folder"]
399+
# NOTE: Don't include ``create_folder`` in comparison since its value is state-dependent
400+
return CommandParameterBase._get_equality_attributes() + ["encoding_format"]
385401

386402
def derive(self, plan_id: str) -> "CommandOutput":
387403
"""Create a new ``CommandOutput`` that is derived from self."""
@@ -400,10 +416,18 @@ def __init__(
400416
description: Optional[str] = None,
401417
id: str,
402418
name: Optional[str] = None,
419+
name_set_by_user: bool = False,
403420
mapped_parameters: List[CommandParameterBase],
404421
**kwargs,
405422
):
406-
super().__init__(default_value=default_value, description=description, id=id, name=name, **kwargs)
423+
super().__init__(
424+
default_value=default_value,
425+
description=description,
426+
id=id,
427+
name=name,
428+
name_set_by_user=name_set_by_user,
429+
**kwargs,
430+
)
407431

408432
self.mapped_parameters: List[CommandParameterBase] = mapped_parameters
409433

renku/ui/cli/run.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@
481481
from renku.core import errors
482482
from renku.core.plugin.workflow_file_parser import read_workflow_file
483483
from renku.core.util.os import is_subpath
484-
from renku.core.workflow.workflow_file import get_all_workflow_file_inputs_and_outputs
484+
from renku.core.workflow.workflow_file import get_workflow_file_inputs_and_outputs
485485
from renku.domain_model.project_context import project_context
486486
from renku.ui.cli.utils.callback import ClickCallback
487487
from renku.ui.cli.utils.plugins import available_workflow_providers
@@ -594,6 +594,7 @@ def is_workflow_file() -> bool:
594594
communicator.warn("All flags other than '--file', '--verbose', '--dry-run', and 'no-commit' are ignored")
595595

596596
path = command_line[0]
597+
steps = command_line[1:]
597598
no_commit = no_commit or dry_run
598599

599600
# NOTE: Read the workflow file to get list of generated files that should be committed
@@ -603,7 +604,9 @@ def is_workflow_file() -> bool:
603604
else:
604605
workflow_file = read_workflow_file(path=path, parser="renku")
605606
commit_only = (
606-
[path] + get_all_workflow_file_inputs_and_outputs(workflow_file) + [str(project_context.metadata_path)]
607+
[path]
608+
+ get_workflow_file_inputs_and_outputs(workflow_file=workflow_file, steps=steps)
609+
+ [str(project_context.metadata_path)]
607610
)
608611

609612
provider = provider or "local"
@@ -612,7 +615,7 @@ def is_workflow_file() -> bool:
612615
run_workflow_file_command(no_commit=no_commit, commit_only=commit_only)
613616
.with_communicator(communicator)
614617
.build()
615-
.execute(path=path, steps=command_line[1:], dry_run=dry_run, workflow_file=workflow_file, provider=provider)
618+
.execute(path=path, steps=steps, dry_run=dry_run, workflow_file=workflow_file, provider=provider)
616619
)
617620

618621
if dry_run:

tests/cli/test_workflow_file.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,15 @@ def test_dry_run_workflow_file(runner, workflow_file_project):
7171

7272
def test_run_workflow_file_with_selected_steps(runner, workflow_file_project):
7373
"""Test running a sub-set of steps of a workflow file."""
74-
result = runner.invoke(cli, ["run", "--dry-run", workflow_file_project.workflow_file, "head", "line-count"])
74+
result = runner.invoke(cli, ["run", workflow_file_project.workflow_file, "head", "tail"])
7575
assert 0 == result.exit_code, format_result_exception(result)
7676

77-
assert "Will execute step 'head': head $n $models $colors > $temporary-result" in result.output
78-
assert "Will execute step 'tail': tail $parameters intermediate > results/output.csv" not in result.output
79-
assert "Will execute step 'line-count': wc -l $models-and-colors > $output" in result.output
77+
assert "Executing step 'workflow-file.head':" in result.output
78+
assert "Executing step 'workflow-file.tail':" in result.output
79+
assert "Executing step 'workflow-file.line-count':" not in result.output
80+
81+
# Third step's output isn't created
82+
assert not (workflow_file_project.path / "results" / "output.csv.wc").exists()
8083

8184

8285
def test_run_workflow_file_with_no_commit(runner, workflow_file_project):
@@ -357,6 +360,41 @@ def test_workflow_file_plan_versioning(runner, workflow_file_project, with_injec
357360
assert line_count_3.derived_from is None
358361

359362

363+
def test_workflow_file_plan_versioning_with_selected_steps(runner, workflow_file_project, with_injection):
364+
"""Test plans are versioned correctly when executing subsets of steps."""
365+
result = runner.invoke(cli, ["run", workflow_file_project.workflow_file, "head", "tail"])
366+
assert 0 == result.exit_code, format_result_exception(result)
367+
time.sleep(1)
368+
369+
with with_injection():
370+
plan_gateway = PlanGateway()
371+
root_plan_1 = plan_gateway.get_by_name("workflow-file")
372+
head_1 = plan_gateway.get_by_name("workflow-file.head")
373+
tail_1 = plan_gateway.get_by_name("workflow-file.tail")
374+
line_count_1 = plan_gateway.get_by_name("workflow-file.line-count")
375+
376+
result = runner.invoke(cli, ["run", workflow_file_project.workflow_file])
377+
assert 0 == result.exit_code, format_result_exception(result)
378+
379+
time.sleep(1)
380+
381+
with with_injection():
382+
plan_gateway = PlanGateway()
383+
root_plan_2 = plan_gateway.get_by_name("workflow-file")
384+
head_2 = plan_gateway.get_by_name("workflow-file.head")
385+
tail_2 = plan_gateway.get_by_name("workflow-file.tail")
386+
line_count_2 = plan_gateway.get_by_name("workflow-file.line-count")
387+
388+
# Plan `line-count` wasn't executed in the first run
389+
assert line_count_1 is None
390+
assert line_count_2 is not None
391+
392+
# Everything else is the same
393+
assert root_plan_2.id == root_plan_1.id
394+
assert head_2.id == head_1.id
395+
assert tail_2.id == tail_1.id
396+
397+
360398
def test_duplicate_workflow_file_plan_name(runner, workflow_file_project):
361399
"""Test workflow file execution fails if a plan with the same name exists."""
362400
workflow_file_project.repository.add(all=True)

0 commit comments

Comments
 (0)