Skip to content

Commit f4c37dd

Browse files
fix(workflow): set modification date when deriving a plan (#3304)
* fix(workflow): set modification date when deriving a plan * fix(graph): various v10 fixes
1 parent 6667779 commit f4c37dd

File tree

7 files changed

+118
-30
lines changed

7 files changed

+118
-30
lines changed

renku/command/checks/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from .project import check_project_id_group
3131
from .storage import check_lfs_info
3232
from .validate_shacl import check_datasets_structure, check_project_structure
33-
from .workflow import check_activity_catalog
33+
from .workflow import check_activity_catalog, check_modification_date
3434

3535
# Checks will be executed in the order as they are listed in __all__. They are mostly used in ``doctor`` command to
3636
# inspect broken things. The order of operations matters when fixing issues, so, don't sort this list.
@@ -48,4 +48,5 @@
4848
"check_missing_files",
4949
"check_project_id_group",
5050
"check_project_structure",
51+
"check_modification_date",
5152
)

renku/command/checks/workflow.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717
# limitations under the License.
1818
"""Checks needed to determine integrity of workflows."""
1919

20-
from typing import Optional, Tuple
20+
from typing import List, Optional, Tuple, cast
2121

22+
from renku.command.command_builder import inject
2223
from renku.command.util import WARNING
24+
from renku.core.interface.plan_gateway import IPlanGateway
2325
from renku.core.util import communication
2426
from renku.domain_model.project_context import project_context
27+
from renku.domain_model.workflow.plan import AbstractPlan
2528
from renku.infrastructure.gateway.activity_gateway import reindex_catalog
2629

2730

@@ -58,3 +61,67 @@ def check_activity_catalog(fix, force, **_) -> Tuple[bool, Optional[str]]:
5861
communication.info("Workflow metadata was rebuilt")
5962

6063
return True, None
64+
65+
66+
@inject.autoparams("plan_gateway")
67+
def check_modification_date(fix, plan_gateway: IPlanGateway, **_) -> Tuple[bool, Optional[str]]:
68+
"""Check if all plans have modification date set for them.
69+
70+
Args:
71+
fix(bool): Whether to fix found issues.
72+
plan_gateway(IPlanGateway): Injected PlanGateway.
73+
_: keyword arguments.
74+
75+
Returns:
76+
Tuple[bool, Optional[str]]: Tuple of whether there are plans without modification date and a string of their IDs
77+
"""
78+
plans: List[AbstractPlan] = plan_gateway.get_all_plans()
79+
80+
to_be_processed = []
81+
for plan in plans:
82+
if not hasattr(plan, "date_modified") or plan.date_modified is None:
83+
to_be_processed.append(plan)
84+
85+
if not to_be_processed:
86+
return True, None
87+
if not fix:
88+
ids = [plan.id for plan in to_be_processed]
89+
message = (
90+
WARNING
91+
+ "The following workflows have incorrect modification date (use 'renku doctor --fix' to fix them).:\n\t"
92+
+ "\n\t".join(ids)
93+
)
94+
return False, message
95+
96+
fix_plan_dates(plans=to_be_processed, plan_gateway=plan_gateway)
97+
project_context.database.commit()
98+
communication.info("Workflow modification dates were fixed")
99+
100+
return True, None
101+
102+
103+
def fix_plan_dates(plans: List[AbstractPlan], plan_gateway):
104+
"""Set modification date on a list of plans and fix their creation date."""
105+
processed = set()
106+
# NOTE: switch creation date for modification date
107+
for tail in plans:
108+
to_be_processed: List[AbstractPlan] = []
109+
if tail not in processed:
110+
processed.add(tail)
111+
to_be_processed.append(tail)
112+
creation_date = tail.date_created
113+
plan = tail
114+
115+
while plan.is_derivation():
116+
plan = cast(AbstractPlan, plan_gateway.get_by_id(plan.derived_from))
117+
creation_date = plan.date_created
118+
if plan not in processed:
119+
processed.add(plan)
120+
to_be_processed.append(plan)
121+
122+
while to_be_processed:
123+
plan = to_be_processed.pop()
124+
plan.unfreeze()
125+
plan.date_modified = plan.date_created
126+
plan.date_created = creation_date
127+
plan.freeze()

renku/core/migration/m_0010__metadata_fixes.py

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
import zstandard as zstd
2828

29+
from renku.command.checks.workflow import fix_plan_dates
2930
from renku.command.command_builder import inject
3031
from renku.core.interface.activity_gateway import IActivityGateway
3132
from renku.core.interface.dataset_gateway import IDatasetGateway
@@ -170,18 +171,26 @@ def migrate_remote_entity_ids():
170171
"""Change `remote-entity` to `remote-entities` in ids."""
171172
database = project_context.database
172173

173-
datasets: List[Dataset] = list(database["datasets"].values())
174-
175-
for dataset in datasets:
174+
def fix_dataset_files_based_on(dataset):
176175
changed = False
177-
for file in dataset.files:
176+
for file in dataset.dataset_files:
178177
if file.based_on is not None:
178+
file.based_on.id = file.based_on.id.replace("/remote-entity//", "/remote-entities/")
179179
file.based_on.id = file.based_on.id.replace("/remote-entity/", "/remote-entities/")
180180
changed = True
181181

182182
if changed:
183183
dataset._p_changed = True
184184

185+
datasets: List[Dataset] = list(database["datasets-provenance-tails"].values())
186+
187+
for dataset in datasets:
188+
fix_dataset_files_based_on(dataset)
189+
190+
while dataset.derived_from is not None:
191+
dataset = database.get_by_id(id=dataset.derived_from.url_id)
192+
fix_dataset_files_based_on(dataset)
193+
185194
database.commit()
186195

187196

@@ -257,25 +266,7 @@ def fix_plan_times(activity_gateway: IActivityGateway, plan_gateway: IPlanGatewa
257266
plan.date_created = activity_map[plan.id].started_at_time
258267
plan.freeze()
259268

260-
# NOTE: switch creation date for modification date
261-
for tail in plan_gateway.get_newest_plans_by_names(include_deleted=True).values():
262-
stack: List[AbstractPlan] = []
263-
stack.append(tail)
264-
creation_date = tail.date_created
265-
plan = tail
266-
267-
while plan.is_derivation():
268-
plan = cast(AbstractPlan, plan_gateway.get_by_id(plan.derived_from))
269-
creation_date = plan.date_created
270-
stack.append(plan)
271-
272-
while stack:
273-
plan = stack.pop()
274-
plan.unfreeze()
275-
plan.date_modified = plan.date_created
276-
plan.date_created = creation_date
277-
plan.freeze
278-
269+
fix_plan_dates(plans=plans, plan_gateway=plan_gateway)
279270
database.commit()
280271

281272

@@ -293,7 +284,9 @@ def fix_dataset_date_modified(dataset_gateway: IDatasetGateway):
293284
modification_date = dataset.date_removed or dataset.date_created
294285

295286
if modification_date is not None:
296-
assert modification_date <= previous_modification_date
287+
# NOTE: This happened in a project due to a timezone change
288+
if modification_date > previous_modification_date:
289+
modification_date = previous_modification_date
297290
dataset.unfreeze()
298291
dataset.date_modified = modification_date
299292
dataset.freeze()

renku/domain_model/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ def __init__(self, *, checksum: str, id: Optional[str] = None, path: Union[Path,
213213
def generate_id(checksum: str, path: Union[Path, str], url: str) -> str:
214214
"""Generate an id."""
215215
parsed_url = urlparse(url)
216-
prefix = quote(posixpath.join(parsed_url.netloc, parsed_url.path))
217-
path = quote(str(path))
216+
prefix = quote(posixpath.join(parsed_url.netloc.strip("/"), parsed_url.path.strip("/")))
217+
path = quote(str(path).strip("/"))
218218
return f"/remote-entities/{prefix}/{checksum}/{path}"
219219

220220
def __eq__(self, other):

renku/domain_model/workflow/plan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def derive(self, creator: Optional[Person] = None) -> "Plan":
317317
"""Create a new ``Plan`` that is derived from self."""
318318
derived = copy.copy(self)
319319
derived.derived_from = self.id
320-
derived.date_created = local_now()
320+
derived.date_modified = local_now()
321321
derived.parameters = self.parameters.copy()
322322
derived.inputs = self.inputs.copy()
323323
derived.keywords = copy.deepcopy(self.keywords)

tests/cli/test_workflow.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import shutil
2626
import sys
2727
import tempfile
28+
import time
2829
import uuid
2930
from pathlib import Path
3031

@@ -36,6 +37,7 @@
3637
from renku.core.plugin.provider import available_workflow_providers
3738
from renku.core.util.git import with_commit
3839
from renku.core.util.yaml import write_yaml
40+
from renku.domain_model.workflow.plan import Plan
3941
from renku.infrastructure.database import Database
4042
from renku.infrastructure.gateway.activity_gateway import ActivityGateway
4143
from renku.infrastructure.gateway.plan_gateway import PlanGateway
@@ -428,17 +430,21 @@ def _get_plan_id(output):
428430
database = Database.from_path(project.database_path)
429431
test_plan = database["plans-by-name"][workflow_name]
430432

433+
time.sleep(1)
434+
431435
cmd = ["workflow", "edit", workflow_name, "--name", "first"]
432436
result = runner.invoke(cli, cmd)
433437
assert 0 == result.exit_code, format_result_exception(result)
434438

435439
workflow_name = "first"
436440
database = Database.from_path(project.database_path)
437-
first_plan = database["plans-by-name"]["first"]
441+
first_plan: Plan = database["plans-by-name"]["first"]
438442

439443
assert first_plan
440444
assert first_plan.name == "first"
441445
assert first_plan.derived_from == test_plan.id
446+
assert first_plan.date_created == test_plan.date_created
447+
assert (first_plan.date_modified - first_plan.date_created).total_seconds() >= 1
442448

443449
cmd = ["workflow", "edit", workflow_name, "--description", "Test workflow"]
444450
result = runner.invoke(cli, cmd)

tests/core/test_plan.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
# See the License for the specific language governing permissions and
1717
# limitations under the License.
1818
"""Renku plan management tests."""
19+
from datetime import datetime
1920

2021
import pytest
2122

23+
from renku.command.checks import check_modification_date
2224
from renku.core import errors
2325
from renku.core.workflow.plan import (
2426
get_activities,
@@ -160,3 +162,22 @@ def test_get_activities(project_with_injection):
160162
plan_activities = set(get_activities(plan))
161163

162164
assert set(activities[0:5]) == plan_activities
165+
166+
167+
def test_modification_date_fix(project_with_injection):
168+
"""Check that plans without modification date are fixed."""
169+
_, _, plan, _, _, unrelated = create_dummy_plans()
170+
171+
date_created = plan.date_created
172+
dummy_date = datetime(2023, 2, 8, 0, 42, 0)
173+
174+
# Remove change modification and creation dates on some plans
175+
plan.date_created = dummy_date
176+
del plan.date_modified
177+
unrelated.date_modified = None
178+
179+
check_modification_date(fix=True)
180+
181+
assert dummy_date == plan.date_modified
182+
assert unrelated.date_created == unrelated.date_modified
183+
assert date_created == plan.date_created

0 commit comments

Comments
 (0)