Skip to content

Commit df89da5

Browse files
committed
Merge branch 'develop' into release/v1.2.0
2 parents 465906d + 54f5abe commit df89da5

File tree

6 files changed

+81
-39
lines changed

6 files changed

+81
-39
lines changed

renku/core/dataset/dataset_add.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,9 @@ def _create_destination_directory(
248248
# what will be its name
249249
dataset_datadir.mkdir(parents=True, exist_ok=True)
250250

251-
destination = destination or Path(".")
252-
destination = cast(Path, get_relative_path(destination, base=dataset_datadir, strict=True))
253-
destination = dataset_datadir / destination
254-
return cast(Path, destination)
251+
destination = destination or ""
252+
relative_path = cast(str, get_relative_path(destination, base=dataset_datadir, strict=True))
253+
return dataset_datadir / relative_path
255254

256255

257256
def _check_ignored_files(client: "LocalClient", files_to_commit: Set[str], files: List[Dict]):
@@ -365,7 +364,7 @@ def check_sources_are_within_remote_repo():
365364
if not sources:
366365
return
367366
for source in sources:
368-
if get_relative_path(path=source, base=repository.path) is None:
367+
if not is_subpath(path=source, base=repository.path):
369368
raise errors.ParameterError(f"Path '{source}' is not within the repository")
370369

371370
def get_paths_from_remote_repo() -> Set[Path]:

renku/core/util/os.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ def get_safe_relative_path(path: Union[Path, str], base: Union[Path, str]) -> Pa
5757
raise ValueError(f"Path '{path}' is not with base directory '{base}'")
5858

5959

60-
def get_relative_path(path: Union[Path, str], base: Union[Path, str], strict: bool = False) -> Optional[Path]:
60+
def get_relative_path(path: Union[Path, str], base: Union[Path, str], strict: bool = False) -> Optional[str]:
6161
"""Return a relative path to the base if path is within base without resolving symlinks."""
6262
try:
6363
absolute_path = get_absolute_path(path=path, base=base)
64-
return Path(absolute_path).relative_to(base)
64+
return str(Path(absolute_path).relative_to(base))
6565
except ValueError:
6666
if strict:
6767
raise errors.ParameterError(f"File {path} is not within path {base}")
@@ -83,7 +83,7 @@ def get_relative_paths(base: Union[Path, str], paths: Sequence[Union[Path, str]]
8383
if relative_path is None:
8484
raise errors.ParameterError(f"Path '{path}' is not within base path '{base}'")
8585

86-
relative_paths.append(str(relative_path))
86+
relative_paths.append(relative_path)
8787

8888
return relative_paths
8989

renku/core/workflow/plan_factory.py

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from contextlib import contextmanager
2525
from itertools import chain
2626
from pathlib import Path
27-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
27+
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
2828

2929
import click
3030
import yaml
@@ -36,7 +36,7 @@
3636
from renku.core.management import RENKU_HOME
3737
from renku.core.util.git import is_path_safe
3838
from renku.core.util.metadata import is_external_file
39-
from renku.core.util.os import get_relative_path
39+
from renku.core.util.os import get_absolute_path, get_relative_path, is_subpath
4040
from renku.core.workflow.types import PATH_OBJECTS, Directory, File
4141
from renku.domain_model.datastructures import DirectoryTree
4242
from renku.domain_model.workflow.parameter import (
@@ -63,8 +63,8 @@ class PlanFactory:
6363
def __init__(
6464
self,
6565
command_line: str,
66-
explicit_inputs: Optional[List[Tuple[str, Optional[str]]]] = None,
67-
explicit_outputs: Optional[List[Tuple[str, Optional[str]]]] = None,
66+
explicit_inputs: Optional[List[Tuple[str, str]]] = None,
67+
explicit_outputs: Optional[List[Tuple[str, str]]] = None,
6868
explicit_parameters: Optional[List[Tuple[str, Optional[str]]]] = None,
6969
directory: Optional[str] = None,
7070
working_dir: Optional[str] = None,
@@ -102,11 +102,11 @@ def __init__(
102102

103103
self.success_codes = success_codes or []
104104

105-
self.explicit_inputs = (
106-
[(Path(os.path.abspath(path)), name) for path, name in explicit_inputs] if explicit_inputs else []
105+
self.explicit_inputs: List[Tuple[str, str]] = (
106+
[(get_absolute_path(path), name) for path, name in explicit_inputs] if explicit_inputs else []
107107
)
108-
self.explicit_outputs = (
109-
[(Path(os.path.abspath(path)), name) for path, name in explicit_outputs] if explicit_outputs else []
108+
self.explicit_outputs: List[Tuple[str, str]] = (
109+
[(get_absolute_path(path), name) for path, name in explicit_outputs] if explicit_outputs else []
110110
)
111111
self.explicit_parameters = explicit_parameters if explicit_parameters else []
112112

@@ -146,19 +146,16 @@ def _is_ignored_path(candidate: Union[Path, str], ignored_list: Set[str] = None)
146146
"""Return True if the path is in ignored list."""
147147
return ignored_list is not None and str(candidate) in ignored_list
148148

149-
def _resolve_existing_subpath(self, candidate) -> Optional[Path]:
149+
def _resolve_existing_subpath(self, candidate: Union[Path, str]) -> Optional[Path]:
150150
"""Return a path instance if it exists in the project's directory."""
151-
candidate = Path(candidate)
152-
153-
if not candidate.is_absolute():
154-
candidate = self.directory / candidate
151+
candidate = self.directory / candidate if not os.path.isabs(candidate) else Path(candidate)
155152

156153
if candidate.exists() or candidate.is_symlink():
157154
path = candidate.resolve()
158155

159-
# NOTE: If relative_path is None then it's is either an external file or an absolute path (e.g. /bin/bash)
160-
relative_path = get_relative_path(path=path, base=self.working_dir)
161-
if relative_path is not None:
156+
# NOTE: If resolved path is not within the project then it's is either an external file or an absolute path
157+
# (e.g. /bin/bash)
158+
if is_subpath(path, base=self.working_dir):
162159
return path
163160
elif is_external_file(path=candidate, client_path=self.working_dir):
164161
return Path(os.path.abspath(candidate))
@@ -324,8 +321,7 @@ def _check_potential_output_directory(
324321
) -> Set[Tuple[str, Optional[str]]]:
325322
"""Check an input/parameter for being a potential output directory."""
326323
subpaths = {str(input_path / path) for path in tree.get(input_path, default=[])}
327-
absolute_path = os.path.abspath(input_path)
328-
if all(Path(absolute_path) != path for path, _ in self.explicit_outputs):
324+
if not self._is_explicit(input_path, self.explicit_outputs):
329325
content = {str(path) for path in input_path.rglob("*") if not path.is_dir() and path.name != ".gitkeep"}
330326
preexisting_paths = content - subpaths
331327
if preexisting_paths:
@@ -371,6 +367,7 @@ def get_stream_mapping_for_value(self, value: Any):
371367
"""Return a stream mapping if value is a path mapped to a stream."""
372368
if self.stdin and self.stdin == value:
373369
return MappedIOStream(id=MappedIOStream.generate_id("stdin"), stream_type="stdin")
370+
374371
if self.stdout and self.stdout == value:
375372
return MappedIOStream(id=MappedIOStream.generate_id("stdout"), stream_type="stdout")
376373
if self.stderr and self.stderr == value:
@@ -386,7 +383,7 @@ def add_command_input(
386383
encoding_format: Optional[List[str]] = None,
387384
):
388385
"""Create a CommandInput."""
389-
if self.no_input_detection and all(Path(default_value).resolve() != path for path, _ in self.explicit_inputs):
386+
if self.no_input_detection and not self._is_explicit(default_value, self.explicit_inputs):
390387
return
391388

392389
mapped_stream = self.get_stream_mapping_for_value(default_value)
@@ -420,7 +417,7 @@ def add_command_output(
420417
mapped_to: Optional[MappedIOStream] = None,
421418
):
422419
"""Create a CommandOutput."""
423-
if self.no_output_detection and all(Path(default_value).resolve() != path for path, _ in self.explicit_outputs):
420+
if self.no_output_detection and not self._is_explicit(default_value, self.explicit_outputs):
424421
return
425422

426423
create_folder = False
@@ -478,7 +475,7 @@ def add_command_output_from_parameter(self, parameter: CommandParameter, name):
478475
"""Create a CommandOutput from a parameter."""
479476
self.parameters.remove(parameter)
480477
value = Path(self._path_relative_to_root(parameter.default_value))
481-
encoding_format = [DIRECTORY_MIME_TYPE] if value.resolve().is_dir() else self._get_mimetype(value)
478+
encoding_format = [DIRECTORY_MIME_TYPE] if value.is_dir() else self._get_mimetype(value)
482479
self.add_command_output(
483480
default_value=str(value),
484481
prefix=parameter.prefix,
@@ -512,8 +509,8 @@ def add_explicit_inputs(self):
512509

513510
for explicit_input, name in self.explicit_inputs:
514511
try:
515-
relative_explicit_input = str(explicit_input.relative_to(self.working_dir))
516-
except ValueError:
512+
relative_explicit_input = get_relative_path(explicit_input, base=self.working_dir, strict=True)
513+
except errors.ParameterError:
517514
raise errors.UsageError(
518515
"The input file or directory is not in the repository."
519516
"\n\n\t" + click.style(str(explicit_input), fg="yellow") + "\n\n"
@@ -611,11 +608,15 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):
611608
candidates |= {(o.b_path, None) for o in repository.unstaged_changes if not o.deleted}
612609

613610
# Filter out explicit outputs
614-
explicit_output_paths = {str(path.relative_to(self.working_dir)) for path, _ in self.explicit_outputs}
611+
explicit_output_paths = {
612+
str(Path(path).relative_to(self.working_dir)) for path, _ in self.explicit_outputs
613+
}
615614
candidates = {c for c in candidates if c[0] not in explicit_output_paths}
616615

617616
# Include explicit outputs
618-
candidates |= {(str(path.relative_to(self.working_dir)), name) for path, name in self.explicit_outputs}
617+
candidates |= {
618+
(str(Path(path).relative_to(self.working_dir)), name) for path, name in self.explicit_outputs
619+
}
619620

620621
candidates = {(path, name) for path, name in candidates if is_path_safe(path)}
621622

@@ -626,7 +627,7 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):
626627
if (
627628
stream
628629
and all(stream != path for path, _ in candidates)
629-
and (Path(os.path.abspath(stream)) != path for path, _ in self.explicit_outputs)
630+
and not self._is_explicit(stream, self.explicit_outputs)
630631
):
631632
unmodified.add(stream)
632633
elif stream:
@@ -652,7 +653,8 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):
652653

653654
def _path_relative_to_root(self, path) -> str:
654655
"""Make a potentially relative path in a subdirectory relative to the root of the repository."""
655-
return str((self.directory / path).resolve().relative_to(self.working_dir))
656+
absolute_path = get_absolute_path(path, base=self.directory)
657+
return cast(str, get_relative_path(absolute_path, base=self.working_dir, strict=True))
656658

657659
def _include_indirect_parameters(self):
658660
run_parameters = read_indirect_parameters(self.working_dir)
@@ -668,7 +670,7 @@ def add_indirect_inputs(self):
668670

669671
for name, indirect_input in read_files_list(indirect_inputs_list).items():
670672
# treat indirect inputs like explicit inputs
671-
path = Path(os.path.abspath(indirect_input))
673+
path = get_absolute_path(indirect_input)
672674
self.explicit_inputs.append((path, name))
673675

674676
# add new explicit inputs (if any) to inputs
@@ -680,14 +682,19 @@ def add_indirect_outputs(self):
680682

681683
for name, indirect_output in read_files_list(indirect_outputs_list).items():
682684
# treat indirect outputs like explicit outputs
683-
path = Path(os.path.abspath(indirect_output))
685+
path = get_absolute_path(indirect_output)
684686
self.explicit_outputs.append((path, name))
685687

686688
def iter_input_files(self, basedir):
687689
"""Yield tuples with input id and path."""
688690
for input_ in self.inputs:
689691
yield input_.id, os.path.normpath(os.path.join(basedir, input_.default_value))
690692

693+
@staticmethod
694+
def _is_explicit(path: Union[Path, str], explicits_collection: List[Tuple[str, str]]) -> bool:
695+
absolute_path = get_absolute_path(path)
696+
return any(absolute_path == path for path, _ in explicits_collection)
697+
691698
@inject.autoparams("project_gateway")
692699
def to_plan(
693700
self,

renku/data/shacl_shape.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@
152152
},
153153
{
154154
"sh:pattern": "mailto:[^@\\s]+@\\S+\\.\\S+"
155+
},
156+
{
157+
"sh:pattern": "http(s)?://[\\S]*orcid.org/[0-9-]+"
155158
}
156159
]
157160
},
@@ -370,6 +373,9 @@
370373
},
371374
{
372375
"sh:pattern": "mailto:[^@\\s]+@\\S+\\.\\S+"
376+
},
377+
{
378+
"sh:pattern": "http(s)?://[\\S]*orcid.org/[0-9-]+"
373379
}
374380
]
375381
},

tests/cli/test_run.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def test_run_metadata(renku_cli, runner, client, client_database_injection_manag
120120
assert 0 == result.exit_code, format_result_exception(result)
121121

122122

123-
def test_run_external_file(renku_cli, runner, client, client_database_injection_manager, tmpdir):
124-
"""Test run with workflow metadata."""
123+
def test_run_with_outside_files(renku_cli, runner, client, client_database_injection_manager, tmpdir):
124+
"""Test run with files that are outside the project."""
125125

126126
external_file = tmpdir.join("file_1")
127127
external_file.write(str(1))
@@ -265,3 +265,15 @@ def test_run_prints_plan_when_stderr_redirected(split_runner, client):
265265
assert 0 == result.exit_code, format_result_exception(result)
266266
assert "Name: echo-command" in (client.path / "output").read_text()
267267
assert "Name:" not in result.output
268+
269+
270+
def test_run_with_external_files(split_runner, client, directory_tree):
271+
"""Test run commands that use external files."""
272+
assert 0 == split_runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code
273+
274+
path = client.path / "data" / "my-dataset" / "directory_tree" / "file1"
275+
276+
result = split_runner.invoke(cli, ["run", "tail", path], stdout="output")
277+
278+
assert 0 == result.exit_code, format_result_exception(result)
279+
assert "file1" in (client.path / "output").read_text()

tests/cli/test_update.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,3 +490,21 @@ def test_update_with_execute(runner, client, renku_cli, client_database_injectio
490490

491491
assert "content_aeven more modified\n" == (client.path / output1).read_text()
492492
assert "content_beven more modified\n" == (client.path / output2).read_text()
493+
494+
495+
def test_update_with_external_files(split_runner, client, directory_tree):
496+
"""Test update commands that use external files."""
497+
assert 0 == split_runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code
498+
499+
path = client.path / "data" / "my-dataset" / "directory_tree" / "file1"
500+
501+
assert 0 == split_runner.invoke(cli, ["run", "tail", path], stdout="output").exit_code
502+
503+
(directory_tree / "file1").write_text("updated file1")
504+
505+
assert 0 == split_runner.invoke(cli, ["dataset", "update", "--all"]).exit_code
506+
507+
result = split_runner.invoke(cli, ["update", "--all"])
508+
509+
assert 0 == result.exit_code, format_result_exception(result)
510+
assert "updated file1" in (client.path / "output").read_text()

0 commit comments

Comments
 (0)