Commit 038c114

hotfix: release v1.7.1 (#3090)
* feat(core): add existing data directory files to dataset on creation
1 parent 8358a0e commit 038c114

9 files changed (+99, -13 lines)

CHANGES.rst

Lines changed: 15 additions & 0 deletions

@@ -18,6 +18,21 @@
 Changes
 =======
 
+`1.7.1 <https://github.com/SwissDataScienceCenter/renku-python/compare/v1.7.0...v1.7.1>`__ (2022-09-06)
+-------------------------------------------------------------------------------------------------------
+
+Bug Fixes
+~~~~~~~~~
+
+- **cli:** fix bug with adding file to dataset that's already in its data directory
+  (`#3090 <https://github.com/SwissDataScienceCenter/renku-python/pull/3090>`__)
+
+Features
+~~~~~~~~
+
+- **cli:** add existing data directory files to dataset on creation
+  (`#3090 <https://github.com/SwissDataScienceCenter/renku-python/pull/3090>`__)
+
 `1.7.0 <https://github.com/SwissDataScienceCenter/renku-python/compare/v1.6.0...v1.7.0>`__ (2022-09-05)
 -------------------------------------------------------------------------------------------------------

helm-chart/renku-core/Chart.yaml

Lines changed: 1 addition & 1 deletion

@@ -3,4 +3,4 @@ appVersion: "1.0"
 description: A Helm chart for Kubernetes
 name: renku-core
 icon: https://avatars0.githubusercontent.com/u/53332360?s=400&u=a4311d22842343604ef61a8c8a1e5793209a67e9&v=4
-version: 1.7.0
+version: 1.7.1

helm-chart/renku-core/values.yaml

Lines changed: 1 addition & 1 deletion

@@ -111,7 +111,7 @@ versions:
     fullnameOverride: ""
     image:
       repository: renku/renku-core
-      tag: "v1.7.0"
+      tag: "v1.7.1"
       pullPolicy: IfNotPresent
   v8:
     name: v8

renku/core/dataset/dataset.py

Lines changed: 34 additions & 2 deletions

@@ -49,6 +49,7 @@
     delete_dataset_file,
     delete_path,
     get_absolute_path,
+    get_files,
     get_safe_relative_path,
     hash_file,
     is_path_empty,

@@ -180,6 +181,8 @@ def create_dataset(
     if storage:
         provider = ProviderFactory.get_create_provider(uri=storage)
         provider.on_create(dataset=dataset)
+    else:
+        add_datadir_files_to_dataset(client, dataset)
 
     if update_provenance:
         datasets_provenance.add_or_update(dataset)

@@ -794,11 +797,38 @@ def show_dataset(name: str, tag: Optional[str] = None):
     return DatasetDetailsJson().dump(dataset)
 
 
+def add_datadir_files_to_dataset(client: "LocalClient", dataset: Dataset) -> None:
+    """Add all files in a dataset's data directory to the dataset.
+
+    Args:
+        client(LocalClient): The ``LocalClient``.
+        dataset(Dataset): The dataset to add data dir files to.
+    """
+    datadir = get_safe_relative_path(dataset.get_datadir(), client.path)
+
+    if datadir.exists():
+        # NOTE: Add existing files to dataset
+        dataset_files: List[DatasetFile] = []
+        files: List[Path] = []
+        for file in get_files(datadir):
+            files.append(file)
+            dataset_files.append(DatasetFile.from_path(client=client, path=file, source=file))
+
+        if not dataset_files:
+            return
+
+        if client.check_external_storage():
+            client.track_paths_in_storage(*files)
+        client.repository.add(*files)
+
+        dataset.add_or_update_files(dataset_files)
+
+
 def set_dataset_images(client: "LocalClient", dataset: Dataset, images: Optional[List[ImageRequestModel]]):
     """Set a dataset's images.
 
     Args:
-        client("LocalClient"): The ``LocalClient``.
+        client(LocalClient): The ``LocalClient``.
         dataset(Dataset): The dataset to set images on.
         images(List[ImageRequestModel]): The images to set.

@@ -1238,7 +1268,9 @@ def pull_external_data(
     """Pull/copy data for an external storage to a dataset's data directory or a specified location.
 
     Args:
-        name(str): Name of the dataset
+        client_dispatcher(IClientDispatcher): The client dispatcher.
+        storage_factory(IStorageFactory): The storage factory.
+        name(str): Name of the dataset.
         location(Optional[Path]): A directory to copy data to (Default value = None).
     """
     client = client_dispatcher.current_client
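
In plain terms, the dataset.py change means that creating a dataset without a remote storage backend now walks the dataset's data directory, tracks the files in external storage (LFS) when that is enabled, stages them in git, and records them on the dataset. A rough, framework-free sketch of the walk-and-collect step, using plain pathlib instead of Renku's get_files helper (illustrative only, not the project's API):

from pathlib import Path
from typing import List


def collect_datadir_files(datadir: Path) -> List[Path]:
    # Roughly what get_files(datadir) yields inside add_datadir_files_to_dataset:
    # every regular file below the dataset's data directory.
    if not datadir.exists():
        return []
    return [path for path in sorted(datadir.rglob("*")) if path.is_file()]

Each collected path then becomes a DatasetFile entry via DatasetFile.from_path, so files that were already on disk show up in the dataset's metadata right after creation.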

renku/core/dataset/dataset_add.py

Lines changed: 7 additions & 1 deletion

@@ -35,7 +35,7 @@
 from renku.core.util.dataset import check_url
 from renku.core.util.dispatcher import get_client, get_database
 from renku.core.util.git import get_git_user
-from renku.core.util.os import delete_dataset_file, get_relative_path
+from renku.core.util.os import delete_dataset_file, get_files, get_relative_path
 from renku.domain_model.dataset import Dataset, DatasetFile
 
 if TYPE_CHECKING:

@@ -83,6 +83,12 @@ def add_to_dataset(
 
     client.check_external_storage()  # TODO: This is not required for external storages
 
+    datadir = cast(Path, client.path / dataset.get_datadir())
+    if create and datadir.exists():
+        # NOTE: Add datadir to paths to add missing files on create
+        for file in get_files(datadir):
+            urls.append(str(file))
+
     files = _download_files(
         client=client,
         urls=urls,
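
The add_to_dataset change covers the renku dataset add --create path: files already present in the data directory are appended to the list of paths to add, so they flow through the same pipeline as user-supplied sources. A minimal sketch of that expansion step, again with plain pathlib standing in for get_files (names are illustrative):

from pathlib import Path
from typing import List


def expand_paths_for_create(urls: List[str], datadir: Path, create: bool) -> List[str]:
    # Mirrors the new block in add_to_dataset(): on --create, fold files
    # already sitting in the data directory into the list of paths to add.
    if create and datadir.exists():
        urls = list(urls) + [str(p) for p in sorted(datadir.rglob("*")) if p.is_file()]
    return urls

For example, expand_paths_for_create(["new.csv"], Path("data/my-dataset"), create=True) would also pick up data/my-dataset/existing_file if that file is already on disk.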

renku/core/dataset/providers/local.py

Lines changed: 8 additions & 5 deletions

@@ -27,7 +27,7 @@
 from renku.core.dataset.providers.api import ExporterApi, ProviderApi, ProviderPriority
 from renku.core.util import communication
 from renku.core.util.dataset import check_url
-from renku.core.util.os import get_absolute_path, is_path_empty
+from renku.core.util.os import get_absolute_path, is_path_empty, is_subpath
 
 if TYPE_CHECKING:
     from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter

@@ -166,6 +166,7 @@ def get_destination_root():
 
     def get_metadata(src: Path) -> DatasetAddMetadata:
         is_tracked = client.repository.contains(src)
+        in_datadir = is_subpath(src, absolute_dataset_data_dir)
 
         relative_path = src.relative_to(source_root)
         dst = destination_root / relative_path

@@ -175,12 +176,14 @@ def get_metadata(src: Path) -> DatasetAddMetadata:
 
         if not is_tracked and not external and action == DatasetAddAction.SYMLINK:
             # NOTE: we need to commit src if it is linked to and not external.
+            if client.check_external_storage():
+                client.track_paths_in_storage(src)
             client.repository.add(src)
-
+        source_url = os.path.relpath(src, client.path)
         return DatasetAddMetadata(
-            entity_path=dst.relative_to(client.path),
-            url=os.path.relpath(src, client.path),
-            action=action,
+            entity_path=Path(source_url) if in_datadir else dst.relative_to(client.path),
+            url=source_url,
+            action=DatasetAddAction.NONE if in_datadir else action,
             source=src,
             destination=dst,
         )
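
This provider change is the other half of the bug fix: a source path that already lives inside the dataset's data directory keeps its current relative path and gets DatasetAddAction.NONE, so it is not copied or symlinked onto itself, and untracked symlink sources are now also pushed to external storage (LFS) before being staged. The decision hinges on the is_subpath check imported from renku.core.util.os; a simplified stand-in for that helper, for illustration only:

from pathlib import Path


def is_subpath(path: Path, base: Path) -> bool:
    # Simplified: True if ``path`` resolves to a location below ``base``.
    try:
        path.resolve().relative_to(base.resolve())
        return True
    except ValueError:
        return False

So is_subpath(Path("data/my-dataset/my_file"), Path("data/my-dataset")) is True and the file is recorded in place, while anything outside the data directory is copied, moved, or symlinked as before.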

renku/ui/cli/dataset.py

Lines changed: 1 addition & 1 deletion

@@ -818,7 +818,7 @@ def add(name, urls, force, overwrite, create, destination, datadir, **kwargs):
         .with_communicator(communicator)
         .build()
         .execute(
-            urls=urls,
+            urls=list(urls),
             dataset_name=name,
             force=force,
             overwrite=overwrite,
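
The one-line CLI change is needed because click passes a variadic argument (nargs=-1) to the command as an immutable tuple, while add_to_dataset now appends data-directory files to that collection on --create; converting it to a list up front keeps that append legal. Illustration with made-up values:

urls = ("new.csv",)                      # what click hands to the command
urls = list(urls)                        # what the CLI now passes down
urls.append("data/my-dataset/my_file")   # the append performed on --create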

renku/version.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 except ImportError:
     from importlib_metadata import distribution  # type: ignore
 
-__version__ = "1.7.0"
+__version__ = "1.7.1"
 __template_version__ = "0.3.1"
 __minimum_project_version__ = "1.7.0"

tests/cli/test_datasets.py

Lines changed: 31 additions & 1 deletion

@@ -70,6 +70,27 @@ def test_datasets_create_clean_with_datadir(runner, project, client, load_dataset_with_injection):
     assert not client.repository.is_dirty(untracked_files=True)
 
 
+def test_datasets_create_with_datadir_with_files(runner, project, client, load_dataset_with_injection):
+    """Test creating a dataset with a data directory that already contains files."""
+
+    datadir = Path("my/data/dir")
+    datadir.mkdir(parents=True, exist_ok=True)
+
+    file = datadir / "my_file"
+    file.write_text("content")
+
+    result = runner.invoke(cli, ["dataset", "create", "--datadir", datadir, "dataset"])
+    assert 0 == result.exit_code, format_result_exception(result)
+    assert "OK" in result.output
+
+    dataset = load_dataset_with_injection("dataset", client)
+    assert isinstance(dataset, Dataset)
+    assert datadir == dataset.get_datadir(client)
+    assert dataset.find_file(file)
+
+    assert not client.repository.is_dirty(untracked_files=True)
+
+
 def test_datasets_create_dirty(runner, project, client, load_dataset_with_injection):
     """Test creating a dataset in a dirty repository."""
     (client.path / "untracked").write_text("untracked")

@@ -475,6 +496,13 @@ def test_add_and_create_dataset(
     assert 1 == result.exit_code
     assert 'Dataset "new-dataset" does not exist.' in result.output
 
+    existing_file = client.path / datadir / "myfolder" / "myfile"
+    existing_file.parent.mkdir(parents=True, exist_ok=True)
+    existing_file.write_text("content")
+
+    existing_folder = client.path / datadir / "my_other_folder"
+    existing_folder.mkdir(parents=True, exist_ok=True)
+
     # Add succeeds with --create
     result = runner.invoke(
         cli,

@@ -491,7 +519,9 @@ def test_add_and_create_dataset(
     assert os.stat(path2)
     assert os.stat(path3)
     dataset = load_dataset_with_injection("new-dataset", client)
-    assert {os.path.relpath(p, client.path) for p in [path1, path2, path3]} == {f.entity.path for f in dataset.files}
+    assert {os.path.relpath(p, client.path) for p in [path1, path2, path3, existing_file]} == {
+        f.entity.path for f in dataset.files
+    }
 
     # Further, add with --create fails
     result = runner.invoke(cli, ["dataset", "add", "--copy", "--create", "new-dataset", str(directory_tree)])
