from renku.core.dataset.request_model import ImageRequestModel
from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection
from renku.core.interface.dataset_gateway import IDatasetGateway
-from renku.core.interface.storage import IStorageFactory
from renku.core.storage import check_external_storage, pull_paths_from_storage, track_paths_in_storage
from renku.core.util import communication
from renku.core.util.datetime8601 import local_now
@@ -399,7 +398,7 @@ def export_dataset(name, provider_name, tag, **kwargs):

    dataset: Optional[Dataset] = datasets_provenance.get_by_name(name, strict=True, immutable=True)

-    provider = ProviderFactory.from_name(provider_name)
+    provider = ProviderFactory.get_export_provider(provider_name=provider_name)

    selected_tag = None
    tags = datasets_provenance.get_all_tags(dataset)  # type: ignore
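For context, a minimal sketch of the call-site difference in this hunk, using only names visible in the diff; the provider name "zenodo" is illustrative and assumes such an export provider is registered.

# Old lookup: resolve a provider purely by its name.
provider = ProviderFactory.from_name("zenodo")  # "zenodo" is an illustrative provider name

# New lookup: explicitly request a provider that supports dataset export.
provider = ProviderFactory.get_export_provider(provider_name="zenodo")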
@@ -879,11 +878,7 @@ def update_dataset_custom_metadata(


@inject.autoparams("dataset_gateway")
-def move_files(
-    dataset_gateway: IDatasetGateway,
-    files: Dict[Path, Path],
-    to_dataset_name: Optional[str] = None,
-):
+def move_files(dataset_gateway: IDatasetGateway, files: Dict[Path, Path], to_dataset_name: Optional[str] = None):
    """Move files and their metadata from one or more datasets to a target dataset.

    Args:
@@ -1222,13 +1217,11 @@ def should_include(filepath: Path) -> bool:
    return sorted(records, key=lambda r: r.date_added)


-@inject.autoparams("storage_factory")
-def pull_external_data(name: str, storage_factory: IStorageFactory, location: Optional[Path] = None) -> None:
+def pull_external_data(name: str, location: Optional[Path] = None) -> None:
    """Pull/copy data for an external storage to a dataset's data directory or a specified location.

    Args:
        name(str): Name of the dataset
-        storage_factory(IStorageFactory):Injected storage factory.
        location(Optional[Path]): A directory to copy data to (Default value = None).
    """
    datasets_provenance = DatasetsProvenance()
@@ -1256,28 +1249,25 @@ def pull_external_data(name: str, storage_factory: IStorageFactory, location: Op
        create_symlinks = False

    provider = ProviderFactory.get_pull_provider(uri=dataset.storage)
+    storage = provider.get_storage()

-    credentials = S3Credentials(provider)
-    prompt_for_credentials(credentials)
-
-    storage = storage_factory.get_storage(provider=provider, credentials=credentials)
    updated_files = []

    for file in dataset.files:
        path = Path(destination) / file.entity.path
        path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE: Don't check if destination exists. ``IStorage.copy`` won't copy a file if it exists and is not modified.

-        if not file.source:
+        if not file.based_on:
            raise errors.DatasetImportError(f"Dataset file doesn't have a URI: {file.entity.path}")

        with communication.busy(f"Copying {file.entity.path} ..."):
-            storage.copy(file.source, path)
+            storage.download(file.based_on.url, path)

        # NOTE: Make files read-only since we don't support pushing data to the remote storage
        os.chmod(path, 0o400)

-        if file.based_on and not file.based_on.checksum:
+        if not file.based_on.checksum:
            md5_hash = hash_file(path, hash_type="md5") or ""
            file.based_on = RemoteEntity(checksum=md5_hash, url=file.based_on.url, path=file.based_on.path)
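For context, the storage handle in the pull path now comes from the provider itself rather than from an injected IStorageFactory. Below is a minimal sketch of the resulting pattern, not the actual function body: the identifiers come from this diff, the import location of ProviderFactory is an assumption, and the URI and file paths are made up for illustration.

from pathlib import Path

from renku.core.dataset.providers.factory import ProviderFactory  # assumed import path

# Resolve a provider for the dataset's backing storage and let it build the storage handle.
provider = ProviderFactory.get_pull_provider(uri="s3://example-bucket/data")  # illustrative URI
storage = provider.get_storage()

# ``download`` replaces the earlier ``copy`` call; it takes the remote URL recorded in
# ``file.based_on.url`` and a local destination path.
storage.download("s3://example-bucket/data/table.csv", Path("data/my-dataset/table.csv"))  # illustrative paths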
@@ -1313,20 +1303,13 @@ def read_dataset_data_location(dataset: Dataset) -> Optional[str]:
    return get_value(section="dataset-locations", key=dataset.name, config_filter=ConfigFilter.LOCAL_ONLY)


-@inject.autoparams("storage_factory")
-def mount_external_storage(
-    name: str,
-    existing: Optional[Path],
-    yes: bool,
-    storage_factory: IStorageFactory,
-) -> None:
+def mount_external_storage(name: str, existing: Optional[Path], yes: bool) -> None:
    """Mount an external storage to a dataset's data directory.

    Args:
        name(str): Name of the dataset
        existing(Optional[Path]): An existing mount point to use instead of actually mounting the external storage.
        yes(bool): Don't prompt when removing non-empty dataset's data directory.
-        storage_factory(IStorageFactory): Injected storage factory.
    """
    dataset, datadir = _get_dataset_with_external_storage(name=name)
@@ -1350,8 +1333,8 @@ def mount_external_storage(
    provider = ProviderFactory.get_mount_provider(uri=dataset.storage)
    credentials = S3Credentials(provider)
    prompt_for_credentials(credentials)
+    storage = provider.get_storage(credentials=credentials)

-    storage = storage_factory.get_storage(provider=provider, credentials=credentials)
    with communication.busy(f"Mounting {provider.uri}"):
        storage.mount(datadir)
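Likewise for mounting: credentials are still gathered interactively, but the storage object is now produced by the provider. A minimal sketch under the same assumptions as above (identifiers come from this diff; the URI and mount point are illustrative only):

provider = ProviderFactory.get_mount_provider(uri="s3://example-bucket/data")  # illustrative URI
credentials = S3Credentials(provider)
prompt_for_credentials(credentials)

# The provider builds the storage handle directly, taking the prompted credentials.
storage = provider.get_storage(credentials=credentials)
storage.mount(Path("data/my-dataset"))  # illustrative mount point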