36
36
from renku .core .dataset .providers import ProviderFactory
37
37
from renku .core .dataset .providers .models import ProviderDataset , ProviderDatasetFile
38
38
from renku .core .dataset .request_model import ImageRequestModel
39
- from renku .core .dataset .tag import add_dataset_tag , prompt_access_token , prompt_tag_selection
39
+ from renku .core .dataset .tag import add_dataset_tag , get_dataset_by_tag , prompt_access_token , prompt_tag_selection
40
40
from renku .core .interface .client_dispatcher import IClientDispatcher
41
41
from renku .core .interface .database_dispatcher import IDatabaseDispatcher
42
42
from renku .core .interface .dataset_gateway import IDatasetGateway
@@ -249,10 +249,11 @@ def edit_dataset(
249
249
return updated
250
250
251
251
252
- @inject .autoparams ()
252
+ @inject .autoparams ("client_dispatcher" )
253
253
def list_dataset_files (
254
254
client_dispatcher : IClientDispatcher ,
255
- datasets = None ,
255
+ datasets : List [str ] = None ,
256
+ tag : Optional [str ] = None ,
256
257
creators = None ,
257
258
include = None ,
258
259
exclude = None ,
@@ -261,19 +262,22 @@ def list_dataset_files(
261
262
262
263
Args:
263
264
client_dispatcher(IClientDispatcher): Injected client dispatcher.
264
- datasets: Datasets to list files for (Default value = None).
265
+ datasets(List[str]): Datasets to list files for (Default value = None).
266
+ tag(str): Tag to filter by (Default value = None).
265
267
creators: Creators to filter by (Default value = None).
266
268
include: Include filters for file paths (Default value = None).
267
269
exclude: Exclude filters for file paths (Default value = None).
268
270
269
271
Returns:
270
272
List[DynamicProxy]: Filtered dataset files.
271
273
"""
272
- from renku .command .format .dataset_files import get_lfs_file_sizes , get_lfs_tracking
274
+ from renku .command .format .dataset_files import get_lfs_tracking_and_file_sizes
273
275
274
276
client = client_dispatcher .current_client
275
277
276
- records = filter_dataset_files (names = datasets , creators = creators , include = include , exclude = exclude , immutable = True )
278
+ records = filter_dataset_files (
279
+ names = datasets , tag = tag , creators = creators , include = include , exclude = exclude , immutable = True
280
+ )
277
281
for record in records :
278
282
record .title = record .dataset .title
279
283
record .dataset_name = record .dataset .name
@@ -285,8 +289,7 @@ def list_dataset_files(
285
289
record .name = Path (record .entity .path ).name
286
290
record .added = record .date_added
287
291
288
- get_lfs_file_sizes (records )
289
- get_lfs_tracking (records )
292
+ get_lfs_tracking_and_file_sizes (records , has_tag = bool (tag ))
290
293
291
294
return records
292
295
@@ -1145,90 +1148,93 @@ def update_external_files(client: "LocalClient", records: List[DynamicProxy], dr
1145
1148
return updated_files
1146
1149
1147
1150
1148
@inject.autoparams("client_dispatcher", "dataset_gateway")
def filter_dataset_files(
    client_dispatcher: IClientDispatcher,
    dataset_gateway: IDatasetGateway,
    names: Optional[List[str]] = None,
    tag: Optional[str] = None,
    creators: Optional[Union[str, List[str], Tuple[str]]] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    ignore: Optional[List[str]] = None,
    immutable: bool = False,
) -> List[DynamicProxy]:
    """Filter dataset files by specified filters.

    Args:
        client_dispatcher(IClientDispatcher): Injected client dispatcher.
        dataset_gateway(IDatasetGateway): Injected dataset gateway.
        names(Optional[List[str]]): Only consider datasets with these names (Default value = None).
        tag(Optional[str]): Use the version of each dataset that carries this tag (Default value = None).
        creators(Optional[Union[str, List[str], Tuple[str]]]): Creator names that must all be creators of a
            dataset; a string is split on commas (Default value = None).
        include(Optional[List[str]]): Patterns a file path must match to be kept (Default value = None).
        exclude(Optional[List[str]]): Patterns that remove a file path from the result; checked before
            ``include`` (Default value = None).
        ignore(Optional[List[str]]): Names of datasets to skip (Default value = None).
        immutable(bool): If ``False``, work on a copy of each dataset (Default value = False).

    Raises:
        errors.ParameterError: If one or more of ``names`` was never matched by a dataset that passed the
            name/ignore/tag checks.

    Returns:
        List[DynamicProxy]: List of filtered files sorted by date added.
    """

    def should_include(filepath: Path) -> bool:
        """Check if file matches one of include filters and not in exclude filter."""
        # Exclusion wins over inclusion.
        if exclude and any(filepath.match(pattern) for pattern in exclude):
            return False

        # When include patterns are given, a file must match at least one of them.
        if include:
            return any(filepath.match(pattern) for pattern in include)

        return True

    client = client_dispatcher.current_client

    if isinstance(creators, str):
        creators_set = set(creators.split(","))
    elif isinstance(creators, (list, tuple)):
        creators_set = set(creators)
    else:
        creators_set = set()

    # Build lookup sets once instead of scanning the input sequences for every dataset.
    wanted_names = set(names) if names else set()
    ignored_names = set(ignore) if ignore else set()

    records = []
    # Requested names that have not been seen yet; anything left at the end is reported as missing.
    unused_names = set(wanted_names)

    for dataset in dataset_gateway.get_all_active_datasets():
        if (wanted_names and dataset.name not in wanted_names) or dataset.name in ignored_names:
            continue

        if tag:
            dataset = get_dataset_by_tag(dataset=dataset, tag=tag)  # type: ignore
            # NOTE: a requested dataset without this tag is skipped here, before it is marked as seen,
            # so it ends up in the "don't exist" error below.
            if not dataset:
                continue

        if not immutable:
            dataset = dataset.copy()

        # ``discard`` (not ``remove``) so a repeated dataset name cannot raise ``KeyError``.
        unused_names.discard(dataset.name)

        if creators_set:
            dataset_creators = {creator.name for creator in dataset.creators}
            if not creators_set.issubset(dataset_creators):
                continue

        for file in dataset.files:
            if not should_include(Path(file.entity.path)):
                continue

            record = DynamicProxy(file)
            record.dataset = dataset
            record.client = client
            records.append(record)

    if unused_names:
        # Sorted so the error message is deterministic.
        unused_names_str = ", ".join(sorted(unused_names))
        raise errors.ParameterError(f"These datasets don't exist: {unused_names_str}")

    return sorted(records, key=lambda r: r.date_added)
0 commit comments