     )
     from datachain.dataset import DatasetListVersion
     from datachain.job import Job
+    from datachain.lib.listing_info import ListingInfo
     from datachain.listing import Listing

 logger = logging.getLogger("datachain")
@@ -1116,13 +1117,16 @@ def get_dataset_dependencies(
         return direct_dependencies

     def ls_datasets(
-        self, include_listing: bool = False, studio: bool = False
+        self,
+        prefix: Optional[str] = None,
+        include_listing: bool = False,
+        studio: bool = False,
     ) -> Iterator[DatasetListRecord]:
         from datachain.remote.studio import StudioClient

         if studio:
             client = StudioClient()
-            response = client.ls_datasets()
+            response = client.ls_datasets(prefix=prefix)
             if not response.ok:
                 raise DataChainError(response.message)
             if not response.data:
@@ -1133,6 +1137,8 @@ def ls_datasets(
                 for d in response.data
                 if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
             )
+        elif prefix:
+            datasets = self.metastore.list_datasets_by_prefix(prefix)
         else:
             datasets = self.metastore.list_datasets()

@@ -1142,39 +1148,55 @@ def ls_datasets(

     def list_datasets_versions(
         self,
+        prefix: Optional[str] = None,
         include_listing: bool = False,
+        with_job: bool = True,
         studio: bool = False,
     ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
         """Iterate over all dataset versions with related jobs."""
         datasets = list(
-            self.ls_datasets(include_listing=include_listing, studio=studio)
+            self.ls_datasets(
+                prefix=prefix, include_listing=include_listing, studio=studio
+            )
         )

         # preselect dataset versions jobs from db to avoid multiple queries
-        jobs_ids: set[str] = {
-            v.job_id for ds in datasets for v in ds.versions if v.job_id
-        }
         jobs: dict[str, Job] = {}
-        if jobs_ids:
-            jobs = {j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))}
+        if with_job:
+            jobs_ids: set[str] = {
+                v.job_id for ds in datasets for v in ds.versions if v.job_id
+            }
+            if jobs_ids:
+                jobs = {
+                    j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))
+                }

         for d in datasets:
             yield from (
-                (d, v, jobs.get(str(v.job_id)) if v.job_id else None)
+                (d, v, jobs.get(str(v.job_id)) if with_job and v.job_id else None)
                 for v in d.versions
             )

-    def listings(self):
+    def listings(self, prefix: Optional[str] = None) -> list["ListingInfo"]:
         """
         Returns list of ListingInfo objects which are representing specific
         storage listing datasets
         """
-        from datachain.lib.listing import is_listing_dataset
+        from datachain.lib.listing import LISTING_PREFIX, is_listing_dataset
         from datachain.lib.listing_info import ListingInfo

+        if prefix and not prefix.startswith(LISTING_PREFIX):
+            prefix = LISTING_PREFIX + prefix
+
+        listing_datasets_versions = self.list_datasets_versions(
+            prefix=prefix,
+            include_listing=True,
+            with_job=False,
+        )
+
         return [
             ListingInfo.from_models(d, v, j)
-            for d, v, j in self.list_datasets_versions(include_listing=True)
+            for d, v, j in listing_datasets_versions
             if is_listing_dataset(d.name)
         ]

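Below is a minimal usage sketch of the new prefix filtering and the with_job flag added in this change. It is illustrative only and not part of the diff: the way the Catalog instance is obtained (get_catalog()) and the concrete prefix values are assumptions.

# Usage sketch (illustrative, not from the diff): exercising prefix / with_job.
# Assumption: get_catalog() returns a Catalog instance; adjust to however the
# catalog is constructed in your setup.
from datachain.catalog import get_catalog

catalog = get_catalog()

# List only datasets whose names start with "my-" (local metastore path).
for ds in catalog.ls_datasets(prefix="my-"):
    print(ds.name)

# Skip the per-version job lookup when jobs are not needed; job is always None.
for dataset, version, job in catalog.list_datasets_versions(prefix="my-", with_job=False):
    print(dataset.name, version.version, job)

# Listings for a single bucket; LISTING_PREFIX is prepended automatically when
# the caller passes a bare URI-style prefix.
for info in catalog.listings(prefix="s3://my-bucket/"):
    print(info)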