File tree Expand file tree Collapse file tree 3 files changed +8
-6
lines changed Expand file tree Collapse file tree 3 files changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -111,10 +111,6 @@ message DataSourceAccess {
111
111
// URL referring to the data being used.
112
112
// If provided together with a source, the url should correspond to the source
113
113
// or part of the source.
114
- // copybara:strip_begin
115
- // If referring to a DataHub dataset, use the following format:
116
- // http://data/details/mldataset.tfds.mnist
117
- // copybara:strip_end
118
114
Url url = 5 ;
119
115
}
120
116
Original file line number Diff line number Diff line change @@ -41,7 +41,7 @@ def get_tfds_dataset_name(dataset: mlc.Dataset) -> str:
41
41
return huggingface_utils .convert_hf_name (dataset_name )
42
42
43
43
44
- def get_record_set_ids (metadata : mlc .Metadata ) -> typing . Sequence [str ]:
44
+ def get_record_set_ids (metadata : mlc .Metadata ) -> list [str ]:
45
45
"""Returns record set ids of the given MLcroissant metadata.
46
46
47
47
Record sets which have the attribute `cr:Data` are excluded (e.g. splits that
Original file line number Diff line number Diff line change @@ -106,6 +106,12 @@ def dataset(self) -> mlc.Dataset:
106
106
def dataset_name (self ) -> str :
107
107
return croissant_utils .get_dataset_name (self .dataset )
108
108
109
+ @functools .cached_property
110
+ def record_set_ids (self ) -> list [str ]:
111
+ return self .record_sets or croissant_utils .get_record_set_ids (
112
+ self .dataset .metadata
113
+ )
114
+
109
115
110
116
def register_subparser (parsers : argparse ._SubParsersAction ):
111
117
"""Add subparser for `convert_format` command."""
@@ -133,7 +139,7 @@ def prepare_croissant_builder(args: CmdArgs) -> None:
133
139
"""
134
140
builder = croissant_builder .CroissantBuilder (
135
141
jsonld = args .jsonld ,
136
- record_set_ids = args .record_sets or None ,
142
+ record_set_ids = args .record_set_ids ,
137
143
file_format = args .file_format ,
138
144
data_dir = args .data_dir ,
139
145
mapping = args .mapping_json ,
You can’t perform that action at this time.
0 commit comments