28
28
29
29
import argparse
30
30
import dataclasses
31
+ import functools
31
32
import json
32
33
import typing
33
34
34
35
from etils import epath
36
+ import mlcroissant as mlc
35
37
import simple_parsing
36
38
from tensorflow_datasets .core import file_adapters
37
39
from tensorflow_datasets .core .dataset_builders import croissant_builder
40
+ from tensorflow_datasets .core .utils import croissant_utils
38
41
from tensorflow_datasets .scripts .cli import cli_utils
39
42
40
43
@@ -84,6 +87,25 @@ class CmdArgs(simple_parsing.helpers.FrozenSerializable):
84
87
overwrite : bool = False
85
88
overwrite_version : str | None = None
86
89
90
@functools.cached_property
def mapping_json(self) -> dict[str, epath.PathLike]:
  """Returns the `mapping` argument decoded from JSON.

  The value is computed on first access and then cached on the instance by
  `functools.cached_property`.

  Returns:
    The decoded JSON object, or an empty dict when `mapping` is unset or
    empty. Keys and values are returned exactly as decoded.

  Raises:
    ValueError: If `mapping` is not valid JSON, or if it is valid JSON that
      does not decode to an object (e.g. a list or a bare string).
  """
  if not self.mapping:
    return {}
  try:
    parsed = json.loads(self.mapping)
  except json.JSONDecodeError as e:
    raise ValueError(
        f'Error parsing mapping parameter: {self.mapping}'
    ) from e
  # `json.loads` happily accepts lists, strings and numbers; reject them here
  # so the caller gets a clear error instead of a failure further downstream.
  if not isinstance(parsed, dict):
    raise ValueError(
        'Error parsing mapping parameter: expected a JSON object, got'
        f' {type(parsed).__name__}: {self.mapping}'
    )
  return parsed
100
+
101
@functools.cached_property
def dataset(self) -> mlc.Dataset:
  """Returns an `mlc.Dataset` built from `jsonld` and `mapping_json`.

  The dataset is constructed on first access and then cached on the instance
  by `functools.cached_property`.
  """
  dataset_kwargs = {
      'jsonld': self.jsonld,
      'mapping': self.mapping_json,
  }
  return mlc.Dataset(**dataset_kwargs)
104
+
105
@functools.cached_property
def dataset_name(self) -> str:
  """Returns `croissant_utils.get_dataset_name` applied to `self.dataset`.

  The name is looked up on first access and then cached on the instance by
  `functools.cached_property`.
  """
  croissant_dataset = self.dataset
  return croissant_utils.get_dataset_name(croissant_dataset)
108
+
87
109
88
110
def register_subparser (parsers : argparse ._SubParsersAction ):
89
111
"""Add subparser for `convert_format` command."""
@@ -109,22 +131,12 @@ def prepare_croissant_builder(args: CmdArgs) -> None:
109
131
Args:
110
132
args: CLI arguments.
111
133
"""
112
- if args .mapping :
113
- try :
114
- mapping = json .loads (args .mapping )
115
- except json .JSONDecodeError as e :
116
- raise ValueError (
117
- f'Error parsing mapping parameter: { args .mapping } '
118
- ) from e
119
- else :
120
- mapping = None
121
-
122
134
builder = croissant_builder .CroissantBuilder (
123
135
jsonld = args .jsonld ,
124
136
record_set_ids = args .record_sets or None ,
125
137
file_format = args .file_format ,
126
138
data_dir = args .data_dir ,
127
- mapping = mapping ,
139
+ mapping = args . mapping_json ,
128
140
overwrite_version = args .overwrite_version ,
129
141
)
130
142
cli_utils .download_and_prepare (
0 commit comments