19
19
```
20
20
tfds build_croissant \
21
21
--jsonld=/tmp/croissant.json \
22
- --record_sets=record1 --record_sets=record2
23
- --file_format=array_record
24
- --out_dir=/tmp/foo
22
+ --out_dir=/tmp/foo \
23
+ --file_format=array_record \
24
+ --record_sets=record1 --record_sets=record2 \
25
25
--mapping='{"document.csv": "~/Downloads/document.csv"}'
26
26
```
27
27
"""
@@ -43,12 +43,18 @@ def add_parser_arguments(parser: argparse.ArgumentParser) -> None:
43
43
help = 'The Croissant config file for the given dataset.' ,
44
44
required = True ,
45
45
)
46
+ parser .add_argument (
47
+ '--out_dir' ,
48
+ type = epath .Path ,
49
+ help = 'Path where the converted dataset will be stored.' ,
50
+ required = True ,
51
+ )
46
52
parser .add_argument (
47
53
'--file_format' ,
54
+ default = file_adapters .FileFormat .ARRAY_RECORD .value ,
48
55
type = str ,
49
56
choices = [file_format .value for file_format in file_adapters .FileFormat ],
50
57
help = 'File format to convert the dataset to.' ,
51
- required = True ,
52
58
)
53
59
parser .add_argument (
54
60
'--record_sets' ,
@@ -59,12 +65,6 @@ def add_parser_arguments(parser: argparse.ArgumentParser) -> None:
59
65
' the record sets'
60
66
),
61
67
)
62
- parser .add_argument (
63
- '--out_dir' ,
64
- type = epath .Path ,
65
- help = 'Path where the converted dataset will be stored.' ,
66
- required = True ,
67
- )
68
68
parser .add_argument (
69
69
'--mapping' ,
70
70
type = str ,
@@ -87,30 +87,30 @@ def register_subparser(parsers: argparse._SubParsersAction) -> None:
87
87
parser .set_defaults (
88
88
subparser_fn = lambda args : prepare_croissant_builder (
89
89
jsonld = args .jsonld ,
90
- record_sets = args .record_sets ,
91
- out_file_format = args .file_format ,
92
90
out_dir = args .out_dir ,
91
+ out_file_format = args .file_format ,
92
+ record_sets = args .record_sets ,
93
93
mapping = args .mapping ,
94
94
)
95
95
)
96
96
97
97
98
98
def prepare_croissant_builder (
99
99
jsonld : epath .PathLike ,
100
- record_sets : Sequence [str ],
101
- out_file_format : str ,
102
100
out_dir : epath .PathLike ,
101
+ out_file_format : str ,
102
+ record_sets : Sequence [str ],
103
103
mapping : str | None ,
104
104
) -> None :
105
105
"""Creates a Croissant Builder and runs the preparation.
106
106
107
107
Args:
108
108
jsonld: The Croissant config file for the given dataset.
109
+ out_dir: Path where the converted dataset will be stored.
110
+ out_file_format: File format to convert the dataset to.
109
111
record_sets: The `@id`s of the record sets to generate. Each record set will
110
112
correspond to a separate config. If not specified, it will use all the
111
113
record sets.
112
- out_file_format: File format to convert the dataset to.
113
- out_dir: Path where the converted dataset will be stored.
114
114
mapping: Mapping filename->filepath as a Python dict[str, str] to handle
115
115
manual downloads. If `document.csv` is the FileObject and you downloaded
116
116
it to `~/Downloads/document.csv`, you can specify
0 commit comments