|
2 | 2 | import os |
3 | 3 | from argparse import ArgumentParser |
4 | 4 | from collections.abc import Generator |
5 | | -from pathlib import Path |
6 | | -from shutil import copyfile, rmtree |
| 5 | +from shutil import rmtree |
7 | 6 |
|
8 | 7 | import datasets.config |
9 | 8 | from datasets.builder import DatasetBuilder |
10 | 9 | from datasets.commands import BaseDatasetsCLICommand |
11 | 10 | from datasets.download.download_manager import DownloadMode |
| 11 | +from datasets.info import DatasetInfosDict |
12 | 12 | from datasets.load import dataset_module_factory, get_dataset_builder_class |
13 | 13 | from datasets.utils.info_utils import VerificationMode |
14 | 14 | from datasets.utils.logging import ERROR, get_logger |
@@ -157,35 +157,15 @@ def get_builders() -> Generator[DatasetBuilder, None, None]: |
157 | 157 | num_proc=self._num_proc, |
158 | 158 | ) |
159 | 159 | builder.as_dataset() |
160 | | - if self._save_infos: |
161 | | - builder._save_infos() |
162 | 160 |
|
163 | | - # If save_infos=True, the dataset card (README.md) is created next to the loaded module file. |
| 161 | + # If save_infos=True, we create the dataset card (README.md) |
164 | 162 | # The dataset_infos are saved in the YAML part of the README.md |
165 | | - |
166 | | - # Let's move it to the original directory of the dataset, to allow the user to |
167 | | - # upload them on HF at the same time afterwards. |
| 163 | + # This is to allow the user to upload them on HF afterwards. |
168 | 164 | if self._save_infos: |
169 | | - dataset_readme_path = os.path.join( |
170 | | - builder_cls.get_imported_module_dir(), datasets.config.REPOCARD_FILENAME |
171 | | - ) |
172 | | - name = Path(path).name + ".py" |
173 | | - combined_path = os.path.join(path, name) |
174 | | - if os.path.isfile(path): |
175 | | - dataset_dir = os.path.dirname(path) |
176 | | - elif os.path.isfile(combined_path): |
177 | | - dataset_dir = path |
178 | | - elif os.path.isdir(path): # for local directories containing only data files |
179 | | - dataset_dir = path |
180 | | - else: # in case of a remote dataset |
181 | | - dataset_dir = None |
182 | | - print(f"Dataset card saved at {dataset_readme_path}") |
183 | | - |
184 | | - # Move dataset_info back to the user |
185 | | - if dataset_dir is not None: |
186 | | - user_dataset_readme_path = os.path.join(dataset_dir, datasets.config.REPOCARD_FILENAME) |
187 | | - copyfile(dataset_readme_path, user_dataset_readme_path) |
188 | | - print(f"Dataset card saved at {user_dataset_readme_path}") |
| 165 | + save_infos_dir = os.path.basename(path) if not os.path.isdir(path) else path |
| 166 | + os.makedirs(save_infos_dir, exist_ok=True) |
| 167 | + DatasetInfosDict(**{builder.config.name: builder.info}).write_to_directory(save_infos_dir) |
| 168 | + print(f"Dataset card saved at {os.path.join(save_infos_dir, datasets.config.REPOCARD_FILENAME)}") |
189 | 169 |
|
190 | 170 | # If clear_cache=True, the download folder and the dataset builder cache directory are deleted |
191 | 171 | if self._clear_cache: |
|
0 commit comments