|
56 | 56 | rename, |
57 | 57 | ) |
58 | 58 | from .fingerprint import Hasher |
59 | | -from .info import DatasetInfo, DatasetInfosDict, PostProcessedInfo |
| 59 | +from .info import DatasetInfo, PostProcessedInfo |
60 | 60 | from .iterable_dataset import ArrowExamplesIterable, ExamplesIterable, IterableDataset |
61 | 61 | from .keyhash import DuplicatedKeysError |
62 | 62 | from .naming import INVALID_WINDOWS_CHARACTERS_IN_PATH, camelcase_to_snakecase |
@@ -349,9 +349,7 @@ def __init__( |
349 | 349 | # prepare info: DatasetInfo are a standardized dataclass across all datasets |
350 | 350 | # Prefill datasetinfo |
351 | 351 | if info is None: |
352 | | - # TODO FOR PACKAGED MODULES IT IMPORTS DATA FROM src/packaged_modules which doesn't make sense |
353 | | - info = self.get_exported_dataset_info() |
354 | | - info.update(self._info()) |
| 352 | + info = self._info() |
355 | 353 | info.builder_name = self.name |
356 | 354 | info.dataset_name = self.dataset_name |
357 | 355 | info.config_name = self.config.name |
@@ -391,7 +389,7 @@ def __init__( |
391 | 389 | if os.path.exists(self._cache_dir): # check if data exist |
392 | 390 | if len(os.listdir(self._cache_dir)) > 0: |
393 | 391 | if os.path.exists(os.path.join(self._cache_dir, config.DATASET_INFO_FILENAME)): |
394 | | - logger.info("Overwrite dataset info from restored data version if exists.") |
| 392 | + logger.debug("Overwrite dataset info from restored data version if exists.") |
395 | 393 | self.info = DatasetInfo.from_directory(self._cache_dir) |
396 | 394 | else: # dir exists but no data, remove the empty dir as data aren't available anymore |
397 | 395 | logger.warning( |
@@ -503,35 +501,6 @@ def update_hash_with_config_parameters(hash: str, config_parameters: dict) -> st |
503 | 501 | if os.path.isdir(legacy_cache_dir): |
504 | 502 | return legacy_relative_data_dir |
505 | 503 |
|
506 | | - @classmethod |
507 | | - def get_all_exported_dataset_infos(cls) -> DatasetInfosDict: |
508 | | - """Empty dict if doesn't exist |
509 | | -
|
510 | | - Example: |
511 | | -
|
512 | | - ```py |
513 | | - >>> from datasets import load_dataset_builder |
514 | | - >>> ds_builder = load_dataset_builder('vivos') |
515 | | - >>> ds_builder.get_all_exported_dataset_infos() |
516 | | - {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value('string'), 'path': Value('string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value('string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} |
517 | | - ``` |
518 | | - """ |
519 | | - return DatasetInfosDict.from_directory(cls.get_imported_module_dir()) |
520 | | - |
521 | | - def get_exported_dataset_info(self) -> DatasetInfo: |
522 | | - """Empty `DatasetInfo` if doesn't exist |
523 | | -
|
524 | | - Example: |
525 | | -
|
526 | | - ```py |
527 | | - >>> from datasets import load_dataset_builder |
528 | | - >>> ds_builder = load_dataset_builder('cornell-movie-review-data/rotten_tomatoes') |
529 | | - >>> ds_builder.get_exported_dataset_info() |
530 | | - DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value('string'), 'path': Value('string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value('string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) |
531 | | - ``` |
532 | | - """ |
533 | | - return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo()) |
534 | | - |
535 | 504 | def _create_builder_config( |
536 | 505 | self, config_name=None, custom_features=None, **config_kwargs |
537 | 506 | ) -> tuple[BuilderConfig, str]: |
|
0 commit comments