Skip to content

Commit 6569014

Browse files
authored
Fix push_to_hub with no dataset_infos (#5598)
fix push_to_hub with no dataset_infos
1 parent f965477 commit 6569014

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

src/datasets/arrow_dataset.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5271,8 +5271,11 @@ def push_to_hub(
52715271
)
52725272
dataset_metadata = DatasetMetadata.from_readme(Path(dataset_readme_path))
52735273
dataset_infos: DatasetInfosDict = DatasetInfosDict.from_metadata(dataset_metadata)
5274-
repo_info = dataset_infos[next(iter(dataset_infos))]
5275-
# get the deprecated dataset_infos.json to uodate them
5274+
if dataset_infos:
5275+
repo_info = dataset_infos[next(iter(dataset_infos))]
5276+
else:
5277+
repo_info = None
5278+
# get the deprecated dataset_infos.json to update them
52765279
elif config.DATASETDICT_INFOS_FILENAME in repo_files:
52775280
dataset_metadata = DatasetMetadata()
52785281
download_config = DownloadConfig()
@@ -5284,7 +5287,10 @@ def push_to_hub(
52845287
)
52855288
with open(dataset_infos_path, encoding="utf-8") as f:
52865289
dataset_infos: DatasetInfosDict = json.load(f)
5287-
repo_info = DatasetInfo.from_dict(dataset_infos[next(iter(dataset_infos))])
5290+
if dataset_infos:
5291+
repo_info = DatasetInfo.from_dict(dataset_infos[next(iter(dataset_infos))])
5292+
else:
5293+
repo_info = None
52885294
else:
52895295
dataset_metadata = DatasetMetadata()
52905296
repo_info = None

src/datasets/info.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -403,7 +403,7 @@ def _from_yaml_dict(cls, yaml_data: dict) -> "DatasetInfo":
403403

404404

405405
class DatasetInfosDict(Dict[str, DatasetInfo]):
406-
def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=False):
406+
def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=False) -> None:
407407
total_dataset_infos = {}
408408
dataset_infos_path = os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME)
409409
dataset_readme_path = os.path.join(dataset_infos_dir, "README.md")
@@ -427,7 +427,7 @@ def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=Fa
427427
dataset_metadata.to_readme(Path(dataset_readme_path))
428428

429429
@classmethod
430-
def from_directory(cls, dataset_infos_dir):
430+
def from_directory(cls, dataset_infos_dir) -> "DatasetInfosDict":
431431
logger.info(f"Loading Dataset Infos from {dataset_infos_dir}")
432432
# Load the info from the YAML part of README.md
433433
if os.path.exists(os.path.join(dataset_infos_dir, "README.md")):
@@ -447,7 +447,7 @@ def from_directory(cls, dataset_infos_dir):
447447
return cls()
448448

449449
@classmethod
450-
def from_metadata(cls, dataset_metadata: DatasetMetadata):
450+
def from_metadata(cls, dataset_metadata: DatasetMetadata) -> "DatasetInfosDict":
451451
if isinstance(dataset_metadata.get("dataset_info"), (list, dict)):
452452
if isinstance(dataset_metadata["dataset_info"], list):
453453
return cls(

0 commit comments

Comments
 (0)