# Patch summary (commentary only; this text sits ahead of the first `diff --git` header and is not part of any hunk).
#
# src/datasets/arrow_dataset.py
#   - add_column: `new_fingerprint` changes from a required `str` to `Optional[str] = None`.
#     The signature is also reflowed to one parameter per line; parameter names and order are unchanged.
#   - add_item: `new_fingerprint` changes from a required `str` to `Optional[str] = None`.
#   Both methods are decorated with `@transmit_format` and `@fingerprint_transform(inplace=False)`.
#   NOTE(review): presumably `fingerprint_transform` supplies `new_fingerprint` when the caller omits it;
#   this patch does not show the decorator, so confirm before relying on the `None` default.
#
# tests/test_arrow_dataset.py
#   - Adds `test_add_column`, which calls `ds.add_column("b", [3, 4])` without `new_fingerprint`
#     and then checks that "b" is in `ds.features` and that rows 0 and 1 contain the new values.
#   NOTE(review): `add_item` receives the same signature change but no test is added for it here.
#   NOTE(review): the function-local `from datasets import Dataset` looks redundant, since
#   `test_polars_round_trip` just above uses `Dataset` without a local import; confirm against the module imports.
#
# NOTE(review): in this copy every line of the diff is collapsed onto one physical line, so leading
# indentation and blank lines are lost, and the second arrow_dataset.py hunk shows fewer context lines
# than its `-6333,7` header declares. Re-export the patch from the repository before trying to apply it.
diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 36b744a024a..97f60d69b7b 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -6076,7 +6076,11 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], @transmit_format @fingerprint_transform(inplace=False) def add_column( - self, name: str, column: Union[list, np.ndarray], new_fingerprint: str, feature: Optional[FeatureType] = None + self, + name: str, + column: Union[list, np.ndarray], + new_fingerprint: Optional[str] = None, + feature: Optional[FeatureType] = None, ): """Add column to Dataset. @@ -6333,7 +6337,7 @@ def add_elasticsearch_index( @transmit_format @fingerprint_transform(inplace=False) - def add_item(self, item: dict, new_fingerprint: str): + def add_item(self, item: dict, new_fingerprint: Optional[str] = None): """Add item to Dataset. diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py index 8e76952d6ca..43081c6d3d6 100644 --- a/tests/test_arrow_dataset.py +++ b/tests/test_arrow_dataset.py @@ -4783,3 +4783,13 @@ def test_from_polars_save_to_disk_and_load_from_disk_round_trip_with_large_list( def test_polars_round_trip(): ds = Dataset.from_dict({"x": [[1, 2], [3, 4, 5]], "y": ["a", "b"]}) assert isinstance(Dataset.from_polars(ds.to_polars()), Dataset) + + +def test_add_column(): + from datasets import Dataset + + ds = Dataset.from_dict({"a": [1, 2]}) + ds = ds.add_column("b", [3, 4]) + assert "b" in ds.features + assert ds[0] == {"a": 1, "b": 3} + assert ds[1] == {"a": 2, "b": 4}