Skip to content

Commit c3c3468

Browse files
authored
Fix deepcopy of primitive types (#857)
1 parent 0e381fa commit c3c3468

File tree

5 files changed

+127
-1
lines changed

5 files changed

+127
-1
lines changed

pyiceberg/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def is_struct(self) -> bool:
160160
return isinstance(self, StructType)
161161

162162

163-
class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton):
163+
class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
164164
"""Base class for all Iceberg Primitive Types."""
165165

166166
root: Any = Field()

pyiceberg/utils/singleton.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,16 @@ def __new__(cls, *args, **kwargs): # type: ignore
4747
if key not in cls._instances:
4848
cls._instances[key] = super().__new__(cls)
4949
return cls._instances[key]
50+
51+
def __deepcopy__(self, memo: Dict[int, Any]) -> Any:
52+
"""
53+
Prevent deep copy operations for singletons.
54+
55+
The IcebergRootModel inherits from Pydantic RootModel,
56+
which has its own implementation of deepcopy. When deepcopy
57+
runs, it calls the RootModel __deepcopy__ method and ignores
58+
that it's a Singleton. To handle this, the order of inheritance
59+
is adjusted and a __deepcopy__ method is implemented for
60+
singletons that simply returns itself.
61+
"""
62+
return self

tests/conftest.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,12 +865,70 @@ def generate_snapshot(
865865
"refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}},
866866
}
867867

868+
TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES = {
869+
"format-version": 2,
870+
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
871+
"location": "s3://bucket/test/location",
872+
"last-sequence-number": 34,
873+
"last-updated-ms": 1602638573590,
874+
"last-column-id": 7,
875+
"current-schema-id": 1,
876+
"schemas": [
877+
{
878+
"type": "struct",
879+
"schema-id": 1,
880+
"identifier-field-ids": [1],
881+
"fields": [
882+
{"id": 1, "name": "x", "required": True, "type": "long"},
883+
{"id": 4, "name": "a", "required": True, "type": "decimal(16, 2)"},
884+
{"id": 5, "name": "b", "required": True, "type": "decimal(16, 8)"},
885+
{"id": 6, "name": "c", "required": True, "type": "fixed[16]"},
886+
{"id": 7, "name": "d", "required": True, "type": "fixed[18]"},
887+
],
888+
}
889+
],
890+
"default-spec-id": 0,
891+
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
892+
"last-partition-id": 1000,
893+
"properties": {"read.split.target.size": "134217728"},
894+
"current-snapshot-id": 3055729675574597004,
895+
"snapshots": [
896+
{
897+
"snapshot-id": 3051729675574597004,
898+
"timestamp-ms": 1515100955770,
899+
"sequence-number": 0,
900+
"summary": {"operation": "append"},
901+
"manifest-list": "s3://a/b/1.avro",
902+
},
903+
{
904+
"snapshot-id": 3055729675574597004,
905+
"parent-snapshot-id": 3051729675574597004,
906+
"timestamp-ms": 1555100955770,
907+
"sequence-number": 1,
908+
"summary": {"operation": "append"},
909+
"manifest-list": "s3://a/b/2.avro",
910+
"schema-id": 1,
911+
},
912+
],
913+
"snapshot-log": [
914+
{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
915+
{"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
916+
],
917+
"metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}],
918+
"refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}},
919+
}
920+
868921

869922
@pytest.fixture
870923
def example_table_metadata_v2() -> Dict[str, Any]:
871924
return EXAMPLE_TABLE_METADATA_V2
872925

873926

927+
@pytest.fixture
928+
def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
929+
return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES
930+
931+
874932
@pytest.fixture(scope="session")
875933
def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
876934
from pyiceberg.io.pyarrow import PyArrowFileIO
@@ -2064,6 +2122,22 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
20642122
)
20652123

20662124

2125+
@pytest.fixture
2126+
def table_v2_with_fixed_and_decimal_types(
2127+
table_metadata_v2_with_fixed_and_decimal_types: Dict[str, Any],
2128+
) -> Table:
2129+
table_metadata = TableMetadataV2(
2130+
**table_metadata_v2_with_fixed_and_decimal_types,
2131+
)
2132+
return Table(
2133+
identifier=("database", "table"),
2134+
metadata=table_metadata,
2135+
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
2136+
io=load_file_io(),
2137+
catalog=NoopCatalog("NoopCatalog"),
2138+
)
2139+
2140+
20672141
@pytest.fixture
20682142
def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table:
20692143
table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)

tests/table/test_init.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@
9393
BinaryType,
9494
BooleanType,
9595
DateType,
96+
DecimalType,
9697
DoubleType,
98+
FixedType,
9799
FloatType,
98100
IntegerType,
99101
ListType,
@@ -845,6 +847,32 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
845847
assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"}
846848

847849

850+
def test_update_metadata_schema_immutability(
851+
table_v2_with_fixed_and_decimal_types: TableMetadataV2,
852+
) -> None:
853+
update = SetSnapshotRefUpdate(
854+
ref_name="main",
855+
type="branch",
856+
snapshot_id=3051729675574597004,
857+
max_ref_age_ms=123123123,
858+
max_snapshot_age_ms=12312312312,
859+
min_snapshots_to_keep=1,
860+
)
861+
862+
new_metadata = update_table_metadata(
863+
table_v2_with_fixed_and_decimal_types.metadata,
864+
(update,),
865+
)
866+
867+
assert new_metadata.schemas[0].fields == (
868+
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
869+
NestedField(field_id=4, name="a", field_type=DecimalType(precision=16, scale=2), required=True),
870+
NestedField(field_id=5, name="b", field_type=DecimalType(precision=16, scale=8), required=True),
871+
NestedField(field_id=6, name="c", field_type=FixedType(length=16), required=True),
872+
NestedField(field_id=7, name="d", field_type=FixedType(length=18), required=True),
873+
)
874+
875+
848876
def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
849877
base_metadata = table_v1.metadata
850878
base_metadata_backup = base_metadata.model_copy(deep=True)

tests/test_types.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,3 +619,14 @@ def test_types_singleton() -> None:
619619
assert id(BooleanType()) == id(BooleanType())
620620
assert id(FixedType(22)) == id(FixedType(22))
621621
assert id(FixedType(19)) != id(FixedType(25))
622+
623+
624+
def test_deepcopy_of_singleton_fixed_type() -> None:
625+
"""FixedType is a singleton, so deepcopy should return the same instance"""
626+
from copy import deepcopy
627+
628+
list_of_fixed_types = [FixedType(22), FixedType(19)]
629+
copied_list = deepcopy(list_of_fixed_types)
630+
631+
for lhs, rhs in zip(list_of_fixed_types, copied_list):
632+
assert id(lhs) == id(rhs)

0 commit comments

Comments
 (0)