From f3e740fa0127ca58e9b601fb68d3be7e6227605c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 3 Oct 2025 01:49:51 +0530 Subject: [PATCH 1/5] Make And expression JSON serializable using Pydantic --- pyiceberg/expressions/__init__.py | 13 +++++++++++-- tests/table/test_partitioning.py | 8 ++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 2adf898fea..c46dc43270 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -32,6 +32,8 @@ Union, ) +from pydantic import Field + from pyiceberg.expressions.literals import ( AboveMax, BelowMin, @@ -39,7 +41,7 @@ literal, ) from pyiceberg.schema import Accessor, Schema -from pyiceberg.typedef import L, StructProtocol +from pyiceberg.typedef import IcebergBaseModel, L, StructProtocol from pyiceberg.types import DoubleType, FloatType, NestedField from pyiceberg.utils.singleton import Singleton @@ -247,9 +249,10 @@ def as_bound(self) -> Type[BoundReference[L]]: return BoundReference[L] -class And(BooleanExpression): +class And(BooleanExpression, IcebergBaseModel): """AND operation expression - logical conjunction.""" + type: str = Field(default="and", alias="type") left: BooleanExpression right: BooleanExpression @@ -289,6 +292,12 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]: """Pickle the And class.""" return (self.left, self.right) + class Config: + """Pydantic configuration for And expression serialization.""" + + arbitrary_types_allowed = True + json_encoders = {BooleanExpression: lambda v: v.model_dump(by_alias=True) if isinstance(v, IcebergBaseModel) else str(v)} + class Or(BooleanExpression): """OR operation expression - logical disjunction.""" diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py index 0fe22391c0..8b7fff10f7 100644 --- a/tests/table/test_partitioning.py +++ b/tests/table/test_partitioning.py @@ -21,6 +21,7 @@ import pytest +from pyiceberg.expressions import And, EqualTo from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.transforms import ( @@ -125,6 +126,13 @@ def test_serialize_partition_spec() -> None: ) +def test_serialize_and_expression() -> None: + expr = And(EqualTo("foo", 1), EqualTo("bar", 2)) + assert expr.model_dump_json(by_alias=True) == ( + '{"type":"and","left":{"type":"equal_to","term":"foo","literal":1},"right":{"type":"equal_to","term":"bar","literal":2}}' + ) + + def test_deserialize_unpartition_spec() -> None: json_partition_spec = """{"spec-id":0,"fields":[]}""" spec = PartitionSpec.model_validate_json(json_partition_spec) From dd534a89a05d51402439cceaafef462b4407cc72 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Tue, 28 Oct 2025 00:37:09 +0530 Subject: [PATCH 2/5] Make And expression JSON serializable using Pydantic --- pyiceberg/expressions/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index c46dc43270..feb8bb6a31 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -31,8 +31,9 @@ TypeVar, Union, ) +from typing import Literal as TypingLiteral -from pydantic import Field +from pydantic import ConfigDict, Field from pyiceberg.expressions.literals import ( AboveMax, @@ -249,13 +250,18 @@ def as_bound(self) -> Type[BoundReference[L]]: return BoundReference[L] -class And(BooleanExpression, IcebergBaseModel): +class And(IcebergBaseModel, BooleanExpression): """AND operation expression - logical conjunction.""" - type: str = Field(default="and", alias="type") + model_config = ConfigDict(arbitrary_types_allowed=True) + + type: TypingLiteral["and"] = Field(default="and", alias="type") left: BooleanExpression right: BooleanExpression + def __init__(self, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> None: + super().__init__(left=left, right=right) + def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression: # type: ignore if rest: return _build_balanced_tree(And, (left, right, *rest)) @@ -292,12 +298,6 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]: """Pickle the And class.""" return (self.left, self.right) - class Config: - """Pydantic configuration for And expression serialization.""" - - arbitrary_types_allowed = True - json_encoders = {BooleanExpression: lambda v: v.model_dump(by_alias=True) if isinstance(v, IcebergBaseModel) else str(v)} - class Or(BooleanExpression): """OR operation expression - logical disjunction.""" From 59674ecc5eb84b19c5abc803574d5b6ec599bf33 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Tue, 28 Oct 2025 00:44:45 +0530 Subject: [PATCH 3/5] Make And expression JSON serializable using Pydantic --- pyiceberg/expressions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index feb8bb6a31..7b1e70cdab 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -253,7 +253,7 @@ def as_bound(self) -> Type[BoundReference[L]]: class And(IcebergBaseModel, BooleanExpression): """AND operation expression - logical conjunction.""" - model_config = ConfigDict(arbitrary_types_allowed=True) + model_config = ConfigDict(arbitrary_types_allowed=True, frozen=False) type: TypingLiteral["and"] = Field(default="and", alias="type") left: BooleanExpression From 536e2b8de0dad958764aeffbe3893451afb914c8 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Tue, 28 Oct 2025 01:26:14 +0530 Subject: [PATCH 4/5] Make And expression JSON serializable using Pydantic --- pyiceberg/expressions/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 7b1e70cdab..6e26e0a69c 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -272,10 +272,7 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole elif right is AlwaysTrue(): return left else: - obj = super().__new__(cls) - obj.left = left - obj.right = right - return obj + return cls(left=left, right=right) def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the And class.""" From 6f71f21aaa7c30bab77ed2a80d85bc386b5161d7 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Wed, 29 Oct 2025 00:55:00 +0530 Subject: [PATCH 5/5] Make And expression JSON serializable using Pydantic --- pyiceberg/expressions/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 6e26e0a69c..3d8dc17f7e 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -272,7 +272,11 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole elif right is AlwaysTrue(): return left else: - return cls(left=left, right=right) + obj = super().__new__(cls) + obj.__pydantic_fields_set__ = set() + obj.left = left + obj.right = right + return obj def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the And class."""