Skip to content

Commit 94bdcb9

Browse files
committed
Add tests and Pydantic changes
1 parent 39c2d8b commit 94bdcb9

File tree

3 files changed

+278
-90
lines changed

3 files changed

+278
-90
lines changed

pyiceberg/catalog/rest/expression.py

Lines changed: 49 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -15,58 +15,63 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
from typing import Any, Dict, List, Union
18+
from typing import Any, Dict, List, Literal, Union
1919

2020
from pydantic import Field
2121

22-
from pyiceberg.expressions import Reference
2322
from pyiceberg.transforms import Transform
24-
from pyiceberg.typedef import IcebergBaseModel
23+
from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel
2524

2625

27-
class ExpressionType(IcebergBaseModel):
28-
__root__: str = Field(
26+
class Reference(IcebergRootModel[str]):
27+
root: str = Field(..., json_schema_extra={"example": "column-name"})
28+
29+
30+
class ExpressionType(IcebergRootModel[str]):
31+
root: str = Field(
2932
...,
30-
example=[
31-
"true",
32-
"false",
33-
"eq",
34-
"and",
35-
"or",
36-
"not",
37-
"in",
38-
"not-in",
39-
"lt",
40-
"lt-eq",
41-
"gt",
42-
"gt-eq",
43-
"not-eq",
44-
"starts-with",
45-
"not-starts-with",
46-
"is-null",
47-
"not-null",
48-
"is-nan",
49-
"not-nan",
50-
],
33+
json_schema_extra={
34+
"example": [
35+
"true",
36+
"false",
37+
"eq",
38+
"and",
39+
"or",
40+
"not",
41+
"in",
42+
"not-in",
43+
"lt",
44+
"lt-eq",
45+
"gt",
46+
"gt-eq",
47+
"not-eq",
48+
"starts-with",
49+
"not-starts-with",
50+
"is-null",
51+
"not-null",
52+
"is-nan",
53+
"not-nan",
54+
]
55+
},
5156
)
5257

5358

5459
class TrueExpression(IcebergBaseModel):
55-
type: ExpressionType = Field(default_factory=lambda: ExpressionType.parse_obj("true"), const=True)
60+
type: Literal["true"] = "true"
5661

5762

5863
class FalseExpression(IcebergBaseModel):
59-
type: ExpressionType = Field(default_factory=lambda: ExpressionType.parse_obj("false"), const=True)
64+
type: Literal["false"] = "false"
6065

6166

6267
class TransformTerm(IcebergBaseModel):
63-
type: str = Field("transform", const=True)
68+
type: Literal["transform"] = "transform"
6469
transform: Transform
6570
term: Reference
6671

6772

68-
class Term(IcebergBaseModel):
69-
__root__: Union[Reference, TransformTerm]
73+
class Term(IcebergRootModel[Union[Reference, TransformTerm]]):
74+
root: Union[Reference, TransformTerm]
7075

7176

7277
class AndOrExpression(IcebergBaseModel):
@@ -76,7 +81,7 @@ class AndOrExpression(IcebergBaseModel):
7681

7782

7883
class NotExpression(IcebergBaseModel):
79-
type: ExpressionType = Field(default_factory=lambda: ExpressionType.parse_obj("not"), const=True)
84+
type: Literal["not"] = "not"
8085
child: "Expression"
8186

8287

@@ -98,8 +103,18 @@ class UnaryExpression(IcebergBaseModel):
98103
value: Dict[str, Any]
99104

100105

101-
class Expression(IcebergBaseModel):
102-
__root__: Union[
106+
class Expression(IcebergRootModel[
107+
Union[
108+
TrueExpression,
109+
FalseExpression,
110+
AndOrExpression,
111+
NotExpression,
112+
SetExpression,
113+
LiteralExpression,
114+
UnaryExpression,
115+
]
116+
]):
117+
root: Union[
103118
TrueExpression,
104119
FalseExpression,
105120
AndOrExpression,

pyiceberg/catalog/rest/planning_models.py

Lines changed: 78 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -22,120 +22,121 @@
2222
from pydantic import Field
2323

2424
from pyiceberg.catalog.rest.expression import Expression
25-
from pyiceberg.typedef import IcebergBaseModel
25+
from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel
2626

2727

28-
class FieldName(IcebergBaseModel):
29-
__root__: str = Field(
28+
class FieldName(IcebergRootModel[str]):
29+
root: str = Field(
3030
...,
3131
description="A full field name (including parent field names), such as those passed in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - Nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - Nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - Nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - Nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name",
3232
)
3333

3434

35-
class BooleanTypeValue(IcebergBaseModel):
36-
__root__: bool = Field(..., example=True)
3735

36+
class BooleanTypeValue(IcebergRootModel[bool]):
37+
root: bool = Field(..., json_schema_extra={"example": True})
3838

39-
class IntegerTypeValue(IcebergBaseModel):
40-
__root__: int = Field(..., example=42)
4139

40+
class IntegerTypeValue(IcebergRootModel[int]):
41+
root: int = Field(..., json_schema_extra={"example": 42})
4242

43-
class LongTypeValue(IcebergBaseModel):
44-
__root__: int = Field(..., example=9223372036854775807)
4543

44+
class LongTypeValue(IcebergRootModel[int]):
45+
root: int = Field(..., json_schema_extra={"example": 9223372036854775807})
4646

47-
class FloatTypeValue(IcebergBaseModel):
48-
__root__: float = Field(..., example=3.14)
4947

48+
class FloatTypeValue(IcebergRootModel[float]):
49+
root: float = Field(..., json_schema_extra={"example": 3.14})
5050

51-
class DoubleTypeValue(IcebergBaseModel):
52-
__root__: float = Field(..., example=123.456)
5351

52+
class DoubleTypeValue(IcebergRootModel[float]):
53+
root: float = Field(..., json_schema_extra={"example": 123.456})
5454

55-
class DecimalTypeValue(IcebergBaseModel):
56-
__root__: str = Field(
55+
56+
class DecimalTypeValue(IcebergRootModel[str]):
57+
root: str = Field(
5758
...,
5859
description="Decimal type values are serialized as strings. Decimals with a positive scale serialize as numeric plain text, while decimals with a negative scale use scientific notation and the exponent will be equal to the negated scale. For instance, a decimal with a positive scale is '123.4500', with zero scale is '2', and with a negative scale is '2E+20'",
59-
example="123.4500",
60+
json_schema_extra={"example": "123.4500"},
6061
)
6162

6263

63-
class StringTypeValue(IcebergBaseModel):
64-
__root__: str = Field(..., example="hello")
64+
class StringTypeValue(IcebergRootModel[str]):
65+
root: str = Field(..., json_schema_extra={"example": "hello"})
6566

6667

67-
class UUIDTypeValue(IcebergBaseModel):
68-
__root__: UUID = Field(
68+
class UUIDTypeValue(IcebergRootModel[UUID]):
69+
root: UUID = Field(
6970
...,
7071
description="UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122",
71-
example="eb26bdb1-a1d8-4aa6-990e-da940875492c",
72+
json_schema_extra={"example": "eb26bdb1-a1d8-4aa6-990e-da940875492c"},
7273
max_length=36,
7374
min_length=36,
74-
regex="^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
75+
pattern="^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
7576
)
7677

7778

78-
class DateTypeValue(IcebergBaseModel):
79-
__root__: date = Field(
79+
class DateTypeValue(IcebergRootModel[date]):
80+
root: date = Field(
8081
...,
8182
description="Date type values follow the 'YYYY-MM-DD' ISO-8601 standard date format",
82-
example="2007-12-03",
83+
json_schema_extra={"example": "2007-12-03"},
8384
)
8485

8586

86-
class TimeTypeValue(IcebergBaseModel):
87-
__root__: str = Field(
87+
class TimeTypeValue(IcebergRootModel[str]):
88+
root: str = Field(
8889
...,
8990
description="Time type values follow the 'HH:MM:SS.ssssss' ISO-8601 format with microsecond precision",
90-
example="22:31:08.123456",
91+
json_schema_extra={"example": "22:31:08.123456"},
9192
)
9293

9394

94-
class TimestampTypeValue(IcebergBaseModel):
95-
__root__: str = Field(
95+
class TimestampTypeValue(IcebergRootModel[str]):
96+
root: str = Field(
9697
...,
9798
description="Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' ISO-8601 format with microsecond precision",
98-
example="2007-12-03T10:15:30.123456",
99+
json_schema_extra={"example": "2007-12-03T10:15:30.123456"},
99100
)
100101

101102

102-
class TimestampTzTypeValue(IcebergBaseModel):
103-
__root__: str = Field(
103+
class TimestampTzTypeValue(IcebergRootModel[str]):
104+
root: str = Field(
104105
...,
105106
description="TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' ISO-8601 format with microsecond precision, and a timezone offset (+00:00 for UTC)",
106-
example="2007-12-03T10:15:30.123456+00:00",
107+
json_schema_extra={"example": "2007-12-03T10:15:30.123456+00:00"},
107108
)
108109

109110

110-
class TimestampNanoTypeValue(IcebergBaseModel):
111-
__root__: str = Field(
111+
class TimestampNanoTypeValue(IcebergRootModel[str]):
112+
root: str = Field(
112113
...,
113114
description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' ISO-8601 format with nanosecond precision",
114-
example="2007-12-03T10:15:30.123456789",
115+
json_schema_extra={"example": "2007-12-03T10:15:30.123456789"},
115116
)
116117

117118

118-
class TimestampTzNanoTypeValue(IcebergBaseModel):
119-
__root__: str = Field(
119+
class TimestampTzNanoTypeValue(IcebergRootModel[str]):
120+
root: str = Field(
120121
...,
121122
description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)",
122-
example="2007-12-03T10:15:30.123456789+00:00",
123+
json_schema_extra={"example": "2007-12-03T10:15:30.123456789+00:00"},
123124
)
124125

125126

126-
class FixedTypeValue(IcebergBaseModel):
127-
__root__: str = Field(
127+
class FixedTypeValue(IcebergRootModel[str]):
128+
root: str = Field(
128129
...,
129130
description="Fixed length type values are stored and serialized as an uppercase hexadecimal string preserving the fixed length",
130-
example="78797A",
131+
json_schema_extra={"example": "78797A"},
131132
)
132133

133134

134-
class BinaryTypeValue(IcebergBaseModel):
135-
__root__: str = Field(
135+
class BinaryTypeValue(IcebergRootModel[str]):
136+
root: str = Field(
136137
...,
137138
description="Binary type values are stored and serialized as an uppercase hexadecimal string",
138-
example="78797A",
139+
json_schema_extra={"example": "78797A"},
139140
)
140141

141142

@@ -144,8 +145,29 @@ class CountMap(IcebergBaseModel):
144145
values: Optional[List[LongTypeValue]] = Field(None, description="List of Long values, matched to 'keys' by index")
145146

146147

147-
class PrimitiveTypeValue(IcebergBaseModel):
148-
__root__: Union[
148+
class PrimitiveTypeValue(
149+
IcebergRootModel[
150+
Union[
151+
BooleanTypeValue,
152+
IntegerTypeValue,
153+
LongTypeValue,
154+
FloatTypeValue,
155+
DoubleTypeValue,
156+
DecimalTypeValue,
157+
StringTypeValue,
158+
UUIDTypeValue,
159+
DateTypeValue,
160+
TimeTypeValue,
161+
TimestampTypeValue,
162+
TimestampTzTypeValue,
163+
TimestampNanoTypeValue,
164+
TimestampTzNanoTypeValue,
165+
FixedTypeValue,
166+
BinaryTypeValue,
167+
]
168+
]
169+
):
170+
root: Union[
149171
BooleanTypeValue,
150172
IntegerTypeValue,
151173
LongTypeValue,
@@ -215,7 +237,7 @@ class ContentFile(IcebergBaseModel):
215237
partition: List[PrimitiveTypeValue] = Field(
216238
...,
217239
description="A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`",
218-
example=[1, "bar"],
240+
json_schema_extra={"example": [1, "bar"]},
219241
)
220242
file_size_in_bytes: int = Field(..., alias="file-size-in-bytes", description="Total file size in bytes")
221243
record_count: int = Field(..., alias="record-count", description="Number of records in the file")
@@ -225,7 +247,7 @@ class ContentFile(IcebergBaseModel):
225247

226248

227249
class PositionDeleteFile(ContentFile):
228-
content: Literal["position-deletes"] = Field(..., const=True)
250+
content: Literal["position-deletes"] = "position-deletes"
229251
content_offset: Optional[int] = Field(
230252
None,
231253
alias="content-offset",
@@ -239,16 +261,16 @@ class PositionDeleteFile(ContentFile):
239261

240262

241263
class EqualityDeleteFile(ContentFile):
242-
content: Literal["equality-deletes"] = Field(..., const=True)
264+
content: Literal["equality-deletes"] = "equality-deletes"
243265
equality_ids: Optional[List[int]] = Field(None, alias="equality-ids", description="List of equality field IDs")
244266

245267

246-
class DeleteFile(IcebergBaseModel):
247-
__root__: Union[PositionDeleteFile, EqualityDeleteFile] = Field(..., discriminator="content")
268+
class DeleteFile(IcebergRootModel[Union[PositionDeleteFile, EqualityDeleteFile]]):
269+
root: Union[PositionDeleteFile, EqualityDeleteFile] = Field(..., discriminator="content")
248270

249271

250272
class DataFile(ContentFile):
251-
content: str = Field(..., const=True)
273+
content: Literal["data"] = "data"
252274
first_row_id: Optional[int] = Field(
253275
None,
254276
alias="first-row-id",
@@ -296,8 +318,8 @@ class FileScanTask(IcebergBaseModel):
296318
)
297319

298320

299-
class PlanTask(IcebergBaseModel):
300-
__root__: str = Field(
321+
class PlanTask(IcebergRootModel[str]):
322+
root: str = Field(
301323
...,
302324
description="An opaque string provided by the REST server that represents a unit of work to produce file scan tasks for scan planning. This allows clients to fetch tasks across multiple requests to accommodate large result sets.",
303325
)

0 commit comments

Comments
 (0)