Skip to content

Commit 47c9de1

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-add-ability-to-delete-full-data-files
2 parents 74497fb + aa850ef commit 47c9de1

File tree

9 files changed

+49
-41
lines changed

9 files changed

+49
-41
lines changed

pyiceberg/types.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@
2424
... NestedField(1, "required_field", StringType(), True),
2525
... NestedField(2, "optional_field", IntegerType())
2626
... ))
27-
'struct<1: required_field: optional string, 2: optional_field: optional int>'
27+
'struct<1: required_field: required string, 2: optional_field: optional int>'
2828
2929
Notes:
30-
- https://iceberg.apache.org/#spec/#primitive-types
30+
- https://iceberg.apache.org/spec/#primitive-types
3131
"""
3232

3333
from __future__ import annotations
@@ -289,7 +289,7 @@ class NestedField(IcebergType):
289289
field_id: int = Field(alias="id")
290290
name: str = Field()
291291
field_type: SerializeAsAny[IcebergType] = Field(alias="type")
292-
required: bool = Field(default=True)
292+
required: bool = Field(default=False)
293293
doc: Optional[str] = Field(default=None, repr=False)
294294
initial_default: Optional[Any] = Field(alias="initial-default", default=None, repr=False)
295295
write_default: Optional[L] = Field(alias="write-default", default=None, repr=False) # type: ignore
@@ -299,7 +299,7 @@ def __init__(
299299
field_id: Optional[int] = None,
300300
name: Optional[str] = None,
301301
field_type: Optional[IcebergType] = None,
302-
required: bool = True,
302+
required: bool = False,
303303
doc: Optional[str] = None,
304304
initial_default: Optional[Any] = None,
305305
write_default: Optional[L] = None,

tests/avro/test_resolver.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
FloatReader,
3030
IntegerReader,
3131
MapReader,
32+
OptionReader,
3233
StringReader,
3334
StructReader,
3435
)
@@ -91,28 +92,30 @@ def test_resolver() -> None:
9192
"location",
9293
location_struct,
9394
),
94-
NestedField(1, "id", LongType()),
95+
NestedField(1, "id", LongType(), required=False),
9596
NestedField(6, "preferences", MapType(7, StringType(), 8, StringType())),
9697
schema_id=1,
9798
)
9899
read_tree = resolve_reader(write_schema, read_schema)
99100

100101
assert read_tree == StructReader(
101102
(
102-
(1, IntegerReader()),
103-
(None, StringReader()),
103+
(1, OptionReader(option=IntegerReader())),
104+
(None, OptionReader(option=StringReader())),
104105
(
105106
0,
106-
StructReader(
107-
(
108-
(0, DoubleReader()),
109-
(1, DoubleReader()),
107+
OptionReader(
108+
option=StructReader(
109+
(
110+
(0, OptionReader(option=DoubleReader())),
111+
(1, OptionReader(option=DoubleReader())),
112+
),
113+
Record,
114+
location_struct,
110115
),
111-
Record,
112-
location_struct,
113116
),
114117
),
115-
(2, MapReader(StringReader(), StringReader())),
118+
(2, OptionReader(option=MapReader(StringReader(), StringReader()))),
116119
),
117120
Record,
118121
read_schema.as_struct(),
@@ -309,7 +312,7 @@ def test_resolver_initial_value() -> None:
309312

310313
assert resolve_reader(write_schema, read_schema) == StructReader(
311314
(
312-
(None, StringReader()), # The one we skip
315+
(None, OptionReader(option=StringReader())), # The one we skip
313316
(0, DefaultReader("vo")),
314317
),
315318
Record,

tests/catalog/test_base.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -269,9 +269,9 @@ def catalog(tmp_path: PosixPath) -> InMemoryCatalog:
269269
TEST_TABLE_NAMESPACE = ("com", "organization", "department")
270270
TEST_TABLE_NAME = "my_table"
271271
TEST_TABLE_SCHEMA = Schema(
272-
NestedField(1, "x", LongType()),
273-
NestedField(2, "y", LongType(), doc="comment"),
274-
NestedField(3, "z", LongType()),
272+
NestedField(1, "x", LongType(), required=True),
273+
NestedField(2, "y", LongType(), doc="comment", required=True),
274+
NestedField(3, "z", LongType(), required=True),
275275
)
276276
TEST_TABLE_PARTITION_SPEC = PartitionSpec(PartitionField(name="x", transform=IdentityTransform(), source_id=1, field_id=1000))
277277
TEST_TABLE_PROPERTIES = {"key1": "value1", "key2": "value2"}

tests/catalog/test_hive.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,7 @@ def test_create_table(
340340
NestedField(field_id=21, name='inner_string', field_type=StringType(), required=False),
341341
NestedField(field_id=22, name='inner_int', field_type=IntegerType(), required=True),
342342
),
343-
required=True,
343+
required=False,
344344
),
345345
schema_id=0,
346346
identifier_field_ids=[2],

tests/cli/test_console.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,9 @@ def mock_datetime_now(monkeypatch: pytest.MonkeyPatch) -> None:
7878
TEST_NAMESPACE_PROPERTIES = {"location": "s3://warehouse/database/location"}
7979
TEST_TABLE_NAME = "my_table"
8080
TEST_TABLE_SCHEMA = Schema(
81-
NestedField(1, "x", LongType()),
82-
NestedField(2, "y", LongType(), doc="comment"),
83-
NestedField(3, "z", LongType()),
81+
NestedField(1, "x", LongType(), required=True),
82+
NestedField(2, "y", LongType(), doc="comment", required=True),
83+
NestedField(3, "z", LongType(), required=True),
8484
)
8585
TEST_TABLE_PARTITION_SPEC = PartitionSpec(PartitionField(name="x", transform=IdentityTransform(), source_id=1, field_id=1000))
8686
TEST_TABLE_PROPERTIES = {"read.split.target.size": "134217728"}

tests/integration/test_rest_schema.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -978,7 +978,7 @@ def test_add_nested_map_of_structs(catalog: Catalog) -> None:
978978
tbl = _create_table_with_schema(
979979
catalog,
980980
Schema(
981-
NestedField(field_id=1, name="foo", field_type=StringType()),
981+
NestedField(field_id=1, name="foo", field_type=StringType(), required=True),
982982
),
983983
)
984984

@@ -1031,7 +1031,7 @@ def test_add_nested_list_of_structs(catalog: Catalog) -> None:
10311031
tbl = _create_table_with_schema(
10321032
catalog,
10331033
Schema(
1034-
NestedField(field_id=1, name="foo", field_type=StringType()),
1034+
NestedField(field_id=1, name="foo", field_type=StringType(), required=True),
10351035
),
10361036
)
10371037

tests/io/test_pyarrow.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -716,7 +716,9 @@ def schema_map_of_structs() -> Schema:
716716
key_id=51,
717717
value_id=52,
718718
key_type=StringType(),
719-
value_type=StructType(NestedField(511, "lat", DoubleType()), NestedField(512, "long", DoubleType())),
719+
value_type=StructType(
720+
NestedField(511, "lat", DoubleType(), required=True), NestedField(512, "long", DoubleType(), required=True)
721+
),
720722
element_required=False,
721723
),
722724
required=False,
@@ -1038,7 +1040,7 @@ def test_projection_add_column_struct_required(file_int: str) -> None:
10381040
def test_projection_rename_column(schema_int: Schema, file_int: str) -> None:
10391041
schema = Schema(
10401042
# Reuses the id 1
1041-
NestedField(1, "other_name", IntegerType())
1043+
NestedField(1, "other_name", IntegerType(), required=True)
10421044
)
10431045
result_table = project(schema, [file_int])
10441046
assert len(result_table.columns[0]) == 3
@@ -1071,7 +1073,7 @@ def test_projection_filter(schema_int: Schema, file_int: str) -> None:
10711073
def test_projection_filter_renamed_column(file_int: str) -> None:
10721074
schema = Schema(
10731075
# Reuses the id 1
1074-
NestedField(1, "other_id", IntegerType())
1076+
NestedField(1, "other_id", IntegerType(), required=True)
10751077
)
10761078
result_table = project(schema, [file_int], GreaterThan("other_id", 1))
10771079
assert len(result_table.columns[0]) == 1
@@ -1089,7 +1091,7 @@ def test_projection_filter_add_column(schema_int: Schema, file_int: str, file_st
10891091

10901092

10911093
def test_projection_filter_add_column_promote(file_int: str) -> None:
1092-
schema_long = Schema(NestedField(1, "id", LongType()))
1094+
schema_long = Schema(NestedField(1, "id", LongType(), required=True))
10931095
result_table = project(schema_long, [file_int])
10941096

10951097
for actual, expected in zip(result_table.columns[0], [0, 1, 2]):
@@ -1111,9 +1113,10 @@ def test_projection_nested_struct_subset(file_struct: str) -> None:
11111113
4,
11121114
"location",
11131115
StructType(
1114-
NestedField(41, "lat", DoubleType()),
1116+
NestedField(41, "lat", DoubleType(), required=True),
11151117
# long is missing!
11161118
),
1119+
required=True,
11171120
)
11181121
)
11191122

@@ -1138,6 +1141,7 @@ def test_projection_nested_new_field(file_struct: str) -> None:
11381141
StructType(
11391142
NestedField(43, "null", DoubleType(), required=False), # Whoa, this column doesn't exist in the file
11401143
),
1144+
required=True,
11411145
)
11421146
)
11431147

@@ -1163,6 +1167,7 @@ def test_projection_nested_struct(schema_struct: Schema, file_struct: str) -> No
11631167
NestedField(43, "null", DoubleType(), required=False),
11641168
NestedField(42, "long", DoubleType(), required=False),
11651169
),
1170+
required=True,
11661171
)
11671172
)
11681173

@@ -1194,8 +1199,8 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of
11941199
ListType(
11951200
51,
11961201
StructType(
1197-
NestedField(511, "latitude", DoubleType()),
1198-
NestedField(512, "longitude", DoubleType()),
1202+
NestedField(511, "latitude", DoubleType(), required=True),
1203+
NestedField(512, "longitude", DoubleType(), required=True),
11991204
NestedField(513, "altitude", DoubleType(), required=False),
12001205
),
12011206
element_required=False,
@@ -1239,9 +1244,9 @@ def test_projection_maps_of_structs(schema_map_of_structs: Schema, file_map_of_s
12391244
value_id=52,
12401245
key_type=StringType(),
12411246
value_type=StructType(
1242-
NestedField(511, "latitude", DoubleType()),
1243-
NestedField(512, "longitude", DoubleType()),
1244-
NestedField(513, "altitude", DoubleType(), required=False),
1247+
NestedField(511, "latitude", DoubleType(), required=True),
1248+
NestedField(512, "longitude", DoubleType(), required=True),
1249+
NestedField(513, "altitude", DoubleType()),
12451250
),
12461251
element_required=False,
12471252
),
@@ -1308,7 +1313,7 @@ def test_projection_nested_struct_different_parent_id(file_struct: str) -> None:
13081313

13091314

13101315
def test_projection_filter_on_unprojected_field(schema_int_str: Schema, file_int_str: str) -> None:
1311-
schema = Schema(NestedField(1, "id", IntegerType()))
1316+
schema = Schema(NestedField(1, "id", IntegerType(), required=True))
13121317

13131318
result_table = project(schema, [file_int_str], GreaterThan("data", "1"), schema_int_str)
13141319

tests/test_schema.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def test_schema_str(table_schema_simple: Schema) -> None:
9191
def test_schema_repr_single_field() -> None:
9292
"""Test schema representation"""
9393
actual = repr(schema.Schema(NestedField(field_id=1, name="foo", field_type=StringType()), schema_id=1))
94-
expected = "Schema(NestedField(field_id=1, name='foo', field_type=StringType(), required=True), schema_id=1, identifier_field_ids=[])"
94+
expected = "Schema(NestedField(field_id=1, name='foo', field_type=StringType(), required=False), schema_id=1, identifier_field_ids=[])"
9595
assert expected == actual
9696

9797

@@ -104,7 +104,7 @@ def test_schema_repr_two_fields() -> None:
104104
schema_id=1,
105105
)
106106
)
107-
expected = "Schema(NestedField(field_id=1, name='foo', field_type=StringType(), required=True), NestedField(field_id=2, name='bar', field_type=IntegerType(), required=False), schema_id=1, identifier_field_ids=[])"
107+
expected = "Schema(NestedField(field_id=1, name='foo', field_type=StringType(), required=False), NestedField(field_id=2, name='bar', field_type=IntegerType(), required=False), schema_id=1, identifier_field_ids=[])"
108108
assert expected == actual
109109

110110

tests/test_types.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -523,7 +523,7 @@ def test_serialization_struct() -> None:
523523
expected = (
524524
'{"type":"struct","fields":['
525525
'{"id":1,"name":"required_field","type":"string","required":true,"doc":"this is a doc"},'
526-
'{"id":2,"name":"optional_field","type":"int","required":true}'
526+
'{"id":2,"name":"optional_field","type":"int","required":false}'
527527
"]}"
528528
)
529529
assert actual == expected
@@ -545,7 +545,7 @@ def test_deserialization_struct() -> None:
545545
"id": 2,
546546
"name": "optional_field",
547547
"type": "int",
548-
"required": true
548+
"required": false
549549
}
550550
]
551551
}
@@ -560,13 +560,13 @@ def test_deserialization_struct() -> None:
560560

561561

562562
def test_str_struct(simple_struct: StructType) -> None:
563-
assert str(simple_struct) == "struct<1: required_field: required string (this is a doc), 2: optional_field: required int>"
563+
assert str(simple_struct) == "struct<1: required_field: required string (this is a doc), 2: optional_field: optional int>"
564564

565565

566566
def test_repr_struct(simple_struct: StructType) -> None:
567567
assert (
568568
repr(simple_struct)
569-
== "StructType(fields=(NestedField(field_id=1, name='required_field', field_type=StringType(), required=True), NestedField(field_id=2, name='optional_field', field_type=IntegerType(), required=True),))"
569+
== "StructType(fields=(NestedField(field_id=1, name='required_field', field_type=StringType(), required=True), NestedField(field_id=2, name='optional_field', field_type=IntegerType(), required=False),))"
570570
)
571571

572572

0 commit comments

Comments
 (0)