@@ -459,9 +459,8 @@ def to_v2(self) -> TableMetadataV2:
459
459
return TableMetadataV2 .model_validate (metadata )
460
460
461
461
format_version : Literal [1 ] = Field (alias = "format-version" , default = 1 )
462
- """An integer version number for the format. Currently, this can be 1 or 2
463
- based on the spec. Implementations must throw an exception if a table’s
464
- version is higher than the supported version."""
462
+ """An integer version number for the format. Implementations must throw
463
+ an exception if a table’s version is higher than the supported version."""
465
464
466
465
schema_ : Schema = Field (alias = "schema" )
467
466
"""The table’s current schema. (Deprecated: use schemas and
@@ -507,16 +506,74 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
507
506
return construct_refs (table_metadata )
508
507
509
508
format_version : Literal [2 ] = Field (alias = "format-version" , default = 2 )
510
- """An integer version number for the format. Currently, this can be 1 or 2
511
- based on the spec. Implementations must throw an exception if a table’s
512
- version is higher than the supported version."""
509
+ """An integer version number for the format. Implementations must throw
510
+ an exception if a table’s version is higher than the supported version."""
513
511
514
512
last_sequence_number : int = Field (alias = "last-sequence-number" , default = INITIAL_SEQUENCE_NUMBER )
515
513
"""The table’s highest assigned sequence number, a monotonically
516
514
increasing long that tracks the order of snapshots in a table."""
517
515
518
516
519
- TableMetadata = Annotated [Union [TableMetadataV1 , TableMetadataV2 ], Field (discriminator = "format_version" )]
517
class TableMetadataV3(TableMetadataCommonFields, IcebergBaseModel):
    """Represents version 3 of the Table Metadata.

    Version 3 of the Iceberg spec extends data types and existing metadata structures to add new capabilities:

    - New data types: nanosecond timestamp(tz), unknown
    - Default value support for columns
    - Multi-argument transforms for partitioning and sorting
    - Row Lineage tracking
    - Binary deletion vectors

    Serializing this model to JSON is not supported yet: `model_dump_json`
    always raises `NotImplementedError`.

    For more information:
    https://iceberg.apache.org/spec/?column-projection#version-3-extended-types-and-capabilities
    """

    # Each validator below is a thin wrapper: inside a method body the bare
    # name (e.g. `cleanup_snapshot_id`) resolves to the module-level function
    # of the same name, not to the method being defined.
    #
    # NOTE(review): the "after" validators name their first parameter `cls`
    # and take the model as `table_metadata`, the same shape used by the V2
    # class in this file. Confirm how pydantic binds these before renaming or
    # reordering parameters.

    # Registered with mode="before", so it receives the raw input mapping.
    @model_validator(mode="before")
    def cleanup_snapshot_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        return cleanup_snapshot_id(data)

    # Registered with mode="after"; delegates to the shared schema check.
    @model_validator(mode="after")
    def check_schemas(cls, table_metadata: TableMetadata) -> TableMetadata:
        return check_schemas(table_metadata)

    # Registered with mode="after"; delegates to the shared partition-spec check.
    @model_validator(mode="after")
    def check_partition_specs(cls, table_metadata: TableMetadata) -> TableMetadata:
        return check_partition_specs(table_metadata)

    # Registered with mode="after"; delegates to the shared sort-order check.
    @model_validator(mode="after")
    def check_sort_orders(cls, table_metadata: TableMetadata) -> TableMetadata:
        return check_sort_orders(table_metadata)

    # Registered with mode="after"; delegates to the shared refs construction.
    @model_validator(mode="after")
    def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
        return construct_refs(table_metadata)

    # Literal[3] restricts this field to the value 3; the `TableMetadata`
    # union in this file uses `format_version` as its discriminator.
    format_version: Literal[3] = Field(alias="format-version", default=3)
    """An integer version number for the format. Implementations must throw
    an exception if a table’s version is higher than the supported version."""

    # Defaults to INITIAL_SEQUENCE_NUMBER when the input omits it.
    last_sequence_number: int = Field(alias="last-sequence-number", default=INITIAL_SEQUENCE_NUMBER)
    """The table’s highest assigned sequence number, a monotonically
    increasing long that tracks the order of snapshots in a table."""

    # Defaults to False when the input omits it.
    row_lineage: bool = Field(alias="row-lineage", default=False)
    """Indicates that row-lineage is enabled on the table

    For more information:
    https://iceberg.apache.org/spec/?column-projection#row-lineage
    """

    # Optional; None when the input omits it.
    next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
    """A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""

    def model_dump_json(
        self, exclude_none: bool = True, exclude: Optional[Any] = None, by_alias: bool = True, **kwargs: Any
    ) -> str:
        """Refuse to serialize V3 metadata to JSON.

        All arguments are accepted but ignored; the method raises unconditionally.

        Raises:
            NotImplementedError: Always, because writing V3 is not yet supported.
        """
        raise NotImplementedError("Writing V3 is not yet supported, see: https://github.com/apache/iceberg-python/issues/1551")
574
+
575
+
576
# Any supported table metadata version. The union is discriminated on
# `format_version`, whose Literal value (1, 2 or 3) identifies the concrete model.
TableMetadata = Annotated[Union[TableMetadataV1, TableMetadataV2, TableMetadataV3], Field(discriminator="format_version")]
520
577
521
578
522
579
def new_table_metadata (
@@ -553,20 +610,36 @@ def new_table_metadata(
553
610
last_partition_id = fresh_partition_spec .last_assigned_field_id ,
554
611
table_uuid = table_uuid ,
555
612
)
556
-
557
- return TableMetadataV2 (
558
- location = location ,
559
- schemas = [fresh_schema ],
560
- last_column_id = fresh_schema .highest_field_id ,
561
- current_schema_id = fresh_schema .schema_id ,
562
- partition_specs = [fresh_partition_spec ],
563
- default_spec_id = fresh_partition_spec .spec_id ,
564
- sort_orders = [fresh_sort_order ],
565
- default_sort_order_id = fresh_sort_order .order_id ,
566
- properties = properties ,
567
- last_partition_id = fresh_partition_spec .last_assigned_field_id ,
568
- table_uuid = table_uuid ,
569
- )
613
+ elif format_version == 2 :
614
+ return TableMetadataV2 (
615
+ location = location ,
616
+ schemas = [fresh_schema ],
617
+ last_column_id = fresh_schema .highest_field_id ,
618
+ current_schema_id = fresh_schema .schema_id ,
619
+ partition_specs = [fresh_partition_spec ],
620
+ default_spec_id = fresh_partition_spec .spec_id ,
621
+ sort_orders = [fresh_sort_order ],
622
+ default_sort_order_id = fresh_sort_order .order_id ,
623
+ properties = properties ,
624
+ last_partition_id = fresh_partition_spec .last_assigned_field_id ,
625
+ table_uuid = table_uuid ,
626
+ )
627
+ elif format_version == 3 :
628
+ return TableMetadataV3 (
629
+ location = location ,
630
+ schemas = [fresh_schema ],
631
+ last_column_id = fresh_schema .highest_field_id ,
632
+ current_schema_id = fresh_schema .schema_id ,
633
+ partition_specs = [fresh_partition_spec ],
634
+ default_spec_id = fresh_partition_spec .spec_id ,
635
+ sort_orders = [fresh_sort_order ],
636
+ default_sort_order_id = fresh_sort_order .order_id ,
637
+ properties = properties ,
638
+ last_partition_id = fresh_partition_spec .last_assigned_field_id ,
639
+ table_uuid = table_uuid ,
640
+ )
641
+ else :
642
+ raise ValidationError (f"Unknown format version: { format_version } " )
570
643
571
644
572
645
class TableMetadataWrapper (IcebergRootModel [TableMetadata ]):
@@ -593,6 +666,8 @@ def parse_obj(data: Dict[str, Any]) -> TableMetadata:
593
666
return TableMetadataV1 (** data )
594
667
elif format_version == 2 :
595
668
return TableMetadataV2 (** data )
669
+ elif format_version == 3 :
670
+ return TableMetadataV3 (** data )
596
671
else :
597
672
raise ValidationError (f"Unknown format version: { format_version } " )
598
673
@@ -609,6 +684,8 @@ def _construct_without_validation(table_metadata: TableMetadata) -> TableMetadat
609
684
return TableMetadataV1 .model_construct (** dict (table_metadata ))
610
685
elif table_metadata .format_version == 2 :
611
686
return TableMetadataV2 .model_construct (** dict (table_metadata ))
687
+ elif table_metadata .format_version == 3 :
688
+ return TableMetadataV3 .model_construct (** dict (table_metadata ))
612
689
else :
613
690
raise ValidationError (f"Unknown format version: { table_metadata .format_version } " )
614
691
0 commit comments