@@ -434,7 +434,7 @@ impl TableMetadata {
434
434
self . validate_chronological_metadata_logs ( ) ?;
435
435
// Normalize location (remove all trailing slashes)
436
436
self . location = self . location . trim_end_matches ( '/' ) . to_string ( ) ;
437
- self . validate_format_version_specifics ( ) ?;
437
+ self . validate_snapshot_sequence_number ( ) ?;
438
438
self . try_normalize_partition_spec ( ) ?;
439
439
self . try_normalize_sort_order ( ) ?;
440
440
Ok ( self )
@@ -547,7 +547,7 @@ impl TableMetadata {
547
547
}
548
548
549
549
/// Validate that for V1 metadata the last_sequence_number is 0, and that for V2 and later no snapshot has a sequence number greater than last_sequence_number
550
- fn validate_format_version_specifics ( & self ) -> Result < ( ) > {
550
+ fn validate_snapshot_sequence_number ( & self ) -> Result < ( ) > {
551
551
if self . format_version < FormatVersion :: V2 && self . last_sequence_number != 0 {
552
552
return Err ( Error :: new (
553
553
ErrorKind :: DataInvalid ,
@@ -558,6 +558,24 @@ impl TableMetadata {
558
558
) ) ;
559
559
}
560
560
561
+ if self . format_version >= FormatVersion :: V2 {
562
+ if let Some ( snapshot) = self
563
+ . snapshots
564
+ . values ( )
565
+ . find ( |snapshot| snapshot. sequence_number ( ) > self . last_sequence_number )
566
+ {
567
+ return Err ( Error :: new (
568
+ ErrorKind :: DataInvalid ,
569
+ format ! (
570
+ "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}" ,
571
+ snapshot. snapshot_id( ) ,
572
+ snapshot. sequence_number( ) ,
573
+ self . last_sequence_number
574
+ ) ,
575
+ ) ) ;
576
+ }
577
+ }
578
+
561
579
Ok ( ( ) )
562
580
}
563
581
@@ -2015,6 +2033,89 @@ mod tests {
2015
2033
. contains( "Snapshot for reference foo does not exist in the existing snapshots list" ) ) ;
2016
2034
}
2017
2035
2036
/// Deserializing V2 table metadata must fail when a snapshot's sequence number
/// exceeds `last-sequence-number`, and succeed once `last-sequence-number` is
/// equal to or greater than every snapshot's sequence number.
#[test]
fn test_v2_wrong_max_snapshot_sequence_number() {
    let data = r#"
    {
        "format-version": 2,
        "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
        "location": "s3://bucket/test/location",
        "last-sequence-number": 1,
        "last-updated-ms": 1602638573590,
        "last-column-id": 3,
        "current-schema-id": 0,
        "schemas": [
            {
                "type": "struct",
                "schema-id": 0,
                "fields": [
                    {
                        "id": 1,
                        "name": "x",
                        "required": true,
                        "type": "long"
                    }
                ]
            }
        ],
        "default-spec-id": 0,
        "partition-specs": [
            {
                "spec-id": 0,
                "fields": []
            }
        ],
        "last-partition-id": 1000,
        "default-sort-order-id": 0,
        "sort-orders": [
            {
                "order-id": 0,
                "fields": []
            }
        ],
        "properties": {},
        "current-snapshot-id": 3055729675574597004,
        "snapshots": [
            {
                "snapshot-id": 3055729675574597004,
                "timestamp-ms": 1555100955770,
                "sequence-number": 4,
                "summary": {
                    "operation": "append"
                },
                "manifest-list": "s3://a/b/2.avro",
                "schema-id": 0
            }
        ],
        "statistics": [],
        "snapshot-log": [],
        "metadata-log": []
    }
    "#;

    // The only snapshot carries sequence number 4 while last-sequence-number
    // is 1, so deserialization has to be rejected.
    let err = serde_json::from_str::<TableMetadata>(data).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains(
            "Invalid snapshot with id 3055729675574597004 and sequence number 4 greater than last sequence number 1"
        ),
        "unexpected error message: {}",
        message
    );

    // Change max sequence number to 4 - should work (bound equals the snapshot's value)
    let data = data.replace(
        r#""last-sequence-number": 1,"#,
        r#""last-sequence-number": 4,"#,
    );
    let metadata = serde_json::from_str::<TableMetadata>(data.as_str()).unwrap();
    assert_eq!(metadata.last_sequence_number, 4);

    // Change max sequence number to 5 - should work (bound exceeds the snapshot's value)
    let data = data.replace(
        r#""last-sequence-number": 4,"#,
        r#""last-sequence-number": 5,"#,
    );
    let metadata = serde_json::from_str::<TableMetadata>(data.as_str()).unwrap();
    assert_eq!(metadata.last_sequence_number, 5);
}
2118
+
2018
2119
#[ test]
2019
2120
fn test_statistic_files ( ) {
2020
2121
let data = r#"
0 commit comments