Skip to content

Commit c6cf0e9

Browse files
authored
fix: TableMetadata last_updated_ms not increased for all operations (#978)
Currently we increase the `last_updated_ms` timestamp only if a snapshot was added. Java always updates this timestamp, also if i.e. only Properties where added.. Trino has a catalog integration test that validates this - which we failed due to this. This PR ensures that `last_updated_ms` is always updated for builds.
1 parent 4466656 commit c6cf0e9

File tree

1 file changed

+51
-10
lines changed

1 file changed

+51
-10
lines changed

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ pub struct TableMetadataBuilder {
5454
last_added_order_id: Option<i64>,
5555
// None if this is a new table (from_metadata) method not used
5656
previous_history_entry: Option<MetadataLog>,
57+
last_updated_ms: Option<i64>,
5758
}
5859

5960
#[derive(Debug, Clone, PartialEq)]
@@ -120,6 +121,7 @@ impl TableMetadataBuilder {
120121
statistics: HashMap::new(),
121122
partition_statistics: HashMap::new(),
122123
},
124+
last_updated_ms: None,
123125
changes: vec![],
124126
last_added_schema_id: Some(schema_id),
125127
last_added_spec_id: None,
@@ -156,6 +158,7 @@ impl TableMetadataBuilder {
156158
last_added_schema_id: None,
157159
last_added_spec_id: None,
158160
last_added_order_id: None,
161+
last_updated_ms: None,
159162
}
160163
}
161164

@@ -368,13 +371,17 @@ impl TableMetadataBuilder {
368371
}
369372
}
370373

371-
if snapshot.timestamp_ms() - self.metadata.last_updated_ms < -ONE_MINUTE_MS {
374+
let max_last_updated = self
375+
.last_updated_ms
376+
.unwrap_or_default()
377+
.max(self.metadata.last_updated_ms);
378+
if snapshot.timestamp_ms() - max_last_updated < -ONE_MINUTE_MS {
372379
return Err(Error::new(
373380
ErrorKind::DataInvalid,
374381
format!(
375382
"Invalid snapshot timestamp {}: before last updated timestamp {}",
376383
snapshot.timestamp_ms(),
377-
self.metadata.last_updated_ms
384+
max_last_updated
378385
),
379386
));
380387
}
@@ -384,7 +391,7 @@ impl TableMetadataBuilder {
384391
snapshot: snapshot.clone(),
385392
});
386393

387-
self.metadata.last_updated_ms = snapshot.timestamp_ms();
394+
self.last_updated_ms = Some(snapshot.timestamp_ms());
388395
self.metadata.last_sequence_number = snapshot.sequence_number();
389396
self.metadata
390397
.snapshots
@@ -483,19 +490,23 @@ impl TableMetadataBuilder {
483490
matches!(update, TableUpdate::AddSnapshot { snapshot: snap } if snap.snapshot_id() == snapshot.snapshot_id())
484491
});
485492
if is_added_snapshot {
486-
self.metadata.last_updated_ms = snapshot.timestamp_ms();
493+
self.last_updated_ms = Some(snapshot.timestamp_ms());
487494
}
488495

489496
// Current snapshot id is set only for the main branch
490497
if ref_name == MAIN_BRANCH {
491498
self.metadata.current_snapshot_id = Some(snapshot.snapshot_id());
492-
if self.metadata.last_updated_ms == i64::default() {
493-
self.metadata.last_updated_ms = chrono::Utc::now().timestamp_millis();
499+
let timestamp_ms = if let Some(last_updated_ms) = self.last_updated_ms {
500+
last_updated_ms
501+
} else {
502+
let last_updated_ms = chrono::Utc::now().timestamp_millis();
503+
self.last_updated_ms = Some(last_updated_ms);
504+
last_updated_ms
494505
};
495506

496507
self.metadata.snapshot_log.push(SnapshotLog {
497508
snapshot_id: snapshot.snapshot_id(),
498-
timestamp_ms: self.metadata.last_updated_ms,
509+
timestamp_ms,
499510
});
500511
}
501512

@@ -911,9 +922,9 @@ impl TableMetadataBuilder {
911922

912923
/// Build the table metadata.
913924
pub fn build(mut self) -> Result<TableMetadataBuildResult> {
914-
if self.metadata.last_updated_ms == i64::default() {
915-
self.metadata.last_updated_ms = chrono::Utc::now().timestamp_millis();
916-
}
925+
self.metadata.last_updated_ms = self
926+
.last_updated_ms
927+
.unwrap_or_else(|| chrono::Utc::now().timestamp_millis());
917928

918929
// Check compatibility of the current schema to the default partition spec and sort order.
919930
// We use the `get_xxx` methods from the builder to avoid using the panicking
@@ -1210,6 +1221,8 @@ impl From<TableMetadataBuildResult> for TableMetadata {
12101221

12111222
#[cfg(test)]
12121223
mod tests {
1224+
use std::thread::sleep;
1225+
12131226
use super::*;
12141227
use crate::spec::{
12151228
BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PrimitiveType, Schema,
@@ -2341,4 +2354,32 @@ mod tests {
23412354
assert_eq!(build_result.metadata.partition_statistics.len(), 0);
23422355
assert_eq!(build_result.changes.len(), 0);
23432356
}
2357+
2358+
#[test]
2359+
fn last_update_increased_for_property_only_update() {
2360+
let builder = builder_without_changes(FormatVersion::V2);
2361+
2362+
let metadata = builder.build().unwrap().metadata;
2363+
let last_updated_ms = metadata.last_updated_ms;
2364+
sleep(std::time::Duration::from_millis(2));
2365+
2366+
let build_result = metadata
2367+
.into_builder(Some(
2368+
"s3://bucket/test/location/metadata/metadata1.json".to_string(),
2369+
))
2370+
.set_properties(HashMap::from_iter(vec![(
2371+
"foo".to_string(),
2372+
"bar".to_string(),
2373+
)]))
2374+
.unwrap()
2375+
.build()
2376+
.unwrap();
2377+
2378+
assert!(
2379+
build_result.metadata.last_updated_ms > last_updated_ms,
2380+
"{} > {}",
2381+
build_result.metadata.last_updated_ms,
2382+
last_updated_ms
2383+
);
2384+
}
23442385
}

0 commit comments

Comments
 (0)