Skip to content

Commit 9277a17

Browse files
authored
refactor: Add read_from() and write_to() to TableMetadata (#1523)
## Which issue does this PR close? - Closes #1388 ## What changes are included in this PR? - Add `TableMetadataIO` to read/write metadata from/to a location ## Are these changes tested? Added unit test
1 parent 9787140 commit 9277a17

File tree

9 files changed

+83
-39
lines changed

9 files changed

+83
-39
lines changed

Cargo.lock

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/catalog/glue/src/catalog.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -395,10 +395,7 @@ impl Catalog for GlueCatalog {
395395
.metadata;
396396
let metadata_location = create_metadata_location(&location, 0)?;
397397

398-
self.file_io
399-
.new_output(&metadata_location)?
400-
.write(serde_json::to_vec(&metadata)?.into())
401-
.await?;
398+
metadata.write_to(&self.file_io, &metadata_location).await?;
402399

403400
let glue_table = convert_to_glue_table(
404401
&table_name,
@@ -463,9 +460,7 @@ impl Catalog for GlueCatalog {
463460
Some(table) => {
464461
let metadata_location = get_metadata_location(&table.parameters)?;
465462

466-
let input_file = self.file_io.new_input(&metadata_location)?;
467-
let metadata_content = input_file.read().await?;
468-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
463+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
469464

470465
Table::builder()
471466
.file_io(self.file_io())

crates/catalog/hms/src/catalog.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,7 @@ impl Catalog for HmsCatalog {
353353

354354
let metadata_location = create_metadata_location(&location, 0)?;
355355

356-
self.file_io
357-
.new_output(&metadata_location)?
358-
.write(serde_json::to_vec(&metadata)?.into())
359-
.await?;
356+
metadata.write_to(&self.file_io, &metadata_location).await?;
360357

361358
let hive_table = convert_to_hive_table(
362359
db_name.clone(),
@@ -406,8 +403,7 @@ impl Catalog for HmsCatalog {
406403

407404
let metadata_location = get_metadata_location(&hive_table.parameters)?;
408405

409-
let metadata_content = self.file_io.new_input(&metadata_location)?.read().await?;
410-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
406+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
411407

412408
Table::builder()
413409
.file_io(self.file_io())

crates/catalog/s3tables/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ async-trait = { workspace = true }
3434
aws-config = { workspace = true }
3535
aws-sdk-s3tables = "1.10.0"
3636
iceberg = { workspace = true }
37-
serde_json = { workspace = true }
3837
typed-builder = { workspace = true }
3938
uuid = { workspace = true, features = ["v4"] }
4039

crates/catalog/s3tables/src/catalog.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,7 @@ impl Catalog for S3TablesCatalog {
334334
let metadata = TableMetadataBuilder::from_table_creation(creation)?
335335
.build()?
336336
.metadata;
337-
self.file_io
338-
.new_output(&metadata_location)?
339-
.write(serde_json::to_vec(&metadata)?.into())
340-
.await?;
337+
metadata.write_to(&self.file_io, &metadata_location).await?;
341338

342339
// update metadata location
343340
self.s3tables_client
@@ -389,9 +386,7 @@ impl Catalog for S3TablesCatalog {
389386
),
390387
)
391388
})?;
392-
let input_file = self.file_io.new_input(metadata_location)?;
393-
let metadata_content = input_file.read().await?;
394-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
389+
let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?;
395390

396391
let table = Table::builder()
397392
.identifier(table_ident.clone())

crates/catalog/sql/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ repository = { workspace = true }
3131
[dependencies]
3232
async-trait = { workspace = true }
3333
iceberg = { workspace = true }
34-
serde_json = { workspace = true }
3534
sqlx = { version = "0.8.1", features = ["any"], default-features = false }
3635
typed-builder = { workspace = true }
3736
uuid = { workspace = true, features = ["v4"] }

crates/catalog/sql/src/catalog.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -642,9 +642,7 @@ impl Catalog for SqlCatalog {
642642
.try_get::<String, _>(CATALOG_FIELD_METADATA_LOCATION_PROP)
643643
.map_err(from_sqlx_error)?;
644644

645-
let file = self.fileio.new_input(&tbl_metadata_location)?;
646-
let metadata_content = file.read().await?;
647-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
645+
let metadata = TableMetadata::read_from(&self.fileio, &tbl_metadata_location).await?;
648646

649647
Ok(Table::builder()
650648
.file_io(self.fileio.clone())
@@ -708,8 +706,8 @@ impl Catalog for SqlCatalog {
708706
Uuid::new_v4()
709707
);
710708

711-
let file = self.fileio.new_output(&tbl_metadata_location)?;
712-
file.write(serde_json::to_vec(&tbl_metadata)?.into())
709+
tbl_metadata
710+
.write_to(&self.fileio, &tbl_metadata_location)
713711
.await?;
714712

715713
self.execute(&format!(

crates/iceberg/src/catalog/memory/catalog.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,7 @@ impl Catalog for MemoryCatalog {
210210
Uuid::new_v4()
211211
);
212212

213-
self.file_io
214-
.new_output(&metadata_location)?
215-
.write(serde_json::to_vec(&metadata)?.into())
216-
.await?;
213+
metadata.write_to(&self.file_io, &metadata_location).await?;
217214

218215
root_namespace_state.insert_new_table(&table_ident, metadata_location.clone())?;
219216

@@ -230,9 +227,7 @@ impl Catalog for MemoryCatalog {
230227
let root_namespace_state = self.root_namespace_state.lock().await;
231228

232229
let metadata_location = root_namespace_state.get_existing_table_location(table_ident)?;
233-
let input_file = self.file_io.new_input(metadata_location)?;
234-
let metadata_content = input_file.read().await?;
235-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
230+
let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?;
236231

237232
Table::builder()
238233
.file_io(self.file_io.clone())
@@ -284,9 +279,7 @@ impl Catalog for MemoryCatalog {
284279
let mut root_namespace_state = self.root_namespace_state.lock().await;
285280
root_namespace_state.insert_new_table(&table_ident.clone(), metadata_location.clone())?;
286281

287-
let input_file = self.file_io.new_input(metadata_location.clone())?;
288-
let metadata_content = input_file.read().await?;
289-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
282+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
290283

291284
Table::builder()
292285
.file_io(self.file_io.clone())

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use super::{
3737
SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType,
3838
};
3939
use crate::error::{Result, timestamp_ms_to_utc};
40+
use crate::io::FileIO;
4041
use crate::{Error, ErrorKind};
4142

4243
static MAIN_BRANCH: &str = "main";
@@ -464,6 +465,29 @@ impl TableMetadata {
464465
self.encryption_keys.get(key_id)
465466
}
466467

468+
/// Read table metadata from the given location.
469+
pub async fn read_from(
470+
file_io: &FileIO,
471+
metadata_location: impl AsRef<str>,
472+
) -> Result<TableMetadata> {
473+
let input_file = file_io.new_input(metadata_location)?;
474+
let metadata_content = input_file.read().await?;
475+
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
476+
Ok(metadata)
477+
}
478+
479+
/// Write table metadata to the given location.
480+
pub async fn write_to(
481+
&self,
482+
file_io: &FileIO,
483+
metadata_location: impl AsRef<str>,
484+
) -> Result<()> {
485+
file_io
486+
.new_output(metadata_location)?
487+
.write(serde_json::to_vec(self)?.into())
488+
.await
489+
}
490+
467491
/// Normalize this partition spec.
468492
///
469493
/// This is an internal method
@@ -1355,10 +1379,12 @@ mod tests {
13551379

13561380
use anyhow::Result;
13571381
use pretty_assertions::assert_eq;
1382+
use tempfile::TempDir;
13581383
use uuid::Uuid;
13591384

13601385
use super::{FormatVersion, MetadataLog, SnapshotLog, TableMetadataBuilder};
13611386
use crate::TableCreation;
1387+
use crate::io::FileIOBuilder;
13621388
use crate::spec::table_metadata::TableMetadata;
13631389
use crate::spec::{
13641390
BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PartitionStatisticsFile,
@@ -3050,4 +3076,49 @@ mod tests {
30503076
)])
30513077
);
30523078
}
3079+
3080+
#[tokio::test]
3081+
async fn test_table_metadata_io_read_write() {
3082+
// Create a temporary directory for our test
3083+
let temp_dir = TempDir::new().unwrap();
3084+
let temp_path = temp_dir.path().to_str().unwrap();
3085+
3086+
// Create a FileIO instance
3087+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
3088+
3089+
// Use an existing test metadata from the test files
3090+
let original_metadata: TableMetadata = get_test_table_metadata("TableMetadataV2Valid.json");
3091+
3092+
// Define the metadata location
3093+
let metadata_location = format!("{}/metadata.json", temp_path);
3094+
3095+
// Write the metadata
3096+
original_metadata
3097+
.write_to(&file_io, &metadata_location)
3098+
.await
3099+
.unwrap();
3100+
3101+
// Verify the file exists
3102+
assert!(fs::metadata(&metadata_location).is_ok());
3103+
3104+
// Read the metadata back
3105+
let read_metadata = TableMetadata::read_from(&file_io, &metadata_location)
3106+
.await
3107+
.unwrap();
3108+
3109+
// Verify the metadata matches
3110+
assert_eq!(read_metadata, original_metadata);
3111+
}
3112+
3113+
#[tokio::test]
3114+
async fn test_table_metadata_io_read_nonexistent_file() {
3115+
// Create a FileIO instance
3116+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
3117+
3118+
// Try to read a non-existent file
3119+
let result = TableMetadata::read_from(&file_io, "/nonexistent/path/metadata.json").await;
3120+
3121+
// Verify it returns an error
3122+
assert!(result.is_err());
3123+
}
30533124
}

0 commit comments

Comments
 (0)