Skip to content

Commit 078f458

Browse files
authored
Merge branch 'main' into ctty/df-insert
2 parents 5001e07 + b3ea8d1 commit 078f458

File tree

12 files changed

+140
-45
lines changed

12 files changed

+140
-45
lines changed

Cargo.lock

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bindings/python/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,18 @@
1919

2020
# Pyiceberg Core
2121

22-
This project is used to build an iceberg-rust powered core for pyiceberg.
22+
This project is used to build an Iceberg-rust powered core for [PyIceberg](https://py.iceberg.apache.org/).
2323

2424
## Setup
2525

26+
Install Hatch:
27+
2628
```shell
2729
pip install hatch==1.12.0
2830
```
2931

32+
Hatch uses [uv](https://docs.astral.sh/uv/) as a backend by default, so [make sure that it is installed](https://docs.astral.sh/uv/getting-started/installation/) as well.
33+
3034
## Build
3135

3236
```shell
@@ -37,4 +41,4 @@ hatch run dev:develop
3741

3842
```shell
3943
hatch run dev:test
40-
```
44+
```

crates/catalog/glue/src/catalog.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -395,10 +395,7 @@ impl Catalog for GlueCatalog {
395395
.metadata;
396396
let metadata_location = create_metadata_location(&location, 0)?;
397397

398-
self.file_io
399-
.new_output(&metadata_location)?
400-
.write(serde_json::to_vec(&metadata)?.into())
401-
.await?;
398+
metadata.write_to(&self.file_io, &metadata_location).await?;
402399

403400
let glue_table = convert_to_glue_table(
404401
&table_name,
@@ -463,9 +460,7 @@ impl Catalog for GlueCatalog {
463460
Some(table) => {
464461
let metadata_location = get_metadata_location(&table.parameters)?;
465462

466-
let input_file = self.file_io.new_input(&metadata_location)?;
467-
let metadata_content = input_file.read().await?;
468-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
463+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
469464

470465
Table::builder()
471466
.file_io(self.file_io())

crates/catalog/hms/src/catalog.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,7 @@ impl Catalog for HmsCatalog {
353353

354354
let metadata_location = create_metadata_location(&location, 0)?;
355355

356-
self.file_io
357-
.new_output(&metadata_location)?
358-
.write(serde_json::to_vec(&metadata)?.into())
359-
.await?;
356+
metadata.write_to(&self.file_io, &metadata_location).await?;
360357

361358
let hive_table = convert_to_hive_table(
362359
db_name.clone(),
@@ -406,8 +403,7 @@ impl Catalog for HmsCatalog {
406403

407404
let metadata_location = get_metadata_location(&hive_table.parameters)?;
408405

409-
let metadata_content = self.file_io.new_input(&metadata_location)?.read().await?;
410-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
406+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
411407

412408
Table::builder()
413409
.file_io(self.file_io())

crates/catalog/s3tables/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ async-trait = { workspace = true }
3434
aws-config = { workspace = true }
3535
aws-sdk-s3tables = "1.10.0"
3636
iceberg = { workspace = true }
37-
serde_json = { workspace = true }
3837
typed-builder = { workspace = true }
3938
uuid = { workspace = true, features = ["v4"] }
4039

crates/catalog/s3tables/src/catalog.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,7 @@ impl Catalog for S3TablesCatalog {
334334
let metadata = TableMetadataBuilder::from_table_creation(creation)?
335335
.build()?
336336
.metadata;
337-
self.file_io
338-
.new_output(&metadata_location)?
339-
.write(serde_json::to_vec(&metadata)?.into())
340-
.await?;
337+
metadata.write_to(&self.file_io, &metadata_location).await?;
341338

342339
// update metadata location
343340
self.s3tables_client
@@ -389,9 +386,7 @@ impl Catalog for S3TablesCatalog {
389386
),
390387
)
391388
})?;
392-
let input_file = self.file_io.new_input(metadata_location)?;
393-
let metadata_content = input_file.read().await?;
394-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
389+
let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?;
395390

396391
let table = Table::builder()
397392
.identifier(table_ident.clone())

crates/catalog/sql/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ repository = { workspace = true }
3131
[dependencies]
3232
async-trait = { workspace = true }
3333
iceberg = { workspace = true }
34-
serde_json = { workspace = true }
3534
sqlx = { version = "0.8.1", features = ["any"], default-features = false }
3635
typed-builder = { workspace = true }
3736
uuid = { workspace = true, features = ["v4"] }

crates/catalog/sql/src/catalog.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -642,9 +642,7 @@ impl Catalog for SqlCatalog {
642642
.try_get::<String, _>(CATALOG_FIELD_METADATA_LOCATION_PROP)
643643
.map_err(from_sqlx_error)?;
644644

645-
let file = self.fileio.new_input(&tbl_metadata_location)?;
646-
let metadata_content = file.read().await?;
647-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
645+
let metadata = TableMetadata::read_from(&self.fileio, &tbl_metadata_location).await?;
648646

649647
Ok(Table::builder()
650648
.file_io(self.fileio.clone())
@@ -708,8 +706,8 @@ impl Catalog for SqlCatalog {
708706
Uuid::new_v4()
709707
);
710708

711-
let file = self.fileio.new_output(&tbl_metadata_location)?;
712-
file.write(serde_json::to_vec(&tbl_metadata)?.into())
709+
tbl_metadata
710+
.write_to(&self.fileio, &tbl_metadata_location)
713711
.await?;
714712

715713
self.execute(&format!(

crates/iceberg/src/catalog/memory/catalog.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,7 @@ impl Catalog for MemoryCatalog {
210210
Uuid::new_v4()
211211
);
212212

213-
self.file_io
214-
.new_output(&metadata_location)?
215-
.write(serde_json::to_vec(&metadata)?.into())
216-
.await?;
213+
metadata.write_to(&self.file_io, &metadata_location).await?;
217214

218215
root_namespace_state.insert_new_table(&table_ident, metadata_location.clone())?;
219216

@@ -230,9 +227,7 @@ impl Catalog for MemoryCatalog {
230227
let root_namespace_state = self.root_namespace_state.lock().await;
231228

232229
let metadata_location = root_namespace_state.get_existing_table_location(table_ident)?;
233-
let input_file = self.file_io.new_input(metadata_location)?;
234-
let metadata_content = input_file.read().await?;
235-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
230+
let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?;
236231

237232
Table::builder()
238233
.file_io(self.file_io.clone())
@@ -284,9 +279,7 @@ impl Catalog for MemoryCatalog {
284279
let mut root_namespace_state = self.root_namespace_state.lock().await;
285280
root_namespace_state.insert_new_table(&table_ident.clone(), metadata_location.clone())?;
286281

287-
let input_file = self.file_io.new_input(metadata_location.clone())?;
288-
let metadata_content = input_file.read().await?;
289-
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
282+
let metadata = TableMetadata::read_from(&self.file_io, &metadata_location).await?;
290283

291284
Table::builder()
292285
.file_io(self.file_io.clone())

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use super::{
3737
SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType,
3838
};
3939
use crate::error::{Result, timestamp_ms_to_utc};
40+
use crate::io::FileIO;
4041
use crate::{Error, ErrorKind};
4142

4243
static MAIN_BRANCH: &str = "main";
@@ -471,6 +472,29 @@ impl TableMetadata {
471472
self.encryption_keys.get(key_id)
472473
}
473474

475+
/// Read table metadata from the given location.
476+
pub async fn read_from(
477+
file_io: &FileIO,
478+
metadata_location: impl AsRef<str>,
479+
) -> Result<TableMetadata> {
480+
let input_file = file_io.new_input(metadata_location)?;
481+
let metadata_content = input_file.read().await?;
482+
let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
483+
Ok(metadata)
484+
}
485+
486+
/// Write table metadata to the given location.
487+
pub async fn write_to(
488+
&self,
489+
file_io: &FileIO,
490+
metadata_location: impl AsRef<str>,
491+
) -> Result<()> {
492+
file_io
493+
.new_output(metadata_location)?
494+
.write(serde_json::to_vec(self)?.into())
495+
.await
496+
}
497+
474498
/// Normalize this partition spec.
475499
///
476500
/// This is an internal method
@@ -1362,10 +1386,12 @@ mod tests {
13621386

13631387
use anyhow::Result;
13641388
use pretty_assertions::assert_eq;
1389+
use tempfile::TempDir;
13651390
use uuid::Uuid;
13661391

13671392
use super::{FormatVersion, MetadataLog, SnapshotLog, TableMetadataBuilder};
13681393
use crate::TableCreation;
1394+
use crate::io::FileIOBuilder;
13691395
use crate::spec::table_metadata::TableMetadata;
13701396
use crate::spec::{
13711397
BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PartitionStatisticsFile,
@@ -3057,4 +3083,49 @@ mod tests {
30573083
)])
30583084
);
30593085
}
3086+
3087+
#[tokio::test]
3088+
async fn test_table_metadata_io_read_write() {
3089+
// Create a temporary directory for our test
3090+
let temp_dir = TempDir::new().unwrap();
3091+
let temp_path = temp_dir.path().to_str().unwrap();
3092+
3093+
// Create a FileIO instance
3094+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
3095+
3096+
// Use an existing test metadata from the test files
3097+
let original_metadata: TableMetadata = get_test_table_metadata("TableMetadataV2Valid.json");
3098+
3099+
// Define the metadata location
3100+
let metadata_location = format!("{}/metadata.json", temp_path);
3101+
3102+
// Write the metadata
3103+
original_metadata
3104+
.write_to(&file_io, &metadata_location)
3105+
.await
3106+
.unwrap();
3107+
3108+
// Verify the file exists
3109+
assert!(fs::metadata(&metadata_location).is_ok());
3110+
3111+
// Read the metadata back
3112+
let read_metadata = TableMetadata::read_from(&file_io, &metadata_location)
3113+
.await
3114+
.unwrap();
3115+
3116+
// Verify the metadata matches
3117+
assert_eq!(read_metadata, original_metadata);
3118+
}
3119+
3120+
#[tokio::test]
3121+
async fn test_table_metadata_io_read_nonexistent_file() {
3122+
// Create a FileIO instance
3123+
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
3124+
3125+
// Try to read a non-existent file
3126+
let result = TableMetadata::read_from(&file_io, "/nonexistent/path/metadata.json").await;
3127+
3128+
// Verify it returns an error
3129+
assert!(result.is_err());
3130+
}
30603131
}

0 commit comments

Comments
 (0)