Skip to content

Commit 1f0add9

Browse files
committed
feat(arrow): support ArrowReaderOptions in get_metadata
Respect ArrowReaderOptions by extracting metadata_options and passing it to ParquetMetaDataReader via with_metadata_options(). This allows callers to configure metadata decoding behavior.

This change requires parquet 57.1.0, which added the ParquetMetaDataReader::with_metadata_options() API.

Changes:
- Update arrow and parquet dependencies from 57.0 to 57.1
- Update bindings/python/Cargo.lock for the new versions
- Use metadata_options from ArrowReaderOptions in get_metadata

Closes #1934
1 parent 9844638 commit 1f0add9

File tree

3 files changed

+40
-37
lines changed

3 files changed

+40
-37
lines changed

Cargo.toml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ rust-version = "1.88"
4242
anyhow = "1.0.72"
4343
apache-avro = { version = "0.21", features = ["zstandard"] }
4444
array-init = "2"
45-
arrow-arith = "57.0"
46-
arrow-array = "57.0"
47-
arrow-buffer = "57.0"
48-
arrow-cast = "57.0"
49-
arrow-ord = "57.0"
50-
arrow-schema = "57.0"
51-
arrow-select = "57.0"
52-
arrow-string = "57.0"
45+
arrow-arith = "57.1"
46+
arrow-array = "57.1"
47+
arrow-buffer = "57.1"
48+
arrow-cast = "57.1"
49+
arrow-ord = "57.1"
50+
arrow-schema = "57.1"
51+
arrow-select = "57.1"
52+
arrow-string = "57.1"
5353
as-any = "0.3.2"
5454
async-trait = "0.1.89"
5555
aws-config = "1.8.7"
@@ -101,7 +101,7 @@ num-bigint = "0.4.6"
101101
once_cell = "1.20"
102102
opendal = "0.55.0"
103103
ordered-float = "4"
104-
parquet = "57.0"
104+
parquet = "57.1"
105105
pilota = "0.11.10"
106106
port_scanner = "0.1.5"
107107
pretty_assertions = "1.4"

bindings/python/Cargo.lock

Lines changed: 25 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/iceberg/src/arrow/reader.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,19 +1705,21 @@ impl<R: FileRead> AsyncFileReader for ArrowFileReader<R> {
17051705
)
17061706
}
17071707

1708-
// TODO: currently we don't respect `ArrowReaderOptions` cause it don't expose any method to access the option field
1709-
// we will fix it after `v55.1.0` is released in https://github.com/apache/arrow-rs/issues/7393
17101708
fn get_metadata(
17111709
&mut self,
1712-
_options: Option<&'_ ArrowReaderOptions>,
1710+
options: Option<&'_ ArrowReaderOptions>,
17131711
) -> BoxFuture<'_, parquet::errors::Result<Arc<ParquetMetaData>>> {
1712+
// Extract metadata options from ArrowReaderOptions if provided
1713+
let metadata_options = options.map(|o| o.metadata_options().clone());
1714+
17141715
async move {
17151716
let reader = ParquetMetaDataReader::new()
17161717
.with_prefetch_hint(self.metadata_size_hint)
17171718
// Set the page policy first because it updates both column and offset policies.
17181719
.with_page_index_policy(PageIndexPolicy::from(self.preload_page_index))
17191720
.with_column_index_policy(PageIndexPolicy::from(self.preload_column_index))
1720-
.with_offset_index_policy(PageIndexPolicy::from(self.preload_offset_index));
1721+
.with_offset_index_policy(PageIndexPolicy::from(self.preload_offset_index))
1722+
.with_metadata_options(metadata_options);
17211723
let size = self.meta.size;
17221724
let meta = reader.load_and_finish(self, size).await?;
17231725

0 commit comments

Comments
 (0)