Skip to content

Commit 9a66768

Browse files
authored
feat: Track size for ParquetMetadata (#5)
1 parent 6505f1b commit 9a66768

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

parquet/src/file/footer.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,11 @@ pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaDat
117117
schema_descr,
118118
column_orders,
119119
);
120-
Ok(ParquetMetaData::new(file_metadata, row_groups))
120+
Ok(ParquetMetaData::new_with_size(
121+
file_metadata,
122+
row_groups,
123+
footer_metadata_len as u32,
124+
))
121125
}
122126

123127
/// Parses column orders from Thrift definition.

parquet/src/file/metadata.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,26 @@ use crate::schema::types::{
5050
pub struct ParquetMetaData {
5151
/// File-level metadata.
file_metadata: FileMetaData,
5252
/// Metadata for the row groups, one entry per row group.
row_groups: Vec<RowGroupMetaData>,
53+
/// Size of the serialized metadata, stored exactly as passed to `new_with_size`.
/// `footer::parse_metadata` supplies the footer metadata length here, while
/// `new` records 0.
/// NOTE(review): presumably measured in bytes — confirm against the footer reader.
54+
metadata_size: u32,
5355
}
5456

5557
impl ParquetMetaData {
5658
/// Creates Parquet metadata from file metadata and a list of row group metadata
5759
/// for each available row group.
///
/// The serialized metadata size is not known on this path, so it is recorded
/// as 0. Use `new_with_size` when the size is available.
5860
pub fn new(file_metadata: FileMetaData, row_groups: Vec<RowGroupMetaData>) -> Self {
61+
ParquetMetaData::new_with_size(file_metadata, row_groups, 0)
62+
}
63+
64+
/// Creates Parquet metadata from file metadata, a list of row group metadata,
/// and the size of the serialized metadata.
///
/// `metadata_size` is stored as given and is later returned by `metadata_size()`.
pub fn new_with_size(
65+
file_metadata: FileMetaData,
66+
row_groups: Vec<RowGroupMetaData>,
67+
metadata_size: u32,
68+
) -> Self {
5969
ParquetMetaData {
6070
file_metadata,
6171
row_groups,
72+
metadata_size,
6273
}
6374
}
6475

@@ -72,6 +83,10 @@ impl ParquetMetaData {
7283
self.row_groups.len()
7384
}
7485

86+
/// Returns the serialized metadata size recorded when this value was constructed.
/// This is 0 for values built with `new`, which does not take a size.
pub fn metadata_size(&self) -> u32 {
87+
self.metadata_size
88+
}
89+
7590
/// Returns row group metadata for `i`th position.
7691
/// Position should be less than number of row groups `num_row_groups`.
7792
pub fn row_group(&self, i: usize) -> &RowGroupMetaData {

parquet/src/file/serialized_reader.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,10 @@ impl<R: 'static + ChunkReader> SerializedFileReader<R> {
158158
filtered_row_groups.push(row_group_metadata.clone());
159159
}
160160
}
161-
self.metadata = ParquetMetaData::new(
161+
self.metadata = ParquetMetaData::new_with_size(
162162
self.metadata.file_metadata().clone(),
163163
filtered_row_groups,
164+
self.metadata.metadata_size(),
164165
);
165166
}
166167
}

0 commit comments

Comments
 (0)