Skip to content

Commit 7c83641

Browse files
authored
Set file_size attribute directly from file metadata (#576)
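DataFusion's `PartitionedFile` struct relies on a correct file URL and file size. The file size is used to calculate the offset for reading the Parquet footer, so a wrong file size can lead to query issues. Previously `file_size` was computed as the sum of the row groups' compressed sizes, which is not the same as the size of the file on disk; this PR sets it directly from the file's metadata instead. Fixes #575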
1 parent f289529 commit 7c83641

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

server/src/catalog/manifest.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ pub fn create_from_parquet_file(
9797
};
9898

9999
let file = std::fs::File::open(fs_file_path)?;
100+
manifest_file.file_size = file.metadata()?.len();
101+
100102
let file = parquet::file::serialized_reader::SerializedFileReader::new(file)?;
101103
let file_meta = file.metadata().file_metadata();
102104
let row_groups = file.metadata().row_groups();
@@ -105,9 +107,6 @@ pub fn create_from_parquet_file(
105107
manifest_file.ingestion_size = row_groups
106108
.iter()
107109
.fold(0, |acc, x| acc + x.total_byte_size() as u64);
108-
manifest_file.file_size = row_groups
109-
.iter()
110-
.fold(0, |acc, x| acc + x.compressed_size() as u64);
111110

112111
let columns = column_statistics(row_groups);
113112
manifest_file.columns = columns.into_values().collect();

0 commit comments

Comments
 (0)